diff --git a/.buildkite/README.md b/.buildkite/README.md
new file mode 100644
index 00000000000000..b3f74f2b231378
--- /dev/null
+++ b/.buildkite/README.md
@@ -0,0 +1,7 @@
+# Buildkite
+
+This directory contains the Buildkite configuration files for Base Julia CI.
+
+The rootfs image definitions are located in the [rootfs-images](https://github.com/JuliaCI/rootfs-images) repository.
+
+The documentation for the Base Julia CI setup is located in the [base-buildkite-docs](https://github.com/JuliaCI/base-buildkite-docs) repository.
diff --git a/.buildkite/cryptic_repo_keys/README.md b/.buildkite/cryptic_repo_keys/README.md
new file mode 100644
index 00000000000000..93ed17ce4757bb
--- /dev/null
+++ b/.buildkite/cryptic_repo_keys/README.md
@@ -0,0 +1,6 @@
+## Cryptic repository keys
+
+This folder contains RSA-encrypted symmetric AES keys.
+These are used by buildkite agents to decrypt the secrets embedded within this repository.
+Each buildkite agent contains an RSA secret key that is used to unlock the symmetric AES key that was used to encrypt the secrets within this repository.
+For more information, see the [`cryptic` buildkite plugin repository](https://github.com/staticfloat/cryptic-buildkite-plugin).
diff --git a/.buildkite/cryptic_repo_keys/repo_key.2297e5e7 b/.buildkite/cryptic_repo_keys/repo_key.2297e5e7
new file mode 100644
index 00000000000000..2ab9198b4ce2d7
Binary files /dev/null and b/.buildkite/cryptic_repo_keys/repo_key.2297e5e7 differ
diff --git a/.buildkite/pipelines/experimental/0_webui.yml b/.buildkite/pipelines/experimental/0_webui.yml
new file mode 100644
index 00000000000000..54dbbc59d42562
--- /dev/null
+++ b/.buildkite/pipelines/experimental/0_webui.yml
@@ -0,0 +1,24 @@
+# This file represents what is put into the webUI.
+# It is purely for keeping track of the changes we make to the webUI configuration; modifying this file has no effect.
+# We use the `cryptic` buildkite plugin to provide secrets management, which requires some integration into the WebUI's steps.
+agents:
+  queue: "julia"
+  sandbox.jl: "true"
+
+steps:
+  - label: ":unlock: Unlock secrets, launch pipelines"
+    plugins:
+      - staticfloat/cryptic:
+          # Our list of pipelines that should be launched (but don't require a signature)
+          # These pipelines can be modified by any contributor and CI will still run.
+          # Build secrets will not be available in these pipelines (or their children)
+          # but some of our signed pipelines can wait upon the completion of these unsigned
+          # pipelines.
+          unsigned_pipelines:
+            - .buildkite/pipelines/experimental/launch_unsigned_builders.yml
+
+          # Our signed pipelines must have a `signature` or `signature_file` parameter that
+          # verifies the treehash of the pipeline itself and the inputs listed in `inputs`
+          # signed_pipelines:
+          #   - pipeline: .buildkite/pipelines/experimental/misc/foo_bar_baz.yml
+          #     signature: "my_signature"
diff --git a/.buildkite/pipelines/experimental/README.md b/.buildkite/pipelines/experimental/README.md
new file mode 100644
index 00000000000000..f92aac7a1af02f
--- /dev/null
+++ b/.buildkite/pipelines/experimental/README.md
@@ -0,0 +1,7 @@
+## Experimental pipeline (`master` branch only)
+
+This is the [`julia-master->experimental`](https://buildkite.com/julialang/julia-master-experimental) pipeline.
+
+We use this pipeline for builders that are not yet stable enough to go into the main pipeline.
+
+These builders are triggered by GitHub webhook events, such as pushes and pull requests.
diff --git a/.buildkite/pipelines/experimental/launch_unsigned_builders.yml b/.buildkite/pipelines/experimental/launch_unsigned_builders.yml
new file mode 100644
index 00000000000000..f023e19a5c940b
--- /dev/null
+++ b/.buildkite/pipelines/experimental/launch_unsigned_builders.yml
@@ -0,0 +1,6 @@
+steps:
+  - label: ":buildkite: Launch unsigned pipelines"
+    commands: |
+      buildkite-agent pipeline upload .buildkite/pipelines/experimental/misc/sanitizers.yml
+    agents:
+      queue: julia
diff --git a/.buildkite/pipelines/experimental/misc/sanitizers.yml b/.buildkite/pipelines/experimental/misc/sanitizers.yml
new file mode 100644
index 00000000000000..67c0b547d4b205
--- /dev/null
+++ b/.buildkite/pipelines/experimental/misc/sanitizers.yml
@@ -0,0 +1,31 @@
+agents:
+  queue: "julia"
+  # Only run on `sandbox.jl` machines (not `docker`-isolated ones) since we need nestable sandboxing
+  sandbox.jl: "true"
+  os: "linux"
+
+steps:
+  - label: "asan"
+    key: asan
+    plugins:
+      - JuliaCI/julia#v1:
+          version: 1.6
+      - staticfloat/sandbox#v1:
+          rootfs_url: https://github.com/JuliaCI/rootfs-images/releases/download/v3.1/llvm_passes.x86_64.tar.gz
+          rootfs_treehash: "9dd715500b117a16fcfa419ea0bca0c0ca902cee"
+          uid: 1000
+          gid: 1000
+          workspaces:
+            - "/cache/repos:/cache/repos"
+      # `contrib/asan/check.jl` needs a `julia` binary:
+      - JuliaCI/julia#v1:
+          version: 1.6
+    commands: |
+      echo "--- Build julia-debug with ASAN"
+      contrib/asan/build.sh ./tmp/test-asan -j$${JULIA_NUM_CORES} debug
+      echo "--- Test that ASAN is enabled"
+      contrib/asan/check.jl ./tmp/test-asan/asan/usr/bin/julia-debug
+    timeout_in_minutes: 120
+    # notify:                   # TODO: uncomment this line
+    #   - github_commit_status: # TODO: uncomment this line
+    #       context: "asan"     # TODO: uncomment this line
diff --git a/.buildkite/pipelines/main/0_webui.yml b/.buildkite/pipelines/main/0_webui.yml
new file mode 100644
index 00000000000000..8e7b9c58ea423b
--- /dev/null
+++ b/.buildkite/pipelines/main/0_webui.yml
@@ -0,0 +1,24 @@
+# This file represents what is put into the webUI.
+# It is purely for keeping track of the changes we make to the webUI configuration; modifying this file has no effect.
+# We use the `cryptic` buildkite plugin to provide secrets management, which requires some integration into the WebUI's steps.
+agents:
+  queue: "julia"
+  sandbox.jl: "true"
+
+steps:
+  - label: ":unlock: Unlock secrets, launch pipelines"
+    plugins:
+      - staticfloat/cryptic:
+          # Our list of pipelines that should be launched (but don't require a signature)
+          # These pipelines can be modified by any contributor and CI will still run.
+          # Build secrets will not be available in these pipelines (or their children)
+          # but some of our signed pipelines can wait upon the completion of these unsigned
+          # pipelines.
+          unsigned_pipelines:
+            - .buildkite/pipelines/main/launch_unsigned_builders.yml
+
+          # Our signed pipelines must have a `signature` or `signature_file` parameter that
+          # verifies the treehash of the pipeline itself and the inputs listed in `inputs`
+          signed_pipelines:
+            - pipeline: .buildkite/pipelines/main/misc/signed_pipeline_test.yml
+              signature_file: .buildkite/pipelines/main/misc/signed_pipeline_test.yml.signature
diff --git a/.buildkite/pipelines/main/README.md b/.buildkite/pipelines/main/README.md
new file mode 100644
index 00000000000000..6b9d67bd7cc3ad
--- /dev/null
+++ b/.buildkite/pipelines/main/README.md
@@ -0,0 +1,15 @@
+## Main pipeline
+
+This is the main pipeline. It contains most of the builders. These builders are triggered by GitHub webhook events, such as pushes and pull requests.
+
+We have a different main pipeline for each permanent branch.
+
+For example:
+
+| Permanent Branch | Pipeline                                                                         |
+| ---------------- | -------------------------------------------------------------------------------- |
+| `master`         | [`julia-master`](https://buildkite.com/julialang/julia-master)                   |
+| `release-1.6`    | [`julia-release-1.6`](https://buildkite.com/julialang/julia-release-1-dot-6) |
+| `release-1.7`    | [`julia-release-1.7`](https://buildkite.com/julialang/julia-release-1-dot-7) |
+
+(This is not a complete list.)
diff --git a/.buildkite/pipelines/main/launch_unsigned_builders.yml b/.buildkite/pipelines/main/launch_unsigned_builders.yml
new file mode 100644
index 00000000000000..6e9f0f0d8fa23b
--- /dev/null
+++ b/.buildkite/pipelines/main/launch_unsigned_builders.yml
@@ -0,0 +1,29 @@
+# This file launches all the build jobs that _don't_ require secrets access.
+# These jobs can pass their output off to jobs that do require secrets access,
+# but those privileged steps require signing before they can be run.
+#
+# Yes, this is creating another layer of indirection; the flow now looks like:
+#
+#   [webui] -> launch_unsigned_builders.yml -> misc/whitespace.yml
+#
+# when we could theoretically just have the `webui` launch `misc/whitespace.yml`,
+# however this raises the bar for contributors to add new (unsigned) steps to
+# our CI configuration, so I'd rather live with an extra layer of indirection
+# and only need to touch the webui configuration when we need to alter
+# something about the privileged steps.
+
+steps:
+  - label: ":buildkite: Launch unsigned builders"
+    commands: |
+      # First, we launch the `whitespace` builder, because we want that builder to finish as quickly as possible.
+      buildkite-agent pipeline upload .buildkite/pipelines/main/misc/whitespace.yml
+
+      # Next, we launch the miscellaneous builders in alphabetical order.
+      buildkite-agent pipeline upload .buildkite/pipelines/main/misc/doctest.yml
+      buildkite-agent pipeline upload .buildkite/pipelines/main/misc/embedding.yml
+      buildkite-agent pipeline upload .buildkite/pipelines/main/misc/llvmpasses.yml
+
+      # Finally, we launch the platform builders (`package_*`) and (`tester_*`) in alphabetical order.
+      buildkite-agent pipeline upload .buildkite/pipelines/main/platforms/linux64.yml
+    agents:
+      queue: julia
diff --git a/.buildkite/pipelines/main/misc/doctest.yml b/.buildkite/pipelines/main/misc/doctest.yml
new file mode 100644
index 00000000000000..0a5dc29bcb1c7b
--- /dev/null
+++ b/.buildkite/pipelines/main/misc/doctest.yml
@@ -0,0 +1,36 @@
+agents:
+  queue: "julia"
+  # Only run on `sandbox.jl` machines (not `docker`-isolated ones) since we need nestable sandboxing
+  sandbox.jl: "true"
+  os: "linux"
+
+steps:
+  - label: "doctest"
+    key: doctest
+    plugins:
+      - JuliaCI/julia#v1:
+          version: 1.6
+      - staticfloat/sandbox#v1:
+          rootfs_url: https://github.com/JuliaCI/rootfs-images/releases/download/v3.1/package_linux.x86_64.tar.gz
+          rootfs_treehash: "8c33c341a864852629b8aac01a6eb6a79b73570e"
+          uid: 1000
+          gid: 1000
+          workspaces:
+            # Include `/cache/repos` so that our `git` version introspection works.
+            - "/cache/repos:/cache/repos"
+    commands: |
+      echo "--- Build Julia from source"
+      make -j 6
+
+      echo "--- Print Julia version info"
+      ./julia -e 'using InteractiveUtils; InteractiveUtils.versioninfo()'
+
+      echo "--- Build Julia docs"
+      make docs
+
+      echo "--- Run Julia doctests"
+      JULIA_NUM_THREADS=1 make -C doc doctest=true
+    timeout_in_minutes: 45
+    notify:
+      - github_commit_status:
+          context: "doctest"
diff --git a/.buildkite/pipelines/main/misc/embedding.yml b/.buildkite/pipelines/main/misc/embedding.yml
new file mode 100644
index 00000000000000..087ca0f68eb3d6
--- /dev/null
+++ b/.buildkite/pipelines/main/misc/embedding.yml
@@ -0,0 +1,34 @@
+agents:
+  queue: "julia"
+  # Only run on `sandbox.jl` machines (not `docker`-isolated ones) since we need nestable sandboxing
+  sandbox.jl: "true"
+  os: "linux"
+
+steps:
+  - label: "embedding"
+    key: "embedding"
+    plugins:
+      - JuliaCI/julia#v1:
+          version: 1.6
+      - staticfloat/sandbox#v1:
+          rootfs_url: https://github.com/JuliaCI/rootfs-images/releases/download/v3.1/package_linux.x86_64.tar.gz
+          rootfs_treehash: "8c33c341a864852629b8aac01a6eb6a79b73570e"
+          uid: 1000
+          gid: 1000
+          workspaces:
+            # Include `/cache/repos` so that our `git` version introspection works.
+            - "/cache/repos:/cache/repos"
+    commands: |
+      prefix="/tmp/prefix"
+      echo "+++ Build julia, deploy to $${prefix}"
+      make -j$${JULIA_NUM_CORES} JULIA_PRECOMPILE=0 prefix=$${prefix} install
+
+      embedding_output="/tmp/embedding-test"
+      echo "+++ Run embedding tests, deploy to $${embedding_output}"
+      mkdir -p "$${embedding_output}"
+      make -j$${JULIA_NUM_CORES} -C test/embedding JULIA="$${prefix}/bin/julia" BIN="$${embedding_output}"
+
+    timeout_in_minutes: 60
+    notify:
+      - github_commit_status:
+          context: "embedding"
diff --git a/.buildkite/pipelines/main/misc/llvmpasses.yml b/.buildkite/pipelines/main/misc/llvmpasses.yml
new file mode 100644
index 00000000000000..a012ace41acffa
--- /dev/null
+++ b/.buildkite/pipelines/main/misc/llvmpasses.yml
@@ -0,0 +1,52 @@
+agents:
+  queue: "julia"
+  # Only run on `sandbox.jl` machines (not `docker`-isolated ones) since we need nestable sandboxing
+  sandbox.jl: "true"
+  os: "linux"
+
+steps:
+  - label: "analyzegc"
+    key: "analyzegc"
+    plugins:
+      - JuliaCI/julia#v1:
+          version: 1.6
+      - staticfloat/sandbox#v1:
+          rootfs_url: https://github.com/JuliaCI/rootfs-images/releases/download/v3.1/llvm_passes.x86_64.tar.gz
+          rootfs_treehash: "9dd715500b117a16fcfa419ea0bca0c0ca902cee"
+          workspaces:
+            # Include `/cache/repos` so that our `git` version introspection works.
+            - "/cache/repos:/cache/repos"
+    commands: |
+      echo "--- Install in-tree LLVM dependencies"
+      make -j$${JULIA_NUM_CORES} -C deps install-llvm install-clang install-llvm-tools install-libuv install-utf8proc install-unwind
+      echo "+++ run clangsa/analyzegc"
+      make -j$${JULIA_NUM_CORES} -C test/clangsa
+      make -j$${JULIA_NUM_CORES} -C src analyzegc
+    timeout_in_minutes: 60
+    notify:
+      - github_commit_status:
+          context: "analyzegc"
+
+  - label: "llvmpasses"
+    key: "llvmpasses"
+    plugins:
+      - JuliaCI/julia#v1:
+          version: 1.6
+      - staticfloat/sandbox#v1:
+          rootfs_url: https://github.com/JuliaCI/rootfs-images/releases/download/v3.8/package_linux.x86_64.tar.gz
+          rootfs_treehash: "84a323ae8fcc724f8ea5aca5901bbbf4bda3e519"
+          uid: 1000
+          gid: 1000
+          workspaces:
+            - "/cache/repos:/cache/repos"
+    commands: |
+      echo "--- make release"
+      make -j$${JULIA_NUM_CORES} release JULIA_PRECOMPILE=0
+      echo "--- make src/install-analysis-deps"
+      make -j$${JULIA_NUM_CORES} -C src install-analysis-deps
+      echo "+++ make test/llvmpasses"
+      make -j$${JULIA_NUM_CORES} -C test/llvmpasses
+    timeout_in_minutes: 60
+    notify:
+      - github_commit_status:
+          context: "llvmpasses"
diff --git a/.buildkite/pipelines/main/misc/signed_pipeline_test.yml b/.buildkite/pipelines/main/misc/signed_pipeline_test.yml
new file mode 100644
index 00000000000000..fb13ac15a8d65c
--- /dev/null
+++ b/.buildkite/pipelines/main/misc/signed_pipeline_test.yml
@@ -0,0 +1,17 @@
+agents:
+  queue: "julia"
+  os: "linux"
+
+## pipeline that showcases decryption of environment variable
+steps:
+  - label: ":lock: :rocket: Signed pipeline test"
+    plugins:
+      - staticfloat/cryptic#v1:
+          variables:
+            - SECRET_KEY="U2FsdGVkX18tb7st0SuQAvh4Yv4xENxOAu8q9XkmOeDVKBNY4FngEwK3xmiKUqaS"
+    commands: |
+      echo "SECRET_KEY: $${SECRET_KEY}"
+
+# We must accept the signed job id secret in order to propagate secrets
+env:
+  BUILDKITE_PLUGIN_CRYPTIC_BASE64_SIGNED_JOB_ID_SECRET: ${BUILDKITE_PLUGIN_CRYPTIC_BASE64_SIGNED_JOB_ID_SECRET?}
diff --git a/.buildkite/pipelines/main/misc/signed_pipeline_test.yml.signature b/.buildkite/pipelines/main/misc/signed_pipeline_test.yml.signature
new file mode 100644
index 00000000000000..10220c758086a3
--- /dev/null
+++ b/.buildkite/pipelines/main/misc/signed_pipeline_test.yml.signature
@@ -0,0 +1 @@
+Salted__��NE""�;��יܜ��3%���0-R�`�>t��QΪZ6-�N�����He��_5��[�d��zn�@�@B9���H�ۮ���'
\ No newline at end of file
diff --git a/.buildkite/pipelines/main/misc/whitespace.yml b/.buildkite/pipelines/main/misc/whitespace.yml
new file mode 100644
index 00000000000000..3f9bf13421d8e0
--- /dev/null
+++ b/.buildkite/pipelines/main/misc/whitespace.yml
@@ -0,0 +1,23 @@
+agents:
+  queue: "julia"
+  # Only run on `sandbox.jl` machines (not `docker`-isolated ones) since we need nestable sandboxing
+  sandbox.jl: "true"
+  os: "linux"
+
+steps:
+  - label: "whitespace"
+    key: "whitespace"
+    plugins:
+      - JuliaCI/julia#v1:
+          version: 1.6
+      - staticfloat/sandbox#v1:
+          rootfs_url: https://github.com/JuliaCI/rootfs-images/releases/download/v3.1/package_linux.x86_64.tar.gz
+          rootfs_treehash: "8c33c341a864852629b8aac01a6eb6a79b73570e"
+          workspaces:
+            - "/cache/repos:/cache/repos"
+    commands: |
+      make -j$${JULIA_NUM_CORES} check-whitespace
+    timeout_in_minutes: 10
+    notify:
+      - github_commit_status:
+          context: "whitespace"
diff --git a/.buildkite/pipelines/main/platforms/linux64.yml b/.buildkite/pipelines/main/platforms/linux64.yml
new file mode 100644
index 00000000000000..ad5d32a20b3a03
--- /dev/null
+++ b/.buildkite/pipelines/main/platforms/linux64.yml
@@ -0,0 +1,95 @@
+agents:
+  queue: "julia"
+  # Only run on `sandbox.jl` machines (not `docker`-isolated ones) since we need nestable sandboxing
+  sandbox.jl: "true"
+  os: "linux"
+
+steps:
+  - label: "package_linux64"
+    key: package_linux64
+    plugins:
+      - JuliaCI/julia#v1:
+          # Drop default "registries" directory, so it is not persisted from execution to execution
+          persist_depot_dirs: packages,artifacts,compiled
+          version: 1.6
+      - staticfloat/sandbox#v1:
+          rootfs_url: https://github.com/JuliaCI/rootfs-images/releases/download/v3.1/package_linux.x86_64.tar.gz
+          rootfs_treehash: "8c33c341a864852629b8aac01a6eb6a79b73570e"
+          uid: 1000
+          gid: 1000
+          workspaces:
+            # Include `/cache/repos` so that our `git` version introspection works.
+            - "/cache/repos:/cache/repos"
+    commands: |
+      echo "--- Print the short and long commit hashes"
+      SHORT_COMMIT_LENGTH=10
+      SHORT_COMMIT=`echo $$BUILDKITE_COMMIT | cut -c1-$$SHORT_COMMIT_LENGTH`
+      JULIA_DIRECTORY_NAME="julia-$$SHORT_COMMIT"
+      JULIA_BINARYDIST_FILENAME=`make print-JULIA_BINARYDIST_FILENAME | cut -c27-`
+      ARTIFACT_FILE_EXTENSION="tar.gz"
+      ARTIFACT_FILENAME="$$JULIA_BINARYDIST_FILENAME.$$ARTIFACT_FILE_EXTENSION"
+      echo "The full commit is $$BUILDKITE_COMMIT"
+      echo "The Julia directory name will be $$JULIA_DIRECTORY_NAME"
+      echo "The artifact filename will be $$ARTIFACT_FILENAME"
+
+      echo "--- Build Julia from source"
+      make -j 6
+      make release
+      make install
+
+      echo "--- Make sure that the working directory is clean"
+      if [ -z "$(git status --short)" ]; then echo "INFO: The working directory is clean."; else echo "ERROR: The working directory is dirty."; echo "Output of git status:"; git status; exit 1; fi
+
+      echo "--- Print Julia version info"
+      ./julia -e 'using InteractiveUtils; InteractiveUtils.versioninfo()'
+
+      echo "--- Compress build artifacts"
+      ls -ld $$JULIA_DIRECTORY_NAME/
+      rm -rf $$ARTIFACT_FILENAME
+      tar czf $$ARTIFACT_FILENAME $$JULIA_DIRECTORY_NAME/
+      ls -l $$ARTIFACT_FILENAME
+
+      echo "--- Upload build artifacts"
+      buildkite-agent artifact upload $$ARTIFACT_FILENAME
+    timeout_in_minutes: 60
+    notify:
+      - github_commit_status:
+          context: "package_linux64"
+
+  # TODO: uncomment the following lines in order to enable the `tester_linux64` builder
+  # - label: "tester_linux64"
+  #   key: tester_linux64
+  #   depends_on: package_linux64
+  #   plugins:
+  #     - JuliaCI/julia#v1:
+  #         version: 1.6
+  #     - staticfloat/sandbox#v1:
+  #         # TODO: use a separate `tester_linux` image, instead of using the `package_linux` image.
+  #         rootfs_url: https://github.com/JuliaCI/rootfs-images/releases/download/v3.1/package_linux.x86_64.tar.gz
+  #         rootfs_treehash: "8c33c341a864852629b8aac01a6eb6a79b73570e"
+  #         uid: 1000
+  #         gid: 1000
+  #         workspaces:
+  #           # Include `/cache/repos` so that our `git` version introspection works.
+  #           - "/cache/repos:/cache/repos"
+  #   env:
+  #     JULIA_SHELL: "/bin/bash"
+  #   commands: |
+  #     echo "--- Download build artifacts"
+  #     rm -rf julia-linux64.tar.gz
+  #     buildkite-agent artifact download julia-linux64.tar.gz .
+  #
+  #     echo "--- Extract build artifacts"
+  #     rm -rf julia-artifact/
+  #     tar xzf julia-linux64.tar.gz julia-artifact/
+  #
+  #     echo "--- Print Julia version info"
+  #     julia-artifact/bin/julia -e 'using InteractiveUtils; InteractiveUtils.versioninfo()'
+  #
+  #     echo "--- Run the Julia test suite"
+  #     unset JULIA_DEPOT_PATH
+  #     julia-artifact/bin/julia .buildkite/utilities/rr/rr_capture.jl julia-artifact/bin/julia -e 'Base.runtests(["all"]; ncores = Sys.CPU_THREADS)'
+  #   timeout_in_minutes: 120
+  #   notify:
+  #     - github_commit_status:
+  #         context: "tester_linux64"
diff --git a/.buildkite/pipelines/scheduled/0_webui.yml b/.buildkite/pipelines/scheduled/0_webui.yml
new file mode 100644
index 00000000000000..8aaf812376b5c9
--- /dev/null
+++ b/.buildkite/pipelines/scheduled/0_webui.yml
@@ -0,0 +1,24 @@
+# This file represents what is put into the webUI.
+# It is purely for keeping track of the changes we make to the webUI configuration; modifying this file has no effect.
+# We use the `cryptic` buildkite plugin to provide secrets management, which requires some integration into the WebUI's steps.
+agents:
+  queue: "julia"
+  sandbox.jl: "true"
+
+steps:
+  - label: ":unlock: Unlock secrets, launch pipelines"
+    plugins:
+      - staticfloat/cryptic:
+          # Our list of pipelines that should be launched (but don't require a signature)
+          # These pipelines can be modified by any contributor and CI will still run.
+          # Build secrets will not be available in these pipelines (or their children)
+          # but some of our signed pipelines can wait upon the completion of these unsigned
+          # pipelines.
+          # unsigned_pipelines:
+          #   - .buildkite/pipelines/scheduled/launch_unsigned_builders.yml
+
+          # Our signed pipelines must have a `signature` or `signature_file` parameter that
+          # verifies the treehash of the pipeline itself and the inputs listed in `inputs`
+          signed_pipelines:
+            - pipeline: .buildkite/pipelines/scheduled/coverage/coverage_linux64.yml
+              signature: "U2FsdGVkX1+lpFo/nKzx3c6xCZPKYTAuunXpOsZG4+s4+iU5LfEpMvtNvpKQjDugRoxQxCItMqB6vr4KZN3KtKhjkLbr8ExAyaPil/N/uFhrLlpwNem9dxHbPrU2l7qo"
diff --git a/.buildkite/pipelines/scheduled/README.md b/.buildkite/pipelines/scheduled/README.md
new file mode 100644
index 00000000000000..ca071dceb2a444
--- /dev/null
+++ b/.buildkite/pipelines/scheduled/README.md
@@ -0,0 +1,5 @@
+## Scheduled pipeline (`master` branch only)
+
+This is the [`julia-master->scheduled`](https://buildkite.com/julialang/julia-master-scheduled) pipeline.
+
+We use this pipeline for scheduled builds. The builders in this pipeline run on a schedule once per day. They are not triggered by GitHub webhooks.
diff --git a/.buildkite/pipelines/scheduled/coverage/coverage_linux64.yml b/.buildkite/pipelines/scheduled/coverage/coverage_linux64.yml
new file mode 100644
index 00000000000000..ce7a3aca4227de
--- /dev/null
+++ b/.buildkite/pipelines/scheduled/coverage/coverage_linux64.yml
@@ -0,0 +1,40 @@
+agents:
+  queue: "julia"
+  # Only run on `sandbox.jl` machines (not `docker`-isolated ones) since we need nestable sandboxing
+  sandbox.jl: "true"
+  os: "linux"
+
+steps:
+  - label: ":unlock: :coverage: Run coverage test"
+    plugins:
+      - staticfloat/cryptic:
+          variables:
+            - CODECOV_TOKEN="U2FsdGVkX19l0fhdBabbuiEdysyEabkJLRHfxm7CNRkuGbnwPV365sxxC7Czs/CVcws0N1oB4pVwALRRMe36oA=="
+            - COVERALLS_TOKEN="U2FsdGVkX19zopI0hMNzzi2UUOvNVFD8Y0iisFnO/ryVxU7Tit8ZEaeN+gxodRx4CosUUh192F1+q3dTMWRIvw=="
+      - JuliaCI/julia#v1:
+          version: 1.6
+      - staticfloat/sandbox#v1:
+          rootfs_url: https://github.com/JuliaCI/rootfs-images/releases/download/v3.1/package_linux.x86_64.tar.gz
+          rootfs_treehash: "8c33c341a864852629b8aac01a6eb6a79b73570e"
+          uid: 1000
+          gid: 1000
+    commands: |
+      echo "--- Build Julia from source"
+      make -j 6
+
+      echo "--- Print Julia version info"
+      ./julia -e 'using InteractiveUtils; InteractiveUtils.versioninfo()'
+      ./julia -e '@info "" Sys.CPU_THREADS'
+      # this is necessary to make sure that the LibGit2 tests passes
+      git config --global init.defaultBranch master
+
+      echo "--- Run Julia tests in parallel with code coverage enabled"
+      ./julia --code-coverage=all --sysimage-native-code=no .buildkite/pipelines/scheduled/coverage/run_tests_parallel.jl
+
+      echo "--- Process and upload coverage information"
+      ./julia .buildkite/pipelines/scheduled/coverage/upload_coverage.jl
+    timeout_in_minutes: 240 # 240 minutes = 4 hours
+
+# We must accept the signed job id secret in order to propagate secrets
+env:
+  BUILDKITE_PLUGIN_CRYPTIC_BASE64_SIGNED_JOB_ID_SECRET: ${BUILDKITE_PLUGIN_CRYPTIC_BASE64_SIGNED_JOB_ID_SECRET?}
diff --git a/.buildkite/pipelines/scheduled/coverage/run_tests_parallel.jl b/.buildkite/pipelines/scheduled/coverage/run_tests_parallel.jl
new file mode 100644
index 00000000000000..6da608b5e8be9d
--- /dev/null
+++ b/.buildkite/pipelines/scheduled/coverage/run_tests_parallel.jl
@@ -0,0 +1,25 @@
+# Important note: even if one or more tests fail, we will still exit with status code 0.
+
+# The reason for this is that we always want to upload code coverage, even if some of the
+# tests fail. Therefore, even if the `coverage_linux64` builder passes, you should not
+# assume that all of the tests passed. If you want to know if all of the tests are passing,
+# please look at the status of the `tester_*` builders (e.g. `tester_linux64`).
+
+# When running this file, make sure to set all of the following command-line flags:
+# 1. `--code-coverage=all`
+# 2. `--sysimage-native-code=no`
+
+empty!(Base.DEPOT_PATH)  # drop every preconfigured depot ...
+push!(Base.DEPOT_PATH, mktempdir(; cleanup = true))  # ... and use a fresh temporary directory as the only depot
+
+const tests = "all"  # test selection passed to `Base.runtests`
+const ncores = Sys.CPU_THREADS  # passed to `Base.runtests` as its `ncores` keyword
+
+@info "" Sys.CPU_THREADS
+@info "" tests ncores
+
+try
+    Base.runtests(tests; ncores)
+catch ex  # deliberately not rethrown, so that this script still exits with status code 0
+    @error "" exception=(ex, catch_backtrace())  # log the failure together with its backtrace
+end
diff --git a/.buildkite/pipelines/scheduled/coverage/upload_coverage.jl b/.buildkite/pipelines/scheduled/coverage/upload_coverage.jl
new file mode 100644
index 00000000000000..8d14cded561406
--- /dev/null
+++ b/.buildkite/pipelines/scheduled/coverage/upload_coverage.jl
@@ -0,0 +1,219 @@
+empty!(Base.DEPOT_PATH)
+push!(Base.DEPOT_PATH, mktempdir(; cleanup = true))
+
+import Pkg
+import Logging
+import TOML
+
+Pkg.add(; name = "Coverage", uuid = "a2441757-f6aa-5fb2-8edb-039e3f45d037", version = "1")
+Pkg.precompile()
+
+import Coverage
+
+function process_folders()
+    # Collect per-file coverage for the `base` and `stdlib` folders and merge the results.
+    #
+    # `Coverage.process_folder` will have a LOT of `@info` statements that will make the log
+    # way too long, so logging for `@info` statements is disabled while it runs.
+    Logging.disable_logging(Logging.Info)
+    fcs_base, fcs_stdlib = try
+        (Coverage.process_folder("base"), Coverage.process_folder("stdlib"))
+    finally
+        # Re-enable logging for `@info` statements (only `Debug` and below stay disabled).
+        # Doing this in `finally` restores logging even when `process_folder` throws.
+        Logging.disable_logging(Logging.Debug)
+    end
+
+    # The merge itself runs with `@info` logging enabled again.
+    return Coverage.merge_coverage_counts(fcs_base, fcs_stdlib)
+end
+
+function get_external_stdlib_names(stdlib_dir::AbstractString)
+    # Return the sorted, de-duplicated names of the external stdlibs in `stdlib_dir`,
+    # logging the list on the way. Only plain files directly inside it are inspected.
+    files = filter(name -> isfile(joinpath(stdlib_dir, name)), readdir(stdlib_dir))
+    # An external stdlib is recognised by a file like `Pkg.version`, `Statistics.version`,
+    # etc.; the single capture group of the pattern is its name (`Pkg`, `Statistics`, ...).
+    version_file = r"^([\w].*?)\.version$"
+    hits = filter(!isnothing, match.(Ref(version_file), files))
+    external_stdlib_names = sort!(unique!(only.(hits)))
+    @info "# Begin list of external stdlibs"
+    for (position, stdlib) in enumerate(external_stdlib_names)
+        @info "$(position). $(stdlib)"
+    end
+    @info "# End list of external stdlibs"
+    return external_stdlib_names
+end
+
+function get_external_stdlib_prefixes(stdlib_dir::AbstractString)
+    # Return the sorted, de-duplicated path prefixes of all external stdlibs: for every
+    # name, both `<stdlib_dir>/<Name>/` (trailing separator) and `<stdlib_dir>/<Name>-`.
+    names = get_external_stdlib_names(stdlib_dir)
+    with_separator = joinpath.(Ref(stdlib_dir), names, Ref(""))
+    with_dash = joinpath.(Ref(stdlib_dir), string.(names, Ref("-")))
+    prefixes = unique!(vcat(with_separator, with_dash))
+    # example of what the result might look like:
+    # 4-element Vector{String}:
+    # "stdlib/Pkg-"
+    # "stdlib/Pkg/"
+    # "stdlib/Statistics-"
+    # "stdlib/Statistics/"
+    return sort!(prefixes)
+end
+
+function print_coverage_summary(fc::Coverage.FileCoverage)
+    # Log one `@info` row for the file `fc`, made of four columns: the file name, the
+    # coverage percentage, the covered line count, and the total line count.
+    # Returns `nothing`.
+    covered, total = Coverage.get_summary(fc)
+
+    # The percentage is rounded down; it is 0 when both line counts are zero.
+    nothing_to_cover = covered == total == 0
+    percent = nothing_to_cover ? 0 : floor(Int, covered/total * 100)
+
+    # Fixed column widths keep the rows aligned: 71 characters for the file name and 15
+    # for each of the next two columns; the total line count is printed unpadded.
+    name_column = rpad(fc.filename, 71)
+    percent_column = rpad(string(percent, " %"), 15)
+    counts_column = string(rpad(string(covered), 15), string(total))
+
+    @info "$(name_column) $(percent_column) $(counts_column)"
+    return nothing
+end
+
+function print_coverage_summary(
+        fcs::Vector{Coverage.FileCoverage}, description::AbstractString,
+    )
+    # Log the combined coverage of `fcs` in the form
+    # "<description>: <percent>% (<covered lines>/<total lines>)". Returns `nothing`.
+    # The percentage is rounded down; it is 0 when both line counts are zero.
+    covered, total = Coverage.get_summary(fcs)
+    nothing_to_cover = covered == total == 0
+    percent = nothing_to_cover ? 0 : floor(Int, covered/total * 100)
+    @info "$(description): $(percent)% ($(covered)/$(total))"
+    return nothing
+end
+
+function buildkite_env(name::String)
+    # Return the whitespace-stripped value of the environment variable `name`.
+    # Throws `KeyError` when it is unset and `ErrorException` when it is blank.
+    stripped = String(strip(ENV[name]))
+    isempty(stripped) && throw(ErrorException("environment variable $(name) is empty"))
+    return stripped
+end
+
+function buildkite_env(name_1::String, name_2::String, default::String)
+    # First non-blank of `name_1`, `name_2`, else `default`; an unset variable is read as ""
+    # (via `get`) so that it falls through instead of raising `KeyError`.
+    value_1 = String(strip(get(ENV, name_1, "")))
+    value_2 = String(strip(get(ENV, name_2, "")))
+    return !isempty(value_1) ? value_1 : !isempty(value_2) ? value_2 : default
+end
+
+function buildkite_branch_and_commit()
+    # Return `(; branch, commit)` from `BUILDKITE_BRANCH` and `BUILDKITE_COMMIT`. Throws
+    # `ErrorException` if the commit is not the checked-out `HEAD`, or if it does not look
+    # like a long commit SHA.
+    branch = buildkite_env("BUILDKITE_BRANCH")
+    requested = buildkite_env("BUILDKITE_COMMIT")
+    head_rev_parse = String(strip(read(`git rev-parse HEAD`, String)))
+    # A commit given as the literal `HEAD` is replaced by the resolved SHA.
+    commit = strip(requested) == "HEAD" ? head_rev_parse : requested
+    if commit !== head_rev_parse
+        @error "mismatch" commit head_rev_parse
+        throw(ErrorException("mismatch"))
+    end
+    # A long commit SHA is exactly 40 lowercase hexadecimal characters.
+    occursin(r"^[a-f0-9]{40}$", commit) && return (; branch, commit)
+    msg = "BUILDKITE_COMMIT does not look like a long commit SHA"
+    @error msg commit
+    throw(ErrorException(msg))
+end
+
+function codecov_buildkite_add_local_to_kwargs()
+    # Build the keyword arguments for the Codecov upload: the result of
+    # `Coverage.Codecov.set_defaults` for an empty `Dict`, given the branch and commit
+    # reported by `buildkite_branch_and_commit`.
+    build = buildkite_branch_and_commit()
+    return Coverage.Codecov.set_defaults(
+        Dict(); branch = build.branch, commit = build.commit,
+    )
+end
+
+function coveralls_buildkite_query_git_info()
+    # Assemble the `git_info` dictionary for the Coveralls upload from Buildkite's
+    # environment variables. Layout of the returned dictionary:
+    #   "branch"  => the branch name
+    #   "remotes" => one-element vector with the name and URL of the remote
+    #   "head"    => commit id, author and committer details, commit message
+    build = buildkite_branch_and_commit()
+    repo_url = buildkite_env("BUILDKITE_REPO")
+    commit_message = buildkite_env("BUILDKITE_MESSAGE")
+
+    # Prefer the build author; fall back to the build creator, and finally to "".
+    name = buildkite_env("BUILDKITE_BUILD_AUTHOR", "BUILDKITE_BUILD_CREATOR", "")
+    email = buildkite_env(
+        "BUILDKITE_BUILD_AUTHOR_EMAIL", "BUILDKITE_BUILD_CREATOR_EMAIL", "",
+    )
+
+    # The remote is always reported under the name "origin".
+    origin = Dict(
+        "name" => "origin",
+        "url"  => repo_url,
+    )
+    # The author details are reused for the committer fields.
+    head = Dict(
+        "id"              => build.commit,
+        "author_name"     => name,
+        "author_email"    => email,
+        "committer_name"  => name,
+        "committer_email" => email,
+        "message"         => commit_message,
+    )
+    return Dict(
+        "branch"  => build.branch,
+        "remotes" => [origin],
+        "head"    => head,
+    )
+end
+
+const fcs = process_folders()  # per-file coverage for `base` and `stdlib`
+
+# Only include source code files (paths starting with `base/` or containing `/src/`). Exclude test files, benchmarking files, etc.
+filter!(fcs) do fc
+    occursin(r"^base\/", fc.filename) || occursin("/src/", fc.filename)
+end;
+
+# Exclude all external stdlibs (stdlibs that live in external repos), i.e. every file whose path starts with one of their prefixes.
+const external_stdlib_prefixes = get_external_stdlib_prefixes("stdlib")
+filter!(fcs) do fc
+    all(x -> !startswith(fc.filename, x), external_stdlib_prefixes)
+end;
+
+# Exclude all stdlib JLLs (stdlibs of the form `stdlib/*_jll/`).
+filter!(fcs) do fc
+    !occursin(r"^stdlib\/[A-Za-z0-9]*?_jll\/", fc.filename)
+end;
+
+sort!(fcs; by = fc -> fc.filename);  # order the report by file name
+
+print_coverage_summary.(fcs);  # one row per file ...
+print_coverage_summary(fcs, "Total")  # ... followed by the combined coverage
+
+let
+    git_info = coveralls_buildkite_query_git_info()
+    @info "" git_info
+    @info "" git_info["branch"]
+    @info "" git_info["head"]
+
+    # In order to upload to Coveralls, you need to have the `COVERALLS_TOKEN` environment variable defined.
+    Coverage.Coveralls.submit_local(fcs, git_info)
+end
+
+let
+    kwargs = codecov_buildkite_add_local_to_kwargs()
+    @info "" kwargs
+
+    # In order to upload to Codecov, you need to have the `CODECOV_TOKEN` environment variable defined.
+    Coverage.Codecov.submit_generic(fcs, kwargs)
+end
diff --git a/deps/checksums/csl b/.buildkite/pipelines/scheduled/launch_unsigned_builders.yml
similarity index 100%
rename from deps/checksums/csl
rename to .buildkite/pipelines/scheduled/launch_unsigned_builders.yml
diff --git a/.buildkite/utilities/rr/rr_capture.jl b/.buildkite/utilities/rr/rr_capture.jl
new file mode 100644
index 00000000000000..07d57f31ff29cf
--- /dev/null
+++ b/.buildkite/utilities/rr/rr_capture.jl
@@ -0,0 +1,134 @@
+using Dates
+using Pkg
+using Tar
+
+if VERSION < v"1.6"
+    throw(ErrorException("The `rr_capture.jl` script requires Julia 1.6 or greater"))
+end
+
+if isempty(ARGS)
+    throw(ErrorException("Usage: rr_capture.jl [command...]"))
+end
+
+const TIMEOUT = 2 * 60 * 60 # seconds (two hours)
+
+# `rr` is only used when the builder is `tester_linux64`
+const use_rr_if_builder_is = "tester_linux64"
+
+const run_id = get(ENV, "BUILDKITE_JOB_ID", "unknown")
+const shortcommit = get(ENV, "BUILDKITE_COMMIT", "unknown")
+const builder = get(ENV, "BUILDKITE_STEP_KEY", use_rr_if_builder_is)
+const use_rr = (use_rr_if_builder_is == builder)
+
+@info "" run_id shortcommit builder use_rr
+@info "" ARGS
+
+# if !use_rr # TODO: uncomment this line
+if true # TODO: delete this line
+    @info "We will not run the tests under rr"
+    plain_run = run(`$ARGS`)
+    exit(plain_run.exitcode)
+end
+
+@info "We will run the tests under rr"
+
+const num_cores = min(8, Sys.CPU_THREADS, 1 + parse(Int, get(ENV, "JULIA_TEST_NUM_CORES", "8")))
+@info "" num_cores
+
+# Handle to the `rr` process. It is assigned inside the `mktempdir` block below and is
+# read again at the end of the script to forward the exit status to Buildkite.
+proc = nothing
+mktempdir() do dir
+    Pkg.activate(dir)
+    Pkg.add("rr_jll")
+    Pkg.add("Zstd_jll")
+    # Load the two JLLs by UUID and wrap the `rr` launcher in `invokelatest`.
+    rr_jll = Base.require(Base.PkgId(Base.UUID((0xe86bdf43_55f7_5ea2_9fd0_e7daa2c0f2b4)), "rr_jll"))
+    zstd_jll = Base.require(Base.PkgId(Base.UUID((0x3161d3a3_bdf6_5164_811a_617609db77b4)), "Zstd_jll"))
+    rr(func) = Base.invokelatest(rr_jll.rr, func; adjust_LIBPATH=false)
+    rr() do rr_path
+        capture_script_path = joinpath(dir, "capture_output.sh")
+        open(capture_script_path, "w") do io
+            write(io, """
+            #!/bin/bash
+
+            $(rr_path) record --nested=detach "\$@" > >(tee -a $(dir)/stdout.log) 2> >(tee -a $(dir)/stderr.log >&2)
+            """)
+        end
+        chmod(capture_script_path, 0o755)
+
+        new_env = copy(ENV)
+        new_env["_RR_TRACE_DIR"] = joinpath(dir, "rr_traces")
+        new_env["RR_LOG"] = "all:debug"
+        new_env["RR_LOG_BUFFER"] = "100000"
+        new_env["JULIA_RR"] = capture_script_path
+        # Start `rr` without blocking (`wait=false`) so the timeout task below can supervise it.
+        global proc = run(setenv(`$(rr_path) record --num-cores=$(num_cores) $ARGS`, new_env), (stdin, stdout, stderr); wait=false)
+
+        # Start asynchronous timer that will kill `rr`
+        @async begin
+            sleep(TIMEOUT)
+
+            # If we've exceeded the timeout and `rr` is still running, kill it.
+            if isopen(proc)
+                println(stderr, "\n\nProcess timed out. Signalling `rr` for force-cleanup!")
+                kill(proc, Base.SIGTERM)
+
+                # Give `rr` a chance to cleanup
+                sleep(60)
+
+                if isopen(proc)
+                    println(stderr, "\n\n`rr` failed to cleanup within one minute, killing and exiting immediately!")
+                    kill(proc, Base.SIGKILL)
+                    exit(1)
+                end
+            end
+        end
+
+        # Wait for `rr` to finish, either through naturally finishing its run, or `SIGTERM`.
+        # If we have to `SIGKILL`, the timer task above ends the script itself via `exit(1)`.
+        wait(proc)
+
+        # On a non-zero exit code, upload the `rr` trace
+        if !success(proc)
+            println(stderr, "`rr` returned $(proc.exitcode), packing and uploading traces...")
+
+            if !isdir(joinpath(dir, "rr_traces"))
+                println(stderr, "No `rr_traces` directory!  Did `rr` itself fail?")
+                exit(1)
+            end
+
+            # Clean up non-traces; `force=true` so a missing entry does not abort the upload
+            rm(joinpath(dir, "rr_traces", "latest-trace"); force=true)
+            rm(joinpath(dir, "rr_traces", "cpu_lock"); force=true)
+
+            # Create a directory for the pack files to go
+            pack_dir = joinpath(dir, "pack")
+            mkdir(pack_dir)
+
+            # Pack all traces
+            trace_dirs = [joinpath(dir, "rr_traces", f) for f in readdir(joinpath(dir, "rr_traces"))]
+            filter!(isdir, trace_dirs)
+            run(ignorestatus(`$(rr_path) pack --pack-dir=$pack_dir $(trace_dirs)`))
+
+            # Tar it up, streaming the tarball through `zstd` into `dumps/`
+            mkpath("dumps")
+            datestr = Dates.format(now(), dateformat"yyyy-mm-dd_HH_MM_SS")
+            dst_path = "dumps/rr-run_$(run_id)-gitsha_$(shortcommit)-$(datestr).tar.zst"
+            zstd_jll.zstdmt() do zstdp
+                tarproc = open(`$zstdp -o $dst_path`, "w")
+                Tar.create(dir, tarproc)
+                close(tarproc.in) # signal end-of-input to the compressor
+                wait(tarproc)     # let it finish writing before the script exits
+            end
+        end
+    end
+end
+
+# Hand the result of the `rr` process back to Buildkite
+if proc.termsignal == 0
+    exit(proc.exitcode)
+else
+    ccall(:raise, Cvoid, (Cint,), proc.termsignal)
+    exit(1) # Fallback if raising the signal did not end this process
+end
diff --git a/.clang-format b/.clang-format
index 0322d0f6749a9d..39b5767a502918 100644
--- a/.clang-format
+++ b/.clang-format
@@ -109,7 +109,6 @@ StatementMacros:
   - checked_intrinsic_ctype
   - cvt_iintrinsic
   - fpiseq_n
-  - fpislt_n
   - ter_fintrinsic
   - ter_intrinsic_ctype
   - un_fintrinsic
diff --git a/.codecov.yml b/.codecov.yml
new file mode 100644
index 00000000000000..35cde5cd5e8543
--- /dev/null
+++ b/.codecov.yml
@@ -0,0 +1,4 @@
+coverage:
+  status:
+    project: off
+    patch: off
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 865abeebd82af2..5fc00a73b47d2e 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -1 +1,5 @@
-/.github/workflows/ @JuliaLang/github-actions
+CODEOWNERS @JuliaLang/github-actions
+/.github/ @JuliaLang/github-actions
+/.buildkite/ @JuliaLang/github-actions
+
+/.github/workflows/statuses.yml @DilumAluthge
diff --git a/.github/workflows/statuses.yml b/.github/workflows/statuses.yml
new file mode 100644
index 00000000000000..97ec290abe0130
--- /dev/null
+++ b/.github/workflows/statuses.yml
@@ -0,0 +1,74 @@
+# Please ping @DilumAluthge when making any changes to this file.
+
+# This is just a short-term solution until we have migrated all of CI to Buildkite.
+#
+# 1. TODO: delete this file once we have migrated all of CI to Buildkite.
+#
+# 2. TODO: disable GitHub Actions on the `JuliaLang/julia` repository once we have migrated all
+# of CI to Buildkite.
+
+# Here are some steps that we take in this workflow file for security reasons:
+# 1. We do not checkout any code.
+# 2. We do not run any external actions.
+# 3. We only give `GITHUB_TOKEN` the minimum necessary set of permissions.
+
+name: Statuses
+
+on:
+  push:
+    branches:
+      - 'master'
+      - 'release-*'
+  # When using the `pull_request_target` event, all PRs will get a `GITHUB_TOKEN` that has
+  # write permissions, even if the PR is from a fork.
+  # Therefore, for security reasons, we do not checkout any code in this workflow.
+  pull_request_target:
+    branches:
+      - 'master'
+      - 'release-*'
+
+# These are the permissions for the `GITHUB_TOKEN` token.
+# We should only give the token the minimum necessary set of permissions.
+permissions:
+  statuses: write
+
+jobs:
+  statuses:
+    name: statuses
+    runs-on: ubuntu-latest
+    if: github.repository == 'JuliaLang/julia'
+    strategy:
+      fail-fast: false
+    steps:
+      - run: echo "SHA=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV
+        if: github.event_name == 'pull_request_target'
+
+      - run: echo "SHA=${{ github.sha }}" >> $GITHUB_ENV
+        if: github.event_name != 'pull_request_target'
+
+      - run: echo "The SHA is ${{ env.SHA }}"
+
+      # As we incrementally migrate individual jobs from Buildbot to Buildkite, we should
+      # remove them from the `context_list`.
+      - run: |
+          declare -a CONTEXT_LIST=(
+                "buildbot/tester_freebsd64"
+                "buildbot/tester_linux32"
+                "buildbot/tester_linux64"
+                "buildbot/tester_linuxaarch64"
+                "buildbot/tester_macos64"
+                "buildbot/tester_win32"
+                "buildbot/tester_win64"
+                )
+          for CONTEXT in "${CONTEXT_LIST[@]}"
+          do
+            curl \
+              -X POST \
+              -H "Authorization: token $GITHUB_TOKEN" \
+              -H "Accept: application/vnd.github.v3+json" \
+              -d "{\"context\": \"$CONTEXT\", \"state\": \"$STATE\"}" \
+            https://api.github.com/repos/JuliaLang/julia/statuses/${{ env.SHA }}
+          done
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          STATE: "pending"
diff --git a/.gitignore b/.gitignore
index 2c5ee63bc3ee97..2fabd2bff211aa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,6 +22,7 @@
 *.so
 *.dylib
 *.dSYM
+*.h.gen
 *.jl.cov
 *.jl.*.cov
 *.jl.mem
diff --git a/.mailmap b/.mailmap
index bcb3c842a76052..204c08bb26b61a 100644
--- a/.mailmap
+++ b/.mailmap
@@ -257,3 +257,6 @@ Curtis Vogt <curtis.vogt@gmail.com> <curtis.vogt@invenia.ca>
 
 Rafael Fourquet <fourquet.rafael@gmail.com> <fourquet.rafael@gmail.com>
 Rafael Fourquet <fourquet.rafael@gmail.com> <fourquet.rafael+github@gmail.com>
+
+Nathan Daly <NHDaly@gmail.com> <NHDaly@gmail.com>
+Nathan Daly <NHDaly@gmail.com> <nhDaly@gmail.com>
\ No newline at end of file
diff --git a/CITATION.bib b/CITATION.bib
index af8cffe7aa5248..f1361a1eea0b89 100644
--- a/CITATION.bib
+++ b/CITATION.bib
@@ -8,68 +8,8 @@ @article{Julia-2017
     pages={65--98},
     year={2017},
     publisher={SIAM},
-    doi={10.1137/141000671}
+    doi={10.1137/141000671},
+    url={https://epubs.siam.org/doi/10.1137/141000671}
 }
 
-% The following citations are about specific aspects of Julia.
-
-@article{Julia-2019-a,
-   author = {Bezanson, Jeff and Chen, Jiahao and Chung, Benjamin and Karpinski, Stefan and Shah, Viral B. and Vitek, Jan and Zoubritzky, Lionel},
-   title = {Julia: Dynamism and Performance Reconciled by Design},
-   journal = {Proc. ACM Program. Lang.},
-   issue_date = {November 2018},
-   volume = {2},
-   number = {OOPSLA},
-   month = oct,
-   year = {2018},
-   issn = {2475-1421},
-   pages = {120:1--120:23},
-   articleno = {120},
-   numpages = {23},
-   url = {https://doi.acm.org/10.1145/3276490},
-   doi = {10.1145/3276490},
-   acmid = {3276490},
-   publisher = {ACM},
-   address = {New York, NY, USA},
-   keywords = {dynamic languages, just-in-time compilation, multiple dispatch},
-} 
-
-@article{Julia-2019-b,
-   author = {Zappa Nardelli, Francesco and Belyakova, Julia and Pelenitsyn, Artem and Chung, Benjamin and Bezanson, Jeff and Vitek, Jan},
-   title = {Julia Subtyping: A Rational Reconstruction},
-   journal = {Proc. ACM Program. Lang.},
-   issue_date = {November 2018},
-   volume = {2},
-   number = {OOPSLA},
-   month = oct,
-   year = {2018},
-   issn = {2475-1421},
-   pages = {113:1--113:27},
-   articleno = {113},
-   numpages = {27},
-   url = {https://doi.acm.org/10.1145/3276483},
-   doi = {10.1145/3276483},
-   acmid = {3276483},
-   publisher = {ACM},
-   address = {New York, NY, USA},
-   keywords = {Multiple Dispatch, Subtyping},
-} 
-
-@inproceedings{Julia-2014,
-   author = {Bezanson, Jeff and Chen, Jiahao and Karpinski, Stefan and Shah, Viral and Edelman, Alan},
-   title = {Array Operators Using Multiple Dispatch: A Design Methodology for Array Implementations in Dynamic Languages},
-   booktitle = {Proceedings of ACM SIGPLAN International Workshop on Libraries, Languages, and Compilers for Array Programming},
-   series = {ARRAY'14},
-   year = {2014},
-   isbn = {978-1-4503-2937-8},
-   location = {Edinburgh, United Kingdom},
-   pages = {56:56--56:61},
-   articleno = {56},
-   numpages = {6},
-   url = {https://doi.acm.org/10.1145/2627373.2627383},
-   doi = {10.1145/2627373.2627383},
-   acmid = {2627383},
-   publisher = {ACM},
-   address = {New York, NY, USA},
-   keywords = {Julia, array indexing, dynamic dispatch, multiple dispatch, static analysis, type inference},
-} 
+% For more details on research related to Julia, see https://julialang.org/research
diff --git a/CITATION.cff b/CITATION.cff
new file mode 100644
index 00000000000000..a25d61b69d849f
--- /dev/null
+++ b/CITATION.cff
@@ -0,0 +1,40 @@
+cff-version: 1.2.0
+message: "Cite this paper whenever you use Julia"
+authors:
+- family-names: "Bezanson"
+  given-names: "Jeff"
+- family-names: "Edelman"
+  given-names: "Alan"
+- family-names: "Karpinski"
+  given-names: "Stefan"
+- family-names: "Shah"
+  given-names: "Viral B."
+title: "Julia: A fresh approach to numerical computing"
+version: "v1"
+license: "MIT"
+doi: "10.1137/141000671"
+date-released: 2017-02-07
+url: "https://julialang.org"
+preferred-citation:
+  authors:
+    - family-names: "Bezanson"
+      given-names: "Jeff"
+    - family-names: "Edelman"
+      given-names: "Alan"
+    - family-names: "Karpinski"
+      given-names: "Stefan"
+    - family-names: "Shah"
+      given-names: "Viral B."
+  doi: "10.1137/141000671"
+  journal: "SIAM Review"
+  month: 9
+  start: 65
+  end: 98
+  pages: 33
+  title: "Julia: A fresh approach to numerical computing"
+  type: article
+  volume: 59
+  issue: 1
+  year: 2017
+  publisher:
+    - name: "SIAM"
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index fbca229de0d45a..a0122d5f241279 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -6,7 +6,7 @@ If you are already familiar with Julia itself, this blog post by Katharine Hyatt
 
 ## Learning Julia
 
-[The learning page](https://julialang.org/learning) has a great list of resources for new and experienced users alike. [This tutorial video](https://www.youtube.com/watch?v=vWkgEddb4-A) is one recommended starting point, as is the "[Invitation to Julia](https://www.youtube.com/watch?v=gQ1y5NUD_RI)" workshop video from JuliaCon 2015  ([slide materials here](https://github.com/dpsanders/invitation_to_julia)). The [Julia documentation](https://docs.julialang.org) covers the language and core library features, and is searchable.
+[The learning page](https://julialang.org/learning) has a great list of resources for new and experienced users alike.
 
 ## Before filing an issue
 
@@ -158,7 +158,9 @@ Examples written within docstrings can be used as testcases known as "doctests"
     "DOCSTRING TEST"
     ```
 
-A doctest needs to match an interactive REPL including the `julia>` prompt. To run doctests you need to run `make -C doc doctest=true` from the root directory. It is recommended to add the header `# Examples` above the doctests.
+A doctest needs to match an interactive REPL including the `julia>` prompt. It is recommended to add the header `# Examples` above the doctests.
+
+To run doctests you need to run `make -C doc doctest=true` from the root directory. You can use `make -C doc doctest=true revise=true` if you are modifying the doctests and don't want to rebuild Julia after each change (see details below about the Revise.jl workflow).
 
 #### News-worthy changes
 
@@ -287,6 +289,11 @@ runtest harness).
    - To remove whitespace relative to the `master` branch, run
      `git rebase --whitespace=fix master`.
 
+#### Git Recommendations For Pull Request Reviewers
+
+- When merging, we generally prefer `squash+merge`. The exception is the rare PR with carefully staged individual commits that you want to keep separate in the history; in that case `merge` is acceptable.
+
+
 ## Resources
 
 * Julia
diff --git a/HISTORY.md b/HISTORY.md
index a98d0d984778a1..7ac06836750c8b 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,3 +1,371 @@
+Julia v1.7 Release Notes
+========================
+
+New language features
+---------------------
+
+* `(; a, b) = x` can now be used to destructure properties `a` and `b` of `x`.
+  This syntax is equivalent to `a = getproperty(x, :a); b = getproperty(x, :b)` ([#39285]).
+* Implicit multiplication by juxtaposition is now allowed for radical symbols (e.g. `x√y` and `x∛y`) ([#40173]).
+* The short-circuiting operators `&&` and `||` can now be dotted to participate in broadcast fusion
+  as `.&&` and `.||` ([#39594]).
+* `⫪` (U+2AEA, `\Top`, `\downvDash`) and `⫫` (U+2AEB, `\Bot`, `\upvDash`, `\indep`)
+  may now be used as binary operators with comparison precedence ([#39403]).
+* Repeated semicolons can now be used inside array concatenation expressions to separate dimensions
+  of an array, with the number of semicolons specifying the dimension. Just as a single semicolon
+  in `[A; B]` has always described concatenating in the first dimension (vertically), now two
+  semicolons `[A;; B]` do so in the second dimension (horizontally), three semicolons `;;;` in the
+  third, and so on ([#33697]).
+* A backslash (`\`) before a newline inside a string literal now removes the newline while also
+  respecting indentation. This can be used to split up long strings without newlines into multiple
+  lines of code ([#40753]).
+* A backslash before a newline in command literals now always removes the newline, similar to standard string
+  literals, whereas the result was not well-defined before ([#40753]).
+
+Language changes
+----------------
+
+* `macroexpand`, `@macroexpand`, and `@macroexpand1` no longer wrap errors in a `LoadError`.
+  To reduce breakage, `@test_throws` has been modified so that many affected tests will still pass ([#38379]).
+* The middle dot `·` (`\cdotp` U+00b7) and the Greek interpunct `·` (U+0387) are now treated as equivalent to
+  the dot operator `⋅` (`\cdot` U+22c5) (#25157).
+* The minus sign `−` (`\minus` U+2212) is now treated as equivalent to the hyphen-minus sign `-` (U+002d) ([#40948]).
+* Destructuring will no longer mutate values on the left-hand side while iterating through values on
+  the right-hand side. In the example of an array `x`, `x[2], x[1] = x` will now swap the first and
+  second elements of `x`, whereas it used to fill both entries with `x[1]` because `x[2]` was mutated during
+  the iteration of `x` ([#40737]).
+* The default random number generator has changed, so all random numbers will be different (even with the
+  same seed) unless an explicit RNG object is used.
+  See the section on the `Random` standard library below ([#40546]).
+* `Iterators.peel(itr)` now returns `nothing` when `itr` is empty instead of throwing a `BoundsError` ([#39569]).
+* Multiple successive semicolons in an array expression were previously ignored (e.g., `[1 ;; 2] == [1 ; 2]`).
+  This syntax is now used to separate dimensions (see **New language features**).
+
+Compiler/Runtime improvements
+-----------------------------
+
+
+Command-line option changes
+---------------------------
+
+* The Julia `--project` option and the `JULIA_PROJECT` environment variable now support selecting shared
+  environments like `.julia/environments/myenv` the same way the package management console does:
+  use `julia --project=@myenv` resp. `export JULIA_PROJECT="@myenv"` ([#40025]).
+
+Multi-threading changes
+-----------------------
+
+* Intrinsics for atomic pointer operations are now defined for certain byte sizes ([#37847]).
+* Support for declaring and using individual fields of a mutable struct as atomic has been
+  added; see the new `@atomic` macro ([#37847]).
+* If the `JULIA_NUM_THREADS` environment variable is set to `auto`, then the
+  number of threads will be set to the number of CPU threads ([#38952]).
+* Every `Task` object has a local random number generator state, providing
+  reproducible (schedule-independent) execution of parallel simulation code by
+  default. The default generator is also significantly faster in parallel than
+  in previous versions ([#40546]).
+* Tasks can now migrate among threads when they are re-scheduled. Previously, a Task
+  would always run on whichever thread executed it first ([#40715]).
+
+Build system changes
+--------------------
+
+
+New library functions
+---------------------
+
+* Two argument methods `findmax(f, domain)`, `argmax(f, domain)` and the corresponding
+  `min` versions ([#27613]).
+* `isunordered(x)` returns true if `x` is a value that is normally unordered, such as
+  `NaN` or `missing` ([#35316]).
+* New `keepat!(vector, inds)` function which is the inplace equivalent of `vector[inds]`
+  for a list `inds` of integers ([#36229]).
+* The two-argument method `lock(f, lck)` now accepts a `Channel` as the second argument ([#39312]).
+* New functor `Returns(value)`, which returns `value` for any arguments ([#39794]).
+* New macros `@something` and `@coalesce` which are short-circuiting versions of `something` and
+  `coalesce`, respectively ([#40729]).
+* New function `redirect_stdio` for redirecting `stdin`, `stdout` and `stderr` ([#37978]).
+* New macro `Base.@invoke f(arg1::T1, arg2::T2; kwargs...)` provides an easier syntax to call
+  `invoke(f, Tuple{T1,T2}, arg1, arg2; kwargs...)` ([#38438]).
+* New macro `Base.@invokelatest f(args...; kwargs...)` providing a convenient way to call
+  `Base.invokelatest(f, args...; kwargs...)` ([#37971]).
+
+New library features
+--------------------
+
+* The optional keyword argument `context` of `sprint` can now be set to a tuple of `:key => value`
+  pairs to specify multiple attributes ([#39381]).
+* `bytes2hex` and `hex2bytes` are no longer limited to arguments of type `Union{String,AbstractVector{UInt8}}`
+  and now only require that they're iterable and have a length ([#39710]).
+* `stat(file)` now has a more detailed and user-friendly `show` method ([#39463]).
+
+Standard library changes
+------------------------
+
+* `count` and `findall` now accept an `AbstractChar` argument to search for a character in
+  a string ([#38675]).
+* New methods `range(start, stop)` and `range(start, stop, length)` ([#39228]).
+* `range` now supports `start` as an optional keyword argument ([#38041]).
+* Some operations on ranges will return a `StepRangeLen` instead of a `StepRange`, to allow
+  the resulting step to be zero. Previously, `λ .* (1:9)` gave an error when `λ = 0` ([#40320]).
+* `islowercase` and `isuppercase` are now compliant with the Unicode lower/uppercase categories ([#38574]).
+* `iseven` and `isodd` functions now support non-`Integer` numeric types ([#38976]).
+* `escape_string` now accepts a collection of characters via the keyword
+  `keep` that are to be kept as they are ([#38597]).
+* `getindex` for `NamedTuple`s now accepts a tuple of symbols in order to index multiple values ([#38878]).
+* Subtypes of `AbstractRange` now correctly follow the general array indexing behavior when indexed by
+  `Bool`s, erroring for scalar `Bool`s and treating arrays (including ranges) of `Bool` as
+  logical indices ([#31829]).
+* `keys(::RegexMatch)` is now defined to return the capture's keys, by name if named, or by index if not ([#37299]).
+* `keys(::Generator)` is now defined to return the iterator's keys ([#34678]).
+* `RegexMatch` is now iterable, giving the captured substrings ([#34355]).
+* `lpad/rpad` are now defined in terms of `textwidth` ([#39044]).
+* `Test.@test` now accepts `broken` and `skip` boolean keyword arguments, which
+  mimic `Test.@test_broken` and `Test.@test_skip` behavior, but allows skipping
+  tests failing only under certain conditions.  For example
+  ```julia
+  if T == Float64
+      @test_broken isequal(complex(one(T)) / complex(T(Inf), T(-Inf)), complex(zero(T), zero(T)))
+  else
+      @test isequal(complex(one(T)) / complex(T(Inf), T(-Inf)), complex(zero(T), zero(T)))
+  end
+  ```
+  can be replaced by
+  ```julia
+  @test isequal(complex(one(T)) / complex(T(Inf), T(-Inf)), complex(zero(T), zero(T))) broken=(T == Float64)
+  ```
+  ([#39322]).
+* `@lock` is now exported from Base ([#39588]).
+* The experimental function `Base.catch_stack()` has been renamed to `current_exceptions()`, exported
+  from Base and given a more specific return type ([#29901]).
+* Some degree trigonometric functions, `sind`, `cosd`, `tand`, `asind`, `acosd`, `asecd`, `acscd`,
+  `acotd`, `atand` now accept a square matrix ([#39758]).
+* `replace(::String)` now accepts multiple patterns, which will be applied left-to-right simultaneously,
+  so only one pattern will be applied to any character, and the patterns will only be applied to the input
+  text, not the replacements ([#40484]).
+* New `replace` methods to replace elements of a `Tuple`.
+
+
+#### Package Manager
+
+* If a package is `using` or `import`ed from the `julia>` prompt that isn't found but is available
+  from a registry, a `pkg> add` prompt now offers to install the package into the current environment,
+  precompile it, and continue to load it ([#39026]).
+* A new `Manifest.toml` format is now used that captures extensible metadata fields, including the
+  julia version that generated the manifest. Old format manifests are still supported and will be
+  maintained in their original format, unless the user runs `Pkg.upgrade_manifest()` to upgrade the
+  format of the current environment's manifest without re-resolving ([#40765]).
+* `pkg> precompile` will now precompile new versions of packages that are already loaded, rather than
+  postponing to the next session (the `?`-marked dependencies) ([#40345]).
+* `pkg> rm`, `pin`, and `free` now accept the `--all` argument to call the action on all packages.
+* Registries downloaded from the Pkg Server (not git) are no longer uncompressed into files but instead
+  read directly from the compressed tarball into memory. This improves performance on
+  filesystems which do not handle a large number of files well. To turn this feature off, set the
+  environment variable `JULIA_PKG_UNPACK_REGISTRY=true`.
+* It is now possible to use an external `git` executable instead of the default libgit2 library
+  for the downloads that happen via the Git protocol by setting the environment variable
+  `JULIA_PKG_USE_CLI_GIT=true`.
+* Registries downloaded from the Pkg Server (not git) are now assumed to be immutable. Manual changes
+  to their files might not be picked up by a running Pkg session.
+* Adding packages by directory name in the REPL mode now requires prepending `./` to the name if the
+  package is in the current directory; e.g. `add ./Package` is required instead of `add Package`.
+  This is to avoid confusion between the package name `Package` and the local directory `Package`.
+* The `mode` keyword for `PackageSpec` has been removed.
+
+#### LinearAlgebra
+
+* Use [Libblastrampoline](https://github.com/staticfloat/libblastrampoline/) to pick a BLAS
+  and LAPACK at runtime. By default it forwards to OpenBLAS in the Julia distribution.
+  The forwarding mechanism can be used by packages to replace the BLAS and LAPACK with
+  user preferences ([#39455]).
+* On aarch64, OpenBLAS now uses an ILP64 BLAS like all other 64-bit platforms ([#39436]).
+* OpenBLAS is updated to 0.3.13 ([#39216]).
+* SuiteSparse is updated to 5.8.1 ([#39455]).
+* The shape of an `UpperHessenberg` matrix is preserved under certain arithmetic operations,
+  e.g. when multiplying or dividing by an `UpperTriangular` matrix ([#40039]).
+* Real quasitriangular Schur factorizations `S` can now be efficiently converted to complex
+  upper-triangular form with `Schur{Complex}(S)` ([#40573]).
+* `cis(A)` now supports matrix arguments ([#40194]).
+* `dot` now supports `UniformScaling` with `AbstractMatrix` ([#40250]).
+* `qr[!]` and `lu[!]` now support `LinearAlgebra.PivotingStrategy` (singleton type) values
+  as their optional `pivot` argument: defaults are `qr(A, NoPivot())` (vs. `qr(A, ColumnNorm())`
+  for pivoting) and `lu(A, RowMaximum())` (vs. `lu(A, NoPivot())` without pivoting); the former
+  `Val{true/false}`-based calls are deprecated ([#40623]).
+* `det(M::AbstractMatrix{BigInt})` now calls `det_bareiss(M)`, which uses the
+  [Bareiss](https://en.wikipedia.org/wiki/Bareiss_algorithm) algorithm to calculate precise
+  values ([#40868]).
+
+#### Markdown
+
+
+#### Printf
+
+
+#### Random
+
+* The default random number generator has been changed from Mersenne Twister to
+  [Xoshiro256++](https://prng.di.unimi.it/).
+  The new generator has smaller state, better performance, and superior statistical properties.
+  This generator is the one used for reproducible Task-local randomness ([#40546]).
+
+#### REPL
+
+* Long strings are now elided using the syntax `"head" ⋯ 12345 bytes ⋯ "tail"` when displayed
+  in the REPL ([#40736]).
+* Pasting REPL examples into the REPL (prompt pasting) now supports all REPL modes (`julia`, `pkg`,
+  `shell`, `help?`) and switches mode automatically ([#40604]).
+* `help?>` for modules without docstrings now returns a list of exported names and prints
+  the contents of an associated `README.md` if found ([#39093]).
+
+#### SparseArrays
+
+* new `sizehint!(::SparseMatrixCSC, ::Integer)` method ([#30676]).
+* `cholesky()` now fully preserves the user-specified permutation ([#40560]).
+* `issparse` now applies consistently to all wrapper arrays, including nested, by checking
+  `issparse` on the wrapped parent array ([#37644]).
+
+#### Dates
+
+* The `Dates.periods` function can be used to get the `Vector` of `Period`s that comprise a
+  `CompoundPeriod` ([#39169]).
+
+#### Downloads
+
+* If a cookie header is set in a redirected request, the cookie will now be sent in following
+  requests (<https://github.com/JuliaLang/Downloads.jl/pull/98>).
+* If a `~/.netrc` file exists, it is used to get passwords for authenticated websites
+  (<https://github.com/JuliaLang/Downloads.jl/pull/98>).
+* [Server Name Indication](https://en.wikipedia.org/wiki/Server_Name_Indication) is now sent with
+  all TLS connections, even when the server's identity is not verified (see [NetworkOptions](https://github.com/JuliaLang/NetworkOptions.jl); <https://github.com/JuliaLang/Downloads.jl/pull/114>).
+* When verifying TLS connections on Windows, if the certificate revocation server cannot be
+  reached, the connection is allowed; this matches what other applications do and how revocation
+  is performed on macOS (<https://github.com/JuliaLang/Downloads.jl/pull/115>).
+* There is now a 30-second connection timeout and a 20-second timeout if no data is sent; in
+  combination, this guarantees that connections must make some progress or they will timeout in
+  under a minute (<https://github.com/JuliaLang/Downloads.jl/pull/126>).
+
+#### Statistics
+
+
+#### Sockets
+
+
+#### Tar
+
+* `Tar.extract` now ignores the exact permission mode in a tarball and normalizes modes in the
+  same way that `Tar.create` does, which is, in turn, the same way that `git` normalizes them
+  (<https://github.com/JuliaIO/Tar.jl/pull/99>).
+* Functions that consume tarballs now handle hard links: the link target must be a previously seen
+  file; `Tar.list` lists the entry with `:hardlink` type and `.link` field giving the path to the
+  target; other functions — `Tar.extract`, `Tar.rewrite`, `Tar.tree_hash` — treat a hard link as a
+  copy of the target file (<https://github.com/JuliaIO/Tar.jl/pull/102>).
+* The standard format generated by `Tar.create` and `Tar.rewrite` now includes entries for non-empty
+  directories; this shouldn't be necessary, but some tools that consume tarballs (including docker)
+  are confused by the absence of these directory entries (<https://github.com/JuliaIO/Tar.jl/pull/106>).
+* `Tar` now accepts tarballs with leading spaces in octal integer header fields: this is technically
+  not a valid format according to the POSIX spec, but old Solaris `tar` commands produced tarballs like
+  this so this format does occur in the wild, and it seems harmless to accept it
+  (<https://github.com/JuliaIO/Tar.jl/pull/116>).
+* `Tar.extract` now takes a `set_permissions` keyword argument, which defaults to `true`; if `false` is
+  passed instead, the permissions of extracted files are not modified on extraction
+  (<https://github.com/JuliaIO/Tar.jl/pull/113>).
+
+#### Distributed
+
+
+#### UUIDs
+
+
+#### Mmap
+
+* `mmap` is now exported ([#39816]).
+
+#### DelimitedFiles
+
+* `readdlm` now defaults to `use_mmap=false` on all OSes for consistent reliability in abnormal
+  filesystem situations ([#40415]).
+
+Deprecated or removed
+---------------------
+
+
+External dependencies
+---------------------
+
+
+Tooling Improvements
+---------------------
+
+
+<!--- generated by NEWS-update.jl: -->
+[#27613]: https://github.com/JuliaLang/julia/issues/27613
+[#29901]: https://github.com/JuliaLang/julia/issues/29901
+[#30676]: https://github.com/JuliaLang/julia/issues/30676
+[#31829]: https://github.com/JuliaLang/julia/issues/31829
+[#33697]: https://github.com/JuliaLang/julia/issues/33697
+[#34355]: https://github.com/JuliaLang/julia/issues/34355
+[#34678]: https://github.com/JuliaLang/julia/issues/34678
+[#35316]: https://github.com/JuliaLang/julia/issues/35316
+[#36229]: https://github.com/JuliaLang/julia/issues/36229
+[#37299]: https://github.com/JuliaLang/julia/issues/37299
+[#37644]: https://github.com/JuliaLang/julia/issues/37644
+[#37847]: https://github.com/JuliaLang/julia/issues/37847
+[#37971]: https://github.com/JuliaLang/julia/issues/37971
+[#37978]: https://github.com/JuliaLang/julia/issues/37978
+[#38041]: https://github.com/JuliaLang/julia/issues/38041
+[#38379]: https://github.com/JuliaLang/julia/issues/38379
+[#38438]: https://github.com/JuliaLang/julia/issues/38438
+[#38574]: https://github.com/JuliaLang/julia/issues/38574
+[#38597]: https://github.com/JuliaLang/julia/issues/38597
+[#38675]: https://github.com/JuliaLang/julia/issues/38675
+[#38878]: https://github.com/JuliaLang/julia/issues/38878
+[#38952]: https://github.com/JuliaLang/julia/issues/38952
+[#38976]: https://github.com/JuliaLang/julia/issues/38976
+[#39026]: https://github.com/JuliaLang/julia/issues/39026
+[#39044]: https://github.com/JuliaLang/julia/issues/39044
+[#39093]: https://github.com/JuliaLang/julia/issues/39093
+[#39169]: https://github.com/JuliaLang/julia/issues/39169
+[#39216]: https://github.com/JuliaLang/julia/issues/39216
+[#39228]: https://github.com/JuliaLang/julia/issues/39228
+[#39285]: https://github.com/JuliaLang/julia/issues/39285
+[#39312]: https://github.com/JuliaLang/julia/issues/39312
+[#39322]: https://github.com/JuliaLang/julia/issues/39322
+[#39381]: https://github.com/JuliaLang/julia/issues/39381
+[#39403]: https://github.com/JuliaLang/julia/issues/39403
+[#39436]: https://github.com/JuliaLang/julia/issues/39436
+[#39455]: https://github.com/JuliaLang/julia/issues/39455
+[#39463]: https://github.com/JuliaLang/julia/issues/39463
+[#39569]: https://github.com/JuliaLang/julia/issues/39569
+[#39588]: https://github.com/JuliaLang/julia/issues/39588
+[#39594]: https://github.com/JuliaLang/julia/issues/39594
+[#39710]: https://github.com/JuliaLang/julia/issues/39710
+[#39758]: https://github.com/JuliaLang/julia/issues/39758
+[#39794]: https://github.com/JuliaLang/julia/issues/39794
+[#39816]: https://github.com/JuliaLang/julia/issues/39816
+[#40025]: https://github.com/JuliaLang/julia/issues/40025
+[#40039]: https://github.com/JuliaLang/julia/issues/40039
+[#40173]: https://github.com/JuliaLang/julia/issues/40173
+[#40194]: https://github.com/JuliaLang/julia/issues/40194
+[#40250]: https://github.com/JuliaLang/julia/issues/40250
+[#40320]: https://github.com/JuliaLang/julia/issues/40320
+[#40345]: https://github.com/JuliaLang/julia/issues/40345
+[#40415]: https://github.com/JuliaLang/julia/issues/40415
+[#40484]: https://github.com/JuliaLang/julia/issues/40484
+[#40546]: https://github.com/JuliaLang/julia/issues/40546
+[#40560]: https://github.com/JuliaLang/julia/issues/40560
+[#40573]: https://github.com/JuliaLang/julia/issues/40573
+[#40604]: https://github.com/JuliaLang/julia/issues/40604
+[#40623]: https://github.com/JuliaLang/julia/issues/40623
+[#40715]: https://github.com/JuliaLang/julia/issues/40715
+[#40729]: https://github.com/JuliaLang/julia/issues/40729
+[#40736]: https://github.com/JuliaLang/julia/issues/40736
+[#40737]: https://github.com/JuliaLang/julia/issues/40737
+[#40753]: https://github.com/JuliaLang/julia/issues/40753
+[#40765]: https://github.com/JuliaLang/julia/issues/40765
+[#40868]: https://github.com/JuliaLang/julia/issues/40868
+[#40948]: https://github.com/JuliaLang/julia/issues/40948
+
+
 Julia v1.6 Release Notes
 ========================
 
diff --git a/LICENSE.md b/LICENSE.md
index e2b9c6606b1fd1..1083622cdc2eb4 100644
--- a/LICENSE.md
+++ b/LICENSE.md
@@ -1,82 +1,26 @@
-The Julia language is licensed under the MIT License. The "language" consists
-of the compiler (the contents of src/), most of the standard library (base/),
-and some utilities (most of the rest of the files in this repository). See below
-for exceptions.
+MIT License
 
-> Copyright (c) 2009-2021: Jeff Bezanson, Stefan Karpinski, Viral B. Shah,
-> and other contributors:
->
-> https://github.com/JuliaLang/julia/contributors
->
-> Permission is hereby granted, free of charge, to any person obtaining
-> a copy of this software and associated documentation files (the
-> "Software"), to deal in the Software without restriction, including
-> without limitation the rights to use, copy, modify, merge, publish,
-> distribute, sublicense, and/or sell copies of the Software, and to
-> permit persons to whom the Software is furnished to do so, subject to
-> the following conditions:
->
-> The above copyright notice and this permission notice shall be
-> included in all copies or substantial portions of the Software.
->
-> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-> NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
-> LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-> OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-> WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+Copyright (c) 2009-2021: Jeff Bezanson, Stefan Karpinski, Viral B. Shah, and other contributors: https://github.com/JuliaLang/julia/contributors
 
-Julia includes code from the following projects, which have their own licenses:
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
 
-- [crc32c.c](https://stackoverflow.com/questions/17645167/implementing-sse-4-2s-crc32c-in-software) (CRC-32c checksum code by Mark Adler) [[ZLib](https://opensource.org/licenses/Zlib)].
-- [LDC](https://github.com/ldc-developers/ldc/blob/master/LICENSE) (for ccall/cfunction ABI definitions) [BSD-3]. The portion of code that Julia uses from LDC is [BSD-3] licensed.
-- [LLVM](https://releases.llvm.org/3.9.0/LICENSE.TXT) (for parts of src/jitlayers.cpp and src/disasm.cpp) [BSD-3, effectively]
-- [MUSL](https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT) (for getopt implementation on Windows) [MIT]
-- [MINGW](https://sourceforge.net/p/mingw/mingw-org-wsl/ci/legacy/tree/mingwrt/mingwex/dirname.c) (for dirname implementation on Windows) [MIT]
-- [NetBSD](https://www.netbsd.org/about/redistribution.html) (for setjmp, longjmp, and strptime implementations on Windows) [BSD-3]
-- [Python](https://docs.python.org/3/license.html) (for strtod and joinpath implementation on Windows) [BSD-3, effectively]
-- [Google Benchmark](https://github.com/google/benchmark) (for cyclecount implementation) [Apache 2.0]
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
 
-The following components included in Julia `Base` have their own separate licenses:
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
-- base/ryu/* [Boost] (see [ryu](https://github.com/ulfjack/ryu/blob/master/LICENSE-Boost))
-- base/grisu/* [BSD-3] (see [double-conversion](https://github.com/google/double-conversion/blob/master/LICENSE))
-- base/special/{exp,rem_pio2,hyperbolic}.jl [Freely distributable with preserved copyright notice] (see [FDLIBM](https://www.netlib.org/fdlibm))
+end of terms and conditions
 
-The Julia language links to the following external libraries, which have their
-own licenses:
-
-- [FEMTOLISP](https://github.com/JeffBezanson/femtolisp) [BSD-3]
-- [LIBUNWIND](https://git.savannah.gnu.org/gitweb/?p=libunwind.git;a=blob_plain;f=LICENSE;hb=master) [MIT]
-- [LIBUV](https://github.com/joyent/libuv/blob/master/LICENSE) [MIT]
-- [LLVM](https://releases.llvm.org/6.0.0/LICENSE.TXT) [BSD-3, effectively]
-- [UTF8PROC](https://github.com/JuliaStrings/utf8proc) [MIT]
-
-Julia's `stdlib` uses the following external libraries, which have their own licenses:
-
-- [DSFMT](http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/LICENSE.txt) [BSD-3]
-- [OPENLIBM](https://github.com/JuliaMath/openlibm/blob/master/LICENSE.md) [MIT, BSD-2, ISC]
-- [GMP](https://gmplib.org/manual/Copying.html#Copying) [LGPL3+ or GPL2+]
-- [LIBGIT2](https://github.com/libgit2/libgit2/blob/development/COPYING) [GPL2+ with unlimited linking exception]
-- [CURL](https://curl.haxx.se/docs/copyright.html) [MIT/X derivative]
-- [LIBSSH2](https://github.com/libssh2/libssh2/blob/master/COPYING) [BSD-3]
-- [MBEDTLS](https://tls.mbed.org/how-to-get) [either GPLv2 or Apache 2.0]
-- [MPFR](https://www.mpfr.org/mpfr-current/mpfr.html#Copying) [LGPL3+]
-- [OPENBLAS](https://raw.github.com/xianyi/OpenBLAS/master/LICENSE) [BSD-3]
-- [LAPACK](https://netlib.org/lapack/LICENSE.txt) [BSD-3]
-- [PCRE](https://www.pcre.org/licence.txt) [BSD-3]
-- [SUITESPARSE](http://suitesparse.com) [mix of LGPL2+ and GPL2+; see individual module licenses]
-
-Julia's build process uses the following external tools:
-
-- [PATCHELF](https://nixos.org/patchelf.html)
-- [OBJCONV](https://www.agner.org/optimize/#objconv)
-
-Julia bundles the following external programs and libraries:
-
-- [7-Zip](https://www.7-zip.org/license.txt)
-- [ZLIB](https://zlib.net/zlib_license.html)
-
-On some platforms, distributions of Julia contain SSL certificate authority certificates,
-released under the [Mozilla Public License](https://en.wikipedia.org/wiki/Mozilla_Public_License).
+Please see [THIRDPARTY.md](./THIRDPARTY.md) for license information for other software used in this project.
diff --git a/Make.inc b/Make.inc
index 519720a861ed7f..33bfac180a0fa6 100644
--- a/Make.inc
+++ b/Make.inc
@@ -45,7 +45,7 @@ USE_SYSTEM_BLAS:=0
 USE_SYSTEM_LAPACK:=0
 USE_SYSTEM_GMP:=0
 USE_SYSTEM_MPFR:=0
-USE_SYSTEM_SUITESPARSE:=0
+USE_SYSTEM_LIBSUITESPARSE:=0
 USE_SYSTEM_LIBUV:=0
 USE_SYSTEM_UTF8PROC:=0
 USE_SYSTEM_MBEDTLS:=0
@@ -75,6 +75,9 @@ HAVE_SSP := 0
 WITH_GC_VERIFY := 0
 WITH_GC_DEBUG_ENV := 0
 
+# Enable DTrace support
+WITH_DTRACE := 0
+
 # Prevent picking up $ARCH from the environment variables
 ARCH:=
 
@@ -87,6 +90,10 @@ endef
 COMMA:=,
 SPACE:=$(eval) $(eval)
 
+# force a sane / stable configuration
+export LC_ALL=C
+export LANG=C
+
 # We need python for things like BB triplet recognition and relative path computation.
 # We don't really care about version, generally, so just find something that works:
 PYTHON := "$(shell which python 2>/dev/null || which python3 2>/dev/null || which python2 2>/dev/null || echo "{python|python3|python2} not found")"
@@ -472,13 +479,13 @@ USEGCC := 0
 USECLANG := 1
 endif
 
-# Note: Supporting only macOS Mavericks and above
+# Note: Supporting only macOS Yosemite and above
 ifeq ($(OS), Darwin)
 APPLE_ARCH := $(shell uname -m)
 USEGCC := 0
 USECLANG := 1
 ifneq ($(APPLE_ARCH),arm64)
-MACOSX_VERSION_MIN := 10.9
+MACOSX_VERSION_MIN := 10.10
 else
 MACOSX_VERSION_MIN := 11.0
 endif
@@ -587,6 +594,12 @@ endif #USEMSVC
 RANLIB := $(CROSS_COMPILE)ranlib
 OBJCOPY := $(CROSS_COMPILE)objcopy
 
+ifneq ($(USEMSVC), 1)
+CPP_STDOUT := $(CPP) -P
+else
+CPP_STDOUT := $(CPP) -E
+endif
+
 # file extensions
 ifeq ($(OS), WINNT)
   SHLIB_EXT := dll
@@ -596,6 +609,21 @@ else
   SHLIB_EXT := so
 endif
 
+ifeq ($(OS),WINNT)
+define versioned_libname
+$$(if $(2),$(1)-$(2).$(SHLIB_EXT),$(1).$(SHLIB_EXT))
+endef
+else ifeq ($(OS),Darwin)
+define versioned_libname
+$$(if $(2),$(1).$(2).$(SHLIB_EXT),$(1).$(SHLIB_EXT))
+endef
+else
+define versioned_libname
+$$(if $(2),$(1).$(SHLIB_EXT).$(2),$(1).$(SHLIB_EXT))
+endef
+endif
+
+
 ifeq ($(SHLIB_EXT), so)
 define SONAME_FLAGS
   -Wl,-soname=$1
@@ -734,6 +762,13 @@ JCXXFLAGS += -DGC_DEBUG_ENV
 JCFLAGS += -DGC_DEBUG_ENV
 endif
 
+ifeq ($(WITH_DTRACE), 1)
+JCXXFLAGS += -DUSE_DTRACE
+JCFLAGS += -DUSE_DTRACE
+DTRACE := dtrace
+else
+endif
+
 # ===========================================================================
 
 # Select the cpu architecture to target, or automatically detects the user's compiler
@@ -898,6 +933,7 @@ ifneq (,$(findstring aarch64,$(ARCH)))
 OPENBLAS_DYNAMIC_ARCH:=0
 OPENBLAS_TARGET_ARCH:=ARMV8
 USE_BLAS64:=1
+BINARY:=64
 ifeq ($(OS),Darwin)
 # Apple Chips are all at least A12Z
 MCPU:=apple-a12
@@ -1008,22 +1044,18 @@ JCPPFLAGS+=-DSYSTEM_LLVM
 endif # SYSTEM_LLVM
 
 # Windows builds need a little help finding the LLVM libraries for llvm-config
-LLVM_CONFIG_PATH_FIX :=
+# use delayed expansion (= not :=) because spawn isn't defined until later
+# WINEPATH is only needed for a wine-based cross compile
+LLVM_CONFIG_PATH_FIX =
 ifeq ($(OS),WINNT)
-LLVM_CONFIG_PATH_FIX := PATH="$(build_bindir):$(PATH)"
+LLVM_CONFIG_PATH_FIX = PATH="$(build_bindir):$(PATH)" WINEPATH="$(call cygpath_w,$(build_bindir));$(WINEPATH)"
 endif
 
 ifeq ($(BUILD_OS),$(OS))
-LLVM_CONFIG_HOST := $(LLVM_CONFIG)
+LLVM_CONFIG_HOST = $(LLVM_CONFIG_PATH_FIX) $(LLVM_CONFIG)
 else
 LLVM_CONFIG_HOST := $(basename $(LLVM_CONFIG))-host$(BUILD_EXE)
-ifeq (exists, $(shell [ -f '$(LLVM_CONFIG_HOST)' ] && echo exists ))
-ifeq ($(shell $(LLVM_CONFIG_PATH_FIX) $(LLVM_CONFIG_HOST) --version),3.3)
-# llvm-config-host <= 3.3 is broken, use llvm-config instead (in an emulator)
-# use delayed expansion (= not :=) because spawn isn't defined until later
-LLVM_CONFIG_HOST = $(LLVM_CONFIG_PATH_FIX) $(call spawn,$(LLVM_CONFIG))
-endif
-else
+ifneq (exists, $(shell [ -f '$(LLVM_CONFIG_HOST)' ] && echo exists ))
 # llvm-config-host does not exist (cmake build)
 LLVM_CONFIG_HOST = $(LLVM_CONFIG_PATH_FIX) $(call spawn,$(LLVM_CONFIG))
 endif
@@ -1147,8 +1179,13 @@ BB_TRIPLET_LIBGFORTRAN := $(subst $(SPACE),-,$(filter-out cxx%,$(subst -,$(SPACE
 BB_TRIPLET_CXXABI := $(subst $(SPACE),-,$(filter-out libgfortran%,$(subst -,$(SPACE),$(BB_TRIPLET_LIBGFORTRAN_CXXABI))))
 BB_TRIPLET := $(subst $(SPACE),-,$(filter-out cxx%,$(filter-out libgfortran%,$(subst -,$(SPACE),$(BB_TRIPLET_LIBGFORTRAN_CXXABI)))))
 
+LIBGFORTRAN_VERSION := $(subst libgfortran,,$(filter libgfortran%,$(subst -,$(SPACE),$(BB_TRIPLET_LIBGFORTRAN))))
+
 # This is the set of projects that BinaryBuilder dependencies are hooked up for.
-BB_PROJECTS := BLASTRAMPOLINE OPENBLAS LLVM SUITESPARSE OPENLIBM GMP MBEDTLS LIBSSH2 NGHTTP2 MPFR CURL LIBGIT2 PCRE LIBUV LIBUNWIND DSFMT OBJCONV ZLIB P7ZIP CSL
+# Note: we explicitly _do not_ define `CSL` here, since it requires some more
+# advanced techniques to decide whether it should be installed from a BB source
+# or not.  See `deps/csl.mk` for more detail.
+BB_PROJECTS := BLASTRAMPOLINE OPENBLAS LLVM LIBSUITESPARSE OPENLIBM GMP MBEDTLS LIBSSH2 NGHTTP2 MPFR CURL LIBGIT2 PCRE LIBUV LIBUNWIND DSFMT OBJCONV ZLIB P7ZIP
 define SET_BB_DEFAULT
 # First, check to see if BB is disabled on a global setting
 ifeq ($$(USE_BINARYBUILDER),0)
@@ -1167,7 +1204,7 @@ $(foreach proj,$(BB_PROJECTS),$(eval $(call SET_BB_DEFAULT,$(proj))))
 
 # Warn if the user tries to build something that requires `gfortran` but they don't have it installed.
 ifeq ($(FC_VERSION),)
-ifneq ($(USE_BINARYBUILDER_OPENBLAS)$(USE_BINARYBUILDER_SUITESPARSE),11)
+ifneq ($(USE_BINARYBUILDER_OPENBLAS)$(USE_BINARYBUILDER_LIBSUITESPARSE),11)
 $(error "Attempting to build OpenBLAS or SuiteSparse without a functioning fortran compiler!")
 endif
 endif
@@ -1223,7 +1260,7 @@ else ifneq ($(USEMSVC), 1)
 endif
 
 ifeq ($(OS), Linux)
-OSLIBS += -Wl,--no-as-needed -ldl -lrt -lpthread -Wl,--export-dynamic,--as-needed,--no-whole-archive
+OSLIBS += -Wl,--no-as-needed -ldl -lrt -lpthread -latomic -Wl,--export-dynamic,--as-needed,--no-whole-archive
 # Detect if ifunc is supported
 IFUNC_DETECT_SRC := 'void (*f0(void))(void) { return (void(*)(void))0L; }; void f(void) __attribute__((ifunc("f0")));'
 ifeq (supported, $(shell echo $(IFUNC_DETECT_SRC) | $(CC) -Werror -x c - -S -o /dev/null > /dev/null 2>&1 && echo supported))
@@ -1249,7 +1286,7 @@ endif
 
 ifeq ($(OS), FreeBSD)
 JLDFLAGS := -Wl,-Bdynamic
-OSLIBS += -lelf -lkvm -lrt -lpthread
+OSLIBS += -lelf -lkvm -lrt -lpthread -latomic
 
 # Tweak order of libgcc_s in DT_NEEDED,
 # make it loaded first to
@@ -1354,8 +1391,8 @@ endif
 ifeq ($(USE_SYSTEM_BLAS),1)
 # Since the names don't line up (`BLAS` vs. `OPENBLAS`), manually gate:
 USE_BINARYBUILDER_OPENBLAS := 0
-# Disable BB SuiteSparse if we're using system BLAS
-USE_BINARYBUILDER_SUITESPARSE := 0
+# Disable BB LIBSUITESPARSE if we're using system BLAS
+USE_BINARYBUILDER_LIBSUITESPARSE := 0
 endif
 
 ifeq ($(USE_SYSTEM_LIBM),1)
@@ -1367,7 +1404,7 @@ endif
 # Note: we're passing *FLAGS here computed based on your system compiler to
 # clang. If that causes you problems, you might want to build and/or run
 # specific clang-sa-* files with clang explicitly selected:
-#   make CC=~+/../usr/tools/clang CXX=~+/../usr/tools/clang USECLANG=1 analyzegc
+#   make CC=~+/../usr/bin/clang CXX=~+/../usr/bin/clang USECLANG=1 analyzegc
 #   make USECLANG=1 clang-sa-*
 CLANGSA_FLAGS :=
 CLANGSA_CXXFLAGS :=
@@ -1479,8 +1516,12 @@ ifneq ($(findstring $(OS),Linux FreeBSD),)
 LIBGCC_NAME := libgcc_s.$(SHLIB_EXT).1
 endif
 
-
+# USE_SYSTEM_CSL causes it to get symlinked into build_private_shlibdir
+ifeq ($(USE_SYSTEM_CSL),1)
+LIBGCC_BUILD_DEPLIB := $(call dep_lib_path,$(build_libdir),$(build_private_shlibdir)/$(LIBGCC_NAME))
+else
 LIBGCC_BUILD_DEPLIB := $(call dep_lib_path,$(build_libdir),$(build_shlibdir)/$(LIBGCC_NAME))
+endif
 LIBGCC_INSTALL_DEPLIB := $(call dep_lib_path,$(libdir),$(private_shlibdir)/$(LIBGCC_NAME))
 
 # USE_SYSTEM_LIBM and USE_SYSTEM_OPENLIBM causes it to get symlinked into build_private_shlibdir
@@ -1524,6 +1565,7 @@ LINKCOLOR:="\033[34;1m"
 PERLCOLOR:="\033[35m"
 FLISPCOLOR:="\033[32m"
 JULIACOLOR:="\033[32;1m"
+DTRACECOLOR:="\033[32;1m"
 
 SRCCOLOR:="\033[33m"
 BINCOLOR:="\033[37;1m"
@@ -1537,6 +1579,7 @@ PRINT_LINK = printf '    %b %b\n' $(LINKCOLOR)LINK$(ENDCOLOR) $(BINCOLOR)$(GOAL)
 PRINT_PERL = printf '    %b %b\n' $(PERLCOLOR)PERL$(ENDCOLOR) $(BINCOLOR)$(GOAL)$(ENDCOLOR); $(1)
 PRINT_FLISP = printf '    %b %b\n' $(FLISPCOLOR)FLISP$(ENDCOLOR) $(BINCOLOR)$(GOAL)$(ENDCOLOR); $(1)
 PRINT_JULIA = printf '    %b %b\n' $(JULIACOLOR)JULIA$(ENDCOLOR) $(BINCOLOR)$(GOAL)$(ENDCOLOR); $(1)
+PRINT_DTRACE = printf '    %b %b\n' $(DTRACECOLOR)DTRACE$(ENDCOLOR) $(BINCOLOR)$(GOAL)$(ENDCOLOR); $(1)
 
 else
 QUIET_MAKE =
@@ -1546,8 +1589,10 @@ PRINT_LINK = echo '$(subst ','\'',$(1))'; $(1)
 PRINT_PERL = echo '$(subst ','\'',$(1))'; $(1)
 PRINT_FLISP = echo '$(subst ','\'',$(1))'; $(1)
 PRINT_JULIA = echo '$(subst ','\'',$(1))'; $(1)
+PRINT_DTRACE = echo '$(subst ','\'',$(1))'; $(1)
 
 endif
+
 # Makefile debugging trick:
 # call print-VARIABLE to see the runtime value of any variable
 # (hardened against any special characters appearing in the output)
diff --git a/Makefile b/Makefile
index 823ff3b86339de..62afa8e6855290 100644
--- a/Makefile
+++ b/Makefile
@@ -154,7 +154,8 @@ $(build_depsbindir)/stringreplace: $(JULIAHOME)/contrib/stringreplace.c | $(buil
 	@$(call PRINT_CC, $(HOSTCC) -o $(build_depsbindir)/stringreplace $(JULIAHOME)/contrib/stringreplace.c)
 
 julia-base-cache: julia-sysimg-$(JULIA_BUILD_MODE) | $(DIRS) $(build_datarootdir)/julia
-	@JULIA_BINDIR=$(call cygpath_w,$(build_bindir)) $(call spawn, $(JULIA_EXECUTABLE) --startup-file=no $(call cygpath_w,$(JULIAHOME)/etc/write_base_cache.jl) \
+	@JULIA_BINDIR=$(call cygpath_w,$(build_bindir)) WINEPATH="$(call cygpath_w,$(build_bindir));$$WINEPATH" \
+		$(call spawn, $(JULIA_EXECUTABLE) --startup-file=no $(call cygpath_w,$(JULIAHOME)/etc/write_base_cache.jl) \
 		$(call cygpath_w,$(build_datarootdir)/julia/base.cache))
 
 # public libraries, that are installed in $(prefix)/lib
@@ -169,8 +170,7 @@ ifeq ($(BUNDLE_DEBUG_LIBS),1)
 JL_PRIVATE_LIBS-0 += libjulia-internal-debug
 endif
 ifeq ($(USE_GPL_LIBS), 1)
-JL_PRIVATE_LIBS-0 += libsuitesparse_wrapper
-JL_PRIVATE_LIBS-$(USE_SYSTEM_SUITESPARSE) += libamd libbtf libcamd libccolamd libcholmod libcolamd libklu libldl librbio libspqr libsuitesparseconfig libumfpack
+JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBSUITESPARSE) += libamd libbtf libcamd libccolamd libcholmod libcolamd libklu libldl librbio libspqr libsuitesparseconfig libumfpack
 endif
 JL_PRIVATE_LIBS-$(USE_SYSTEM_PCRE) += libpcre2-8
 JL_PRIVATE_LIBS-$(USE_SYSTEM_DSFMT) += libdSFMT
@@ -188,7 +188,7 @@ else
 JL_PRIVATE_LIBS-$(USE_SYSTEM_ZLIB) += libz
 endif
 ifeq ($(USE_LLVM_SHLIB),1)
-JL_PRIVATE_LIBS-$(USE_SYSTEM_LLVM) += libLLVM libLLVM-11jl
+JL_PRIVATE_LIBS-$(USE_SYSTEM_LLVM) += libLLVM libLLVM-12jl
 endif
 JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBUNWIND) += libunwind
 
@@ -290,8 +290,11 @@ endif
 		done \
 	done
 	for suffix in $(JL_PRIVATE_LIBS-1) ; do \
-		lib=$(build_private_libdir)/$${suffix}.$(SHLIB_EXT); \
-		$(INSTALL_M) $$lib $(DESTDIR)$(private_libdir) ; \
+		for lib in $(build_private_libdir)/$${suffix}.$(SHLIB_EXT)*; do \
+			if [ "$${lib##*.}" != "dSYM" ]; then \
+				$(INSTALL_M) $$lib $(DESTDIR)$(private_libdir) ; \
+			fi \
+		done \
 	done
 endif
 	# Install `7z` into libexec/
@@ -448,7 +451,7 @@ endif
 	echo "base/version_git.jl" > light-source-dist.tmp
 
 	# Download all stdlibs and include the tarball filenames in light-source-dist.tmp
-	@$(MAKE) -C stdlib getall NO_GIT=1
+	@$(MAKE) -C stdlib getall DEPS_GIT=0 USE_BINARYBUILDER=0
 	-ls stdlib/srccache/*.tar.gz >> light-source-dist.tmp
 	-ls stdlib/*/StdlibArtifacts.toml >> light-source-dist.tmp
 
@@ -472,7 +475,7 @@ source-dist:
 # Make tarball with Julia code plus all dependencies
 full-source-dist: light-source-dist.tmp
 	# Get all the dependencies downloaded
-	@$(MAKE) -C deps getall NO_GIT=1
+	@$(MAKE) -C deps getall DEPS_GIT=0 USE_BINARYBUILDER=0
 
 	# Create file full-source-dist.tmp to hold all the filenames that go into the tarball
 	cp light-source-dist.tmp full-source-dist.tmp
@@ -570,3 +573,6 @@ endif
 	@time $(call spawn,$(build_bindir)/julia$(EXE) -e '')
 	@time $(call spawn,$(build_bindir)/julia$(EXE) -e '')
 	@time $(call spawn,$(build_bindir)/julia$(EXE) -e '')
+
+print-locale:
+	@locale
diff --git a/NEWS.md b/NEWS.md
index 72cd82723ebca1..0835e787099fbf 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,12 +1,18 @@
-Julia v1.7 Release Notes
+Julia v1.8 Release Notes
 ========================
 
+
 New language features
 ---------------------
 
 * `(; a, b) = x` can now be used to destructure properties `a` and `b` of `x`. This syntax is equivalent to `a = getproperty(x, :a)`
   and similarly for `b`. ([#39285])
 * `Module(:name, nothing)` can be used to create a `module` that contains no names (it does not import `Base` or `Core` and does not contain a reference to itself). ([#40110])
+* `@inline` and `@noinline` annotations can be used within a function body to give an extra
+  hint about the inlining cost to the compiler. ([#41312])
+* `@inline` and `@noinline` annotations can now be applied to a function callsite or block
+  to enforce the involved function calls to be (or not to be) inlined. ([#41312])
+* The default behavior of observing `@inbounds` declarations is now an option via `auto` in `--check-bounds=yes|no|auto` ([#41551])
 
 Language changes
 ----------------
@@ -19,12 +25,10 @@ Compiler/Runtime improvements
 Command-line option changes
 ---------------------------
 
-* The Julia `--project` option and the `JULIA_PROJECT` environment variable now support selecting shared environments like `.julia/environments/myenv` the same way the package management console does: use `julia --project=@myenv` resp. `export JULIA_PROJECT="@myenv"` ([#40025]).
-
 
 Multi-threading changes
 -----------------------
-* If the `JULIA_NUM_THREADS` environment variable is set to `auto`, then the number of threads will be set to the number of CPU threads ([#38952])
+
 
 Build system changes
 --------------------
@@ -33,83 +37,82 @@ Build system changes
 New library functions
 ---------------------
 
-* Two argument methods `findmax(f, domain)`, `argmax(f, domain)` and the corresponding `min` versions ([#27613]).
-* `isunordered(x)` returns true if `x` is value that is normally unordered, such as `NaN` or `missing`.
-* New macro `Base.@invokelatest f(args...; kwargs...)` provides a convenient way to call `Base.invokelatest(f, args...; kwargs...)` ([#37971])
-* New macro `Base.@invoke f(arg1::T1, arg2::T2; kwargs...)` provides an easier syntax to call `invoke(f, Tuple{T1,T2}, arg1, arg2; kwargs...)` ([#38438])
+* `hardlink(src, dst)` can be used to create hard links. ([#41639])
 
 New library features
 --------------------
 
-* The optional keyword argument `context` of `sprint` can now be set to a tuple of `:key => value` pairs to specify multiple attributes. ([#39381])
+* `@test_throws "some message" triggers_error()` can now be used to check whether the displayed error text
+  contains "some message" regardless of the specific exception type.
+  Regular expressions, lists of strings, and matching functions are also supported. ([#41888])
 
 Standard library changes
 ------------------------
 
-* `count` and `findall` now accept an `AbstractChar` argument to search for a character in a string ([#38675]).
-* `range` now supports the `range(start, stop)` and `range(start, stop, length)` methods ([#39228]).
-* `range` now supports `start` as an optional keyword argument ([#38041]).
-* `islowercase` and `isuppercase` are now compliant with the Unicode lower/uppercase categories ([#38574]).
-* `iseven` and `isodd` functions now support non-`Integer` numeric types ([#38976]).
-* `escape_string` can now receive a collection of characters in the keyword
-  `keep` that are to be kept as they are. ([#38597]).
-* `getindex` can now be used on `NamedTuple`s with multiple values ([#38878])
-* Subtypes of `AbstractRange` now correctly follow the general array indexing
-  behavior when indexed by `Bool`s, erroring for scalar `Bool`s and treating
-  arrays (including ranges) of `Bool` as an logical index ([#31829])
-* `keys(::RegexMatch)` is now defined to return the capture's keys, by name if named, or by index if not ([#37299]).
-* `keys(::Generator)` is now defined to return the iterator's keys ([#34678])
-* `RegexMatch` now iterate to give their captures. ([#34355]).
+* `range` accepts either `stop` or `length` as a sole keyword argument ([#39241])
+* The `length` function on certain ranges of certain specific element types no longer checks for integer
+  overflow in most cases. The new function `checked_length` is now available, which will try to use checked
+  arithmetic to error if the result may be wrapping. Or use a package such as SaferIntegers.jl when
+  constructing the range. ([#40382])
+* TCP socket objects now expose `closewrite` functionality and support half-open mode usage ([#40783]).
+* `intersect` returns a result with the eltype of the type-promoted eltypes of the two inputs ([#41769]).
 
-#### Package Manager
+#### InteractiveUtils
+* A new macro `@time_imports` for reporting any time spent importing packages and their dependencies ([#41612])
 
+#### Package Manager
 
 #### LinearAlgebra
 
-* Use [Libblastrampoline](https://github.com/staticfloat/libblastrampoline/) to pick a BLAS and LAPACK at runtime. By default it forwards to OpenBLAS in the Julia distribution. The forwarding mechanism can be used by packages to replace the BLAS and LAPACK with user preferences. ([#39455])
-* On aarch64, OpenBLAS now uses an ILP64 BLAS like all other 64-bit platforms. ([#39436])
-* OpenBLAS is updated to 0.3.13. ([#39216])
-* SuiteSparse is updated to 5.8.1. ([#39455])
-
 #### Markdown
 
-
 #### Printf
+* Now uses `textwidth` for formatting `%s` and `%c` widths ([#41085]).
 
+#### Profile
+* Profiling now records sample metadata including thread and task. `Profile.print()` has a new `groupby` kwarg that allows
+  grouping by thread, task, or nested thread/task, task/thread, and `threads` and `tasks` kwargs to allow filtering.
+  Further, percent utilization is now reported as a total or per-thread, based on whether the thread is idle or not at
+  each sample. `Profile.fetch()` by default strips out the new metadata to ensure backwards compatibility with external
+  profiling data consumers, but the metadata can be included with the `include_meta` kwarg. ([#41742])
 
 #### Random
 
-
 #### REPL
 
+* ` ?(x, y` followed by TAB displays all methods that can be called
+  with arguments `x, y, ...`. (The space at the beginning prevents entering help-mode.)
+  `MyModule.?(x, y` limits the search to `MyModule`. TAB requires that at least one
+  argument have a type more specific than `Any`; use SHIFT-TAB instead of TAB
+  to allow any compatible methods.
 
 #### SparseArrays
 
-
 #### Dates
 
-* The `Dates.periods` function can be used to get the `Vector` of `Period`s that comprise a `CompoundPeriod` ([#39169]).
+#### Downloads
 
 #### Statistics
 
-
 #### Sockets
 
+#### Tar
 
 #### Distributed
 
-
 #### UUIDs
 
-
 #### Mmap
 
-* `mmap` is now exported ([#39816]).
+#### DelimitedFiles
+
+#### Logging
+* The standard log levels `BelowMinLevel`, `Debug`, `Info`, `Warn`, `Error`,
+  and `AboveMaxLevel` are now exported from the Logging stdlib ([#40980]).
 
 
 Deprecated or removed
 ---------------------
-- Multiple successive semicolons in an array expresion were previously ignored (e.g. `[1 ;; 2] == [1 ; 2]`). Multiple semicolons are being reserved for future syntax and may have different behavior in a future release.
 
 
 External dependencies
diff --git a/README.md b/README.md
index f158ab22f79732..8a7a59ce73a576 100644
--- a/README.md
+++ b/README.md
@@ -5,17 +5,39 @@
 </a>
 </div>
 
+Documentation:
+[![Documentation][docs-img]][docs-url]
+
+[docs-img]: https://img.shields.io/badge/docs-v1-blue.svg "Documentation (version 1)"
+[docs-url]: https://docs.julialang.org
+
+Continuous integration:
+[![Continuous integration (master)][buildkite-master-img]][buildkite-master-url]
+[![Continuous integration (master->scheduled)][buildkite-master-scheduled-img]][buildkite-master-scheduled-url]
+
+<!--
+To change the badge to point to a different pipeline, it is not sufficient to simply change the `?branch=` part.
+You need to go to the Buildkite website and get the SVG URL for the correct pipeline.
+-->
+[buildkite-master-img]: https://badge.buildkite.com/f28e0d28b345f9fad5856ce6a8d64fffc7c70df8f4f2685cd8.svg?branch=master "Continuous integration (master)"
+[buildkite-master-url]: https://buildkite.com/julialang/julia-master
+
+<!--
+To change the badge to point to a different pipeline, it is not sufficient to simply change the `?branch=` part.
+You need to go to the Buildkite website and get the SVG URL for the correct pipeline.
+-->
+[buildkite-master-scheduled-img]: https://badge.buildkite.com/d5ae34dbbf6fefe615300c4f3118bf63cb4a5ae7fd962263c1.svg?branch=master "Continuous integration (master->scheduled)"
+[buildkite-master-scheduled-url]: https://buildkite.com/julialang/julia-master-scheduled
+
 Code coverage:
-[![coveralls][coveralls-img]](https://coveralls.io/r/JuliaLang/julia?branch=master)
-[![codecov][codecov-img]](https://codecov.io/github/JuliaLang/julia?branch=master)
+[![Code coverage (Coveralls)][coveralls-img]][coveralls-url]
+[![Code coverage (Codecov)][codecov-img]][codecov-url]
 
-Documentation:
-[![version 1][docs-img]](https://docs.julialang.org)
+[coveralls-img]: https://img.shields.io/coveralls/github/JuliaLang/julia/master.svg?label=coveralls "Code coverage (Coveralls)"
+[coveralls-url]: https://coveralls.io/r/JuliaLang/julia?branch=master
 
-[appveyor-img]: https://img.shields.io/appveyor/ci/JuliaLang/julia/master.svg?label=Windows
-[coveralls-img]: https://img.shields.io/coveralls/github/JuliaLang/julia/master.svg?label=coveralls
-[codecov-img]: https://img.shields.io/codecov/c/github/JuliaLang/julia/master.svg?label=codecov
-[docs-img]: https://img.shields.io/badge/docs-v1-blue.svg
+[codecov-img]: https://img.shields.io/codecov/c/github/JuliaLang/julia/master.svg?label=codecov "Code coverage (Codecov)"
+[codecov-url]: https://codecov.io/github/JuliaLang/julia?branch=master
 
 ## The Julia Language
 
@@ -45,7 +67,6 @@ helpful to start contributing to the Julia codebase.
 
 - [**StackOverflow**](https://stackoverflow.com/questions/tagged/julia-lang)
 - [**Twitter**](https://twitter.com/JuliaLanguage)
-- [**Meetup**](https://julia.meetup.com/)
 - [**Learning resources**](https://julialang.org/learning/)
 
 ## Binary Installation
@@ -73,14 +94,14 @@ First, make sure you have all the [required
 dependencies](https://github.com/JuliaLang/julia/blob/master/doc/build/build.md#required-build-tools-and-external-libraries) installed.
 Then, acquire the source code by cloning the git repository:
 
-    git clone git://github.com/JuliaLang/julia.git
+    git clone https://github.com/JuliaLang/julia.git
 
 By default you will be building the latest unstable version of
 Julia. However, most users should use the most recent stable version
 of Julia. You can get this version by changing to the Julia directory
 and running:
 
-    git checkout v1.6.0
+    git checkout v1.6.2
 
 Now run `make` to build the `julia` executable.
 
@@ -145,10 +166,7 @@ Support for editing Julia is available for many
 [Sublime Text](https://github.com/JuliaEditorSupport/Julia-sublime), and many
 others.
 
-Supported IDEs include: [Juno](http://junolab.org/) (Atom plugin),
-[julia-vscode](https://github.com/JuliaEditorSupport/julia-vscode) (VS
-Code plugin), and
-[julia-intellij](https://github.com/JuliaEditorSupport/julia-intellij)
-(IntelliJ IDEA plugin). The popular [Jupyter](https://jupyter.org/)
-notebook interface is available through
-[IJulia](https://github.com/JuliaLang/IJulia.jl).
+Supported IDEs include: [julia-vscode](https://github.com/JuliaEditorSupport/julia-vscode) (VS
+Code plugin) and [Juno](http://junolab.org/) (Atom plugin). [Jupyter](https://jupyter.org/)
+notebooks are available through the [IJulia](https://github.com/JuliaLang/IJulia.jl) package, and
+[Pluto](https://github.com/fonsp/Pluto.jl) notebooks through the Pluto.jl package.
diff --git a/THIRDPARTY.md b/THIRDPARTY.md
new file mode 100644
index 00000000000000..32ef8eacd9ce48
--- /dev/null
+++ b/THIRDPARTY.md
@@ -0,0 +1,57 @@
+The Julia language is licensed under the MIT License (see [LICENSE.md](./LICENSE.md)). The "language" consists
+of the compiler (the contents of src/), most of the standard library (base/),
+and some utilities (most of the rest of the files in this repository). See below
+for exceptions.
+
+- [crc32c.c](https://stackoverflow.com/questions/17645167/implementing-sse-4-2s-crc32c-in-software) (CRC-32c checksum code by Mark Adler) [[ZLib](https://opensource.org/licenses/Zlib)].
+- [LDC](https://github.com/ldc-developers/ldc/blob/master/LICENSE) (for ccall/cfunction ABI definitions) [BSD-3]. The portion of code that Julia uses from LDC is [BSD-3] licensed.
+- [LLVM](https://releases.llvm.org/3.9.0/LICENSE.TXT) (for parts of src/disasm.cpp) [UIUC]
+- [MUSL](https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT) (for src/getopt.c and src/getopt.h) [MIT]
+- [MINGW](https://sourceforge.net/p/mingw/mingw-org-wsl/ci/legacy/tree/mingwrt/mingwex/dirname.c) (for dirname implementation on Windows) [MIT]
+- [NetBSD](https://www.netbsd.org/about/redistribution.html) (for setjmp, longjmp, and strptime implementations on Windows) [BSD-3]
+- [Python](https://docs.python.org/3/license.html) (for strtod implementation on Windows) [PSF]
+- [FEMTOLISP](https://github.com/JeffBezanson/femtolisp) [BSD-3]
+
+The following components included in Julia `Base` have their own separate licenses:
+
+- base/ryu/* [Boost] (see [ryu](https://github.com/ulfjack/ryu/blob/master/LICENSE-Boost))
+- base/special/{rem_pio2,hyperbolic}.jl [Freely distributable with preserved copyright notice] (see [FDLIBM](https://www.netlib.org/fdlibm))
+
+The Julia language links to the following external libraries, which have their
+own licenses:
+
+- [LIBUNWIND](https://github.com/libunwind/libunwind/blob/master/LICENSE) [MIT]
+- [LIBUV](https://github.com/JuliaLang/libuv/blob/julia-uv2-1.39.0/LICENSE) [MIT]
+- [LLVM](https://releases.llvm.org/6.0.0/LICENSE.TXT) [UIUC]
+- [UTF8PROC](https://github.com/JuliaStrings/utf8proc) [MIT]
+
+Julia's `stdlib` uses the following external libraries, which have their own licenses:
+
+- [DSFMT](https://github.com/MersenneTwister-Lab/dSFMT/blob/master/LICENSE.txt) [BSD-3]
+- [OPENLIBM](https://github.com/JuliaMath/openlibm/blob/master/LICENSE.md) [MIT, BSD-2, ISC]
+- [GMP](https://gmplib.org/manual/Copying.html#Copying) [LGPL3+ or GPL2+]
+- [LIBGIT2](https://github.com/libgit2/libgit2/blob/development/COPYING) [GPL2+ with unlimited linking exception]
+- [CURL](https://curl.haxx.se/docs/copyright.html) [MIT/X derivative]
+- [LIBSSH2](https://github.com/libssh2/libssh2/blob/master/COPYING) [BSD-3]
+- [MBEDTLS](https://github.com/ARMmbed/mbedtls/blob/development/LICENSE) [Apache 2.0]
+- [MPFR](https://www.mpfr.org/mpfr-current/mpfr.html#Copying) [LGPL3+]
+- [OPENBLAS](https://raw.github.com/xianyi/OpenBLAS/master/LICENSE) [BSD-3]
+- [LAPACK](https://netlib.org/lapack/LICENSE.txt) [BSD-3]
+- [PCRE](https://www.pcre.org/licence.txt) [BSD-3]
+- [SUITESPARSE](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/master/LICENSE.txt) [mix of LGPL2+ and GPL2+; see individual module licenses]
+- [LIBBLASTRAMPOLINE](https://github.com/staticfloat/libblastrampoline/blob/main/LICENSE) [MIT]
+- [NGHTTP2](https://github.com/nghttp2/nghttp2/blob/master/COPYING) [MIT]
+
+Julia's build process uses the following external tools:
+
+- [PATCHELF](https://nixos.org/patchelf.html)
+- [OBJCONV](https://www.agner.org/optimize/#objconv)
+- [LIBWHICH](https://github.com/vtjnash/libwhich/blob/master/LICENSE) [MIT]
+
+Julia bundles the following external programs and libraries:
+
+- [7-Zip](https://www.7-zip.org/license.txt)
+- [ZLIB](https://zlib.net/zlib_license.html)
+
+On some platforms, distributions of Julia contain SSL certificate authority certificates,
+released under the [Mozilla Public License](https://en.wikipedia.org/wiki/Mozilla_Public_License).
diff --git a/VERSION b/VERSION
index 17522d544ab967..31083204c40c0a 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-1.7.0-DEV
+1.8.0-DEV
diff --git a/base/Base.jl b/base/Base.jl
index 3b531738276100..e4c65b3493cbaf 100644
--- a/base/Base.jl
+++ b/base/Base.jl
@@ -20,20 +20,44 @@ include(path::String) = include(Base, path)
 const is_primary_base_module = ccall(:jl_module_parent, Ref{Module}, (Any,), Base) === Core.Main
 ccall(:jl_set_istopmod, Cvoid, (Any, Bool), Base, is_primary_base_module)
 
+# The @inline/@noinline macros that can be applied to a function declaration are not available
+# until after array.jl, and so we will mark them within a function body instead.
+macro inline()   Expr(:meta, :inline)   end
+macro noinline() Expr(:meta, :noinline) end
+
 # Try to help prevent users from shooting them-selves in the foot
 # with ambiguities by defining a few common and critical operations
 # (and these don't need the extra convert code)
-getproperty(x::Module, f::Symbol) = getfield(x, f)
-setproperty!(x::Module, f::Symbol, v) = setfield!(x, f, v)
-getproperty(x::Type, f::Symbol) = getfield(x, f)
-setproperty!(x::Type, f::Symbol, v) = setfield!(x, f, v)
-getproperty(x::Tuple, f::Int) = getfield(x, f)
+getproperty(x::Module, f::Symbol) = (@inline; getfield(x, f))
+setproperty!(x::Module, f::Symbol, v) = setfield!(x, f, v) # to get a decent error
+getproperty(x::Type, f::Symbol) = (@inline; getfield(x, f))
+setproperty!(x::Type, f::Symbol, v) = error("setfield! fields of Types should not be changed")
+getproperty(x::Tuple, f::Int) = (@inline; getfield(x, f))
 setproperty!(x::Tuple, f::Int, v) = setfield!(x, f, v) # to get a decent error
 
-getproperty(x, f::Symbol) = getfield(x, f)
-dotgetproperty(x, f) = getproperty(x, f)
+getproperty(x, f::Symbol) = (@inline; getfield(x, f))
 setproperty!(x, f::Symbol, v) = setfield!(x, f, convert(fieldtype(typeof(x), f), v))
 
+dotgetproperty(x, f) = getproperty(x, f)
+
+getproperty(x::Module, f::Symbol, order::Symbol) = (@inline; getfield(x, f, order))
+setproperty!(x::Module, f::Symbol, v, order::Symbol) = setfield!(x, f, v, order) # to get a decent error
+getproperty(x::Type, f::Symbol, order::Symbol) = (@inline; getfield(x, f, order))
+setproperty!(x::Type, f::Symbol, v, order::Symbol) = error("setfield! fields of Types should not be changed")
+getproperty(x::Tuple, f::Int, order::Symbol) = (@inline; getfield(x, f, order))
+setproperty!(x::Tuple, f::Int, v, order::Symbol) = setfield!(x, f, v, order) # to get a decent error
+
+getproperty(x, f::Symbol, order::Symbol) = (@inline; getfield(x, f, order))
+setproperty!(x, f::Symbol, v, order::Symbol) = (@inline; setfield!(x, f, convert(fieldtype(typeof(x), f), v), order))
+
+swapproperty!(x, f::Symbol, v, order::Symbol=:notatomic) =
+    (@inline; Core.swapfield!(x, f, convert(fieldtype(typeof(x), f), v), order))
+modifyproperty!(x, f::Symbol, op, v, order::Symbol=:notatomic) =
+    (@inline; Core.modifyfield!(x, f, op, v, order))
+replaceproperty!(x, f::Symbol, expected, desired, success_order::Symbol=:notatomic, fail_order::Symbol=success_order) =
+    (@inline; Core.replacefield!(x, f, expected, convert(fieldtype(typeof(x), f), desired), success_order, fail_order))
+
+
 include("coreio.jl")
 
 eval(x) = Core.eval(Base, x)
@@ -79,6 +103,9 @@ include("options.jl")
 include("promotion.jl")
 include("tuple.jl")
 include("expr.jl")
+Pair{A, B}(@nospecialize(a), @nospecialize(b)) where {A, B} = (@inline; Pair{A, B}(convert(A, a)::A, convert(B, b)::B))
+#Pair{Any, B}(@nospecialize(a::Any), b) where {B} = (@inline; Pair{Any, B}(a, Base.convert(B, b)::B))
+#Pair{A, Any}(a, @nospecialize(b::Any)) where {A} = (@inline; Pair{A, Any}(Base.convert(A, a)::A, b))
 include("pair.jl")
 include("traits.jl")
 include("range.jl")
@@ -213,12 +240,11 @@ include("methodshow.jl")
 include("cartesian.jl")
 using .Cartesian
 include("multidimensional.jl")
-include("permuteddimsarray.jl")
-using .PermutedDimsArrays
 
 include("broadcast.jl")
 using .Broadcast
-using .Broadcast: broadcasted, broadcasted_kwsyntax, materialize, materialize!
+using .Broadcast: broadcasted, broadcasted_kwsyntax, materialize, materialize!,
+                  broadcast_preserving_zero_d, andand, oror
 
 # missing values
 include("missing.jl")
@@ -231,7 +257,9 @@ include("sysinfo.jl")
 include("libc.jl")
 using .Libc: getpid, gethostname, time
 
-include("env.jl")
+# Logging
+include("logging.jl")
+using .CoreLogging
 
 # Concurrency
 include("linked_list.jl")
@@ -243,9 +271,7 @@ include("task.jl")
 include("threads_overloads.jl")
 include("weakkeydict.jl")
 
-# Logging
-include("logging.jl")
-using .CoreLogging
+include("env.jl")
 
 # BinaryPlatforms, used by Artifacts
 include("binaryplatforms.jl")
@@ -293,6 +319,9 @@ end
 include("reducedim.jl")  # macros in this file relies on string.jl
 include("accumulate.jl")
 
+include("permuteddimsarray.jl")
+using .PermutedDimsArrays
+
 # basic data structures
 include("ordering.jl")
 using .Order
@@ -337,6 +366,9 @@ include("meta.jl")
 include("stacktraces.jl")
 using .StackTraces
 
+# experimental APIs
+include("experimental.jl")
+
 # utilities
 include("deepcopy.jl")
 include("download.jl")
@@ -360,9 +392,6 @@ include("util.jl")
 
 include("asyncmap.jl")
 
-# experimental API's
-include("experimental.jl")
-
 # deprecated functions
 include("deprecated.jl")
 
@@ -424,6 +453,9 @@ for match = _methods(+, (Int, Int), -1, get_world_counter())
     Core.svec(1, 2) == Core.svec(3, 4)
     any(t->t[1].line > 1, [(LineNumberNode(2,:none), :(1+1))])
 
+    # Code loading uses this
+    sortperm(mtime.(readdir(".")), rev=true)
+
     break   # only actually need to do this once
 end
 
diff --git a/base/Enums.jl b/base/Enums.jl
index 06860402fbcb1c..7b5e9587d5f6cb 100644
--- a/base/Enums.jl
+++ b/base/Enums.jl
@@ -25,10 +25,16 @@ Base.isless(x::T, y::T) where {T<:Enum} = isless(basetype(T)(x), basetype(T)(y))
 
 Base.Symbol(x::Enum) = namemap(typeof(x))[Integer(x)]::Symbol
 
-Base.print(io::IO, x::Enum) = print(io, Symbol(x))
+function _symbol(x::Enum)
+    names = namemap(typeof(x))
+    x = Integer(x)
+    get(() -> Symbol("<invalid #$x>"), names, x)::Symbol
+end
+
+Base.print(io::IO, x::Enum) = print(io, _symbol(x))
 
 function Base.show(io::IO, x::Enum)
-    sym = Symbol(x)
+    sym = _symbol(x)
     if !(get(io, :compact, false)::Bool)
         from = get(io, :module, Main)
         def = typeof(x).name.module
diff --git a/base/Makefile b/base/Makefile
index e024e546f94d30..56e1cbebf21bf1 100644
--- a/base/Makefile
+++ b/base/Makefile
@@ -6,12 +6,6 @@ include $(JULIAHOME)/Make.inc
 
 TAGGED_RELEASE_BANNER := ""
 
-ifneq ($(USEMSVC), 1)
-CPP_STDOUT := $(CPP) -P
-else
-CPP_STDOUT := $(CPP) -E
-endif
-
 all: $(addprefix $(BUILDDIR)/,pcre_h.jl errno_h.jl build_h.jl.phony features_h.jl file_constants.jl uv_constants.jl version_git.jl.phony)
 
 PCRE_CONST := 0x[0-9a-fA-F]+|[0-9]+|\([\-0-9]+\)
@@ -169,53 +163,69 @@ endif
 #	echo "$$P"
 
 define symlink_system_library
-symlink_$1: $$(build_private_libdir)/$1.$$(SHLIB_EXT)
-$$(build_private_libdir)/$1.$$(SHLIB_EXT):
-	REALPATH=`$$(call spawn,$$(build_depsbindir)/libwhich) -p $$(notdir $$@)` && \
-	$$(call resolve_path,REALPATH) && \
-	[ -e "$$$$REALPATH" ] && \
-	([ ! -e "$$@" ] || rm "$$@") && \
-	echo ln -sf "$$$$REALPATH" "$$@" && \
-	ln -sf "$$$$REALPATH" "$$@"
-ifneq ($2,)
-ifneq ($$(USE_SYSTEM_$2),0)
-SYMLINK_SYSTEM_LIBRARIES += symlink_$1
-endif
+libname_$2 := $$(notdir $(call versioned_libname,$2,$3))
+libpath_$2 := $$(shell $$(call spawn,$$(build_depsbindir)/libwhich) -p $$(libname_$2) 2>/dev/null)
+symlink_$2: $$(build_private_libdir)/$$(libname_$2)
+$$(build_private_libdir)/$$(libname_$2):
+	@if [ -e "$$(libpath_$2)" ]; then \
+		REALPATH=$$(libpath_$2); \
+		$$(call resolve_path,REALPATH) && \
+		[ -e "$$$$REALPATH" ] && \
+		([ ! -e "$$@" ] || rm "$$@") && \
+		echo ln -sf "$$$$REALPATH" "$$@" && \
+		ln -sf "$$$$REALPATH" "$$@"; \
+	else \
+		if [ "$4" != "ALLOW_FAILURE" ]; then \
+			echo "System library symlink failure: Unable to locate $$(libname_$2) on your system!" >&2; \
+			false; \
+		fi; \
+	fi
+ifneq ($$(USE_SYSTEM_$1),0)
+SYMLINK_SYSTEM_LIBRARIES += symlink_$2
 endif
 endef
 
 # the following excludes: libuv.a, libutf8proc.a
 
-$(eval $(call symlink_system_library,$(LIBMNAME)))
 ifneq ($(USE_SYSTEM_LIBM),0)
-SYMLINK_SYSTEM_LIBRARIES += symlink_$(LIBMNAME)
+$(eval $(call symlink_system_library,LIBM,$(LIBMNAME)))
 else ifneq ($(USE_SYSTEM_OPENLIBM),0)
-SYMLINK_SYSTEM_LIBRARIES += symlink_$(LIBMNAME)
+$(eval $(call symlink_system_library,OPENLIBM,$(LIBMNAME)))
 endif
 
-$(eval $(call symlink_system_library,libpcre2-8,PCRE))
-$(eval $(call symlink_system_library,libdSFMT,DSFMT))
-$(eval $(call symlink_system_library,$(LIBBLASNAME),BLAS))
+$(eval $(call symlink_system_library,CSL,libgcc_s,1))
+ifneq (,$(LIBGFORTRAN_VERSION))
+$(eval $(call symlink_system_library,CSL,libgfortran,$(LIBGFORTRAN_VERSION)))
+endif
+$(eval $(call symlink_system_library,CSL,libquadmath,0))
+$(eval $(call symlink_system_library,CSL,libstdc++,6))
+# We allow libssp, libatomic and libgomp to fail as they are not available on all systems
+$(eval $(call symlink_system_library,CSL,libssp,0,ALLOW_FAILURE))
+$(eval $(call symlink_system_library,CSL,libatomic,1,ALLOW_FAILURE))
+$(eval $(call symlink_system_library,CSL,libgomp,1,ALLOW_FAILURE))
+$(eval $(call symlink_system_library,PCRE,libpcre2-8))
+$(eval $(call symlink_system_library,DSFMT,libdSFMT))
+$(eval $(call symlink_system_library,BLAS,$(LIBBLASNAME)))
 ifneq ($(LIBLAPACKNAME),$(LIBBLASNAME))
-$(eval $(call symlink_system_library,$(LIBLAPACKNAME),LAPACK))
+$(eval $(call symlink_system_library,LAPACK,$(LIBLAPACKNAME)))
 endif
-$(eval $(call symlink_system_library,libgmp,GMP))
-$(eval $(call symlink_system_library,libmpfr,MPFR))
-$(eval $(call symlink_system_library,libmbedtls,MBEDTLS))
-$(eval $(call symlink_system_library,libmbedcrypto,MBEDTLS))
-$(eval $(call symlink_system_library,libmbedx509,MBEDTLS))
-$(eval $(call symlink_system_library,libssh2,LIBSSH2))
-$(eval $(call symlink_system_library,libnghttp2,NGHTTP2))
-$(eval $(call symlink_system_library,libcurl,CURL))
-$(eval $(call symlink_system_library,libgit2,LIBGIT2))
-$(eval $(call symlink_system_library,libamd,SUITESPARSE))
-$(eval $(call symlink_system_library,libcamd,SUITESPARSE))
-$(eval $(call symlink_system_library,libccolamd,SUITESPARSE))
-$(eval $(call symlink_system_library,libcholmod,SUITESPARSE))
-$(eval $(call symlink_system_library,libcolamd,SUITESPARSE))
-$(eval $(call symlink_system_library,libumfpack,SUITESPARSE))
-$(eval $(call symlink_system_library,libspqr,SUITESPARSE))
-$(eval $(call symlink_system_library,libsuitesparseconfig,SUITESPARSE))
+$(eval $(call symlink_system_library,GMP,libgmp))
+$(eval $(call symlink_system_library,MPFR,libmpfr))
+$(eval $(call symlink_system_library,MBEDTLS,libmbedtls))
+$(eval $(call symlink_system_library,MBEDTLS,libmbedcrypto))
+$(eval $(call symlink_system_library,MBEDTLS,libmbedx509))
+$(eval $(call symlink_system_library,LIBSSH2,libssh2))
+$(eval $(call symlink_system_library,NGHTTP2,libnghttp2))
+$(eval $(call symlink_system_library,CURL,libcurl))
+$(eval $(call symlink_system_library,LIBGIT2,libgit2))
+$(eval $(call symlink_system_library,LIBSUITESPARSE,libamd))
+$(eval $(call symlink_system_library,LIBSUITESPARSE,libcamd))
+$(eval $(call symlink_system_library,LIBSUITESPARSE,libccolamd))
+$(eval $(call symlink_system_library,LIBSUITESPARSE,libcholmod))
+$(eval $(call symlink_system_library,LIBSUITESPARSE,libcolamd))
+$(eval $(call symlink_system_library,LIBSUITESPARSE,libumfpack))
+$(eval $(call symlink_system_library,LIBSUITESPARSE,libspqr))
+$(eval $(call symlink_system_library,LIBSUITESPARSE,libsuitesparseconfig))
 # EXCLUDED LIBRARIES (installed/used, but not vendored for use with dlopen):
 # libunwind
 endif # WINNT
diff --git a/base/abstractarray.jl b/base/abstractarray.jl
index 4bc3c4f70ebb4f..5ded231fa2be27 100644
--- a/base/abstractarray.jl
+++ b/base/abstractarray.jl
@@ -8,6 +8,8 @@
 Supertype for `N`-dimensional arrays (or array-like types) with elements of type `T`.
 [`Array`](@ref) and other types are subtypes of this. See the manual section on the
 [`AbstractArray` interface](@ref man-interface-array).
+
+See also: [`AbstractVector`](@ref), [`AbstractMatrix`](@ref), [`eltype`](@ref), [`ndims`](@ref).
 """
 AbstractArray
 
@@ -24,6 +26,8 @@ dimension to just get the length of that dimension.
 Note that `size` may not be defined for arrays with non-standard indices, in which case [`axes`](@ref)
 may be useful. See the manual chapter on [arrays with custom indices](@ref man-custom-indices).
 
+See also: [`length`](@ref), [`ndims`](@ref), [`eachindex`](@ref), [`sizeof`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = fill(1, (2,3,4));
@@ -66,7 +70,7 @@ ix[(begin+1):end]  # works for generalized indexes
 ```
 """
 function axes(A::AbstractArray{T,N}, d) where {T,N}
-    @_inline_meta
+    @inline
     d::Integer <= N ? axes(A)[d] : OneTo(1)
 end
 
@@ -75,6 +79,8 @@ end
 
 Return the tuple of valid indices for array `A`.
 
+See also: [`size`](@ref), [`keys`](@ref), [`eachindex`](@ref).
+
 # Examples
 
 ```jldoctest
@@ -85,7 +91,7 @@ julia> axes(A)
 ```
 """
 function axes(A)
-    @_inline_meta
+    @inline
     map(oneto, size(A))
 end
 
@@ -96,7 +102,8 @@ end
 Return `true` if the indices of `A` start with something other than 1 along any axis.
 If multiple arguments are passed, equivalent to `has_offset_axes(A) | has_offset_axes(B) | ...`.
 """
-has_offset_axes(A)    = _tuple_any(x->Int(first(x))::Int != 1, axes(A))
+has_offset_axes(A) = _tuple_any(x->Int(first(x))::Int != 1, axes(A))
+has_offset_axes(A::AbstractVector) = Int(firstindex(A))::Int != 1 # improve performance of a common case (ranges)
 has_offset_axes(A...) = _tuple_any(has_offset_axes, A)
 has_offset_axes(::Colon) = false
 
@@ -106,12 +113,22 @@ require_one_based_indexing(A...) = !has_offset_axes(A...) || throw(ArgumentError
 # for d=1. 1d arrays are heavily used, and the first dimension comes up
 # in other applications.
 axes1(A::AbstractArray{<:Any,0}) = OneTo(1)
-axes1(A::AbstractArray) = (@_inline_meta; axes(A)[1])
+axes1(A::AbstractArray) = (@inline; axes(A)[1])
 axes1(iter) = oneto(length(iter))
 
-unsafe_indices(A) = axes(A)
-unsafe_indices(r::AbstractRange) = (oneto(unsafe_length(r)),) # Ranges use checked_sub for size
+"""
+    keys(a::AbstractArray)
+
+Return an efficient array describing all valid indices for `a` arranged in the shape of `a` itself.
+
+The keys of 1-dimensional arrays (vectors) are integers, whereas all other N-dimensional
+arrays use [`CartesianIndex`](@ref) to describe their locations.  Often the special array
+types [`LinearIndices`](@ref) and [`CartesianIndices`](@ref) are used to efficiently
+represent these arrays of integers and `CartesianIndex`es, respectively.
 
+Note that the `keys` of an array might not be the most efficient index type; for maximum
+performance use [`eachindex`](@ref) instead.
+"""
 keys(a::AbstractArray) = CartesianIndices(axes(a))
 keys(a::AbstractVector) = LinearIndices(a)
 
@@ -173,6 +190,8 @@ For dictionary types, this will be a `Pair{KeyType,ValType}`. The definition
 instead of types. However the form that accepts a type argument should be defined for new
 types.
 
+See also: [`keytype`](@ref), [`typeof`](@ref).
+
 # Examples
 ```jldoctest
 julia> eltype(fill(1f0, (2,2)))
@@ -201,6 +220,8 @@ elsize(A::AbstractArray) = elsize(typeof(A))
 
 Return the number of dimensions of `A`.
 
+See also: [`size`](@ref), [`axes`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = fill(1, (3,4,5));
@@ -210,7 +231,7 @@ julia> ndims(A)
 ```
 """
 ndims(::AbstractArray{T,N}) where {T,N} = N
-ndims(::Type{<:AbstractArray{T,N}}) where {T,N} = N
+ndims(::Type{<:AbstractArray{<:Any,N}}) where {N} = N
 
 """
     length(collection) -> Integer
@@ -219,6 +240,8 @@ Return the number of elements in the collection.
 
 Use [`lastindex`](@ref) to get the last valid index of an indexable collection.
 
+See also: [`size`](@ref), [`ndims`](@ref), [`eachindex`](@ref).
+
 # Examples
 ```jldoctest
 julia> length(1:5)
@@ -247,13 +270,13 @@ julia> length([1 2; 3 4])
 4
 ```
 """
-length(t::AbstractArray) = (@_inline_meta; prod(size(t)))
+length(t::AbstractArray) = (@inline; prod(size(t)))
 
 # `eachindex` is mostly an optimization of `keys`
 eachindex(itrs...) = keys(itrs...)
 
 # eachindex iterates over all indices. IndexCartesian definitions are later.
-eachindex(A::AbstractVector) = (@_inline_meta(); axes1(A))
+eachindex(A::AbstractVector) = (@inline(); axes1(A))
 
 
 @noinline function throw_eachindex_mismatch_indices(::IndexLinear, inds...)
@@ -277,7 +300,7 @@ If you supply more than one `AbstractArray` argument, `eachindex` will create an
 iterable object that is fast for all arguments (a [`UnitRange`](@ref)
 if all inputs have fast linear indexing, a [`CartesianIndices`](@ref)
 otherwise).
-If the arrays have different sizes and/or dimensionalities, a DimensionMismatch exception
+If the arrays have different sizes and/or dimensionalities, a `DimensionMismatch` exception
 will be thrown.
 # Examples
 ```jldoctest
@@ -298,27 +321,27 @@ CartesianIndex(1, 1)
 CartesianIndex(2, 1)
 ```
 """
-eachindex(A::AbstractArray) = (@_inline_meta(); eachindex(IndexStyle(A), A))
+eachindex(A::AbstractArray) = (@inline(); eachindex(IndexStyle(A), A))
 
 function eachindex(A::AbstractArray, B::AbstractArray)
-    @_inline_meta
+    @inline
     eachindex(IndexStyle(A,B), A, B)
 end
 function eachindex(A::AbstractArray, B::AbstractArray...)
-    @_inline_meta
+    @inline
     eachindex(IndexStyle(A,B...), A, B...)
 end
-eachindex(::IndexLinear, A::AbstractArray) = (@_inline_meta; oneto(length(A)))
-eachindex(::IndexLinear, A::AbstractVector) = (@_inline_meta; axes1(A))
+eachindex(::IndexLinear, A::AbstractArray) = (@inline; oneto(length(A)))
+eachindex(::IndexLinear, A::AbstractVector) = (@inline; axes1(A))
 function eachindex(::IndexLinear, A::AbstractArray, B::AbstractArray...)
-    @_inline_meta
+    @inline
     indsA = eachindex(IndexLinear(), A)
     _all_match_first(X->eachindex(IndexLinear(), X), indsA, B...) ||
         throw_eachindex_mismatch_indices(IndexLinear(), eachindex(A), eachindex.(B)...)
     indsA
 end
 function _all_match_first(f::F, inds, A, B...) where F<:Function
-    @_inline_meta
+    @inline
     (inds == f(A)) & _all_match_first(f, inds, B...)
 end
 _all_match_first(f::F, inds) where F<:Function = true
@@ -335,6 +358,8 @@ Return the last index of `collection`. If `d` is given, return the last index of
 The syntaxes `A[end]` and `A[end, end]` lower to `A[lastindex(A)]` and
 `A[lastindex(A, 1), lastindex(A, 2)]`, respectively.
 
+See also: [`axes`](@ref), [`firstindex`](@ref), [`eachindex`](@ref), [`prevind`](@ref).
+
 # Examples
 ```jldoctest
 julia> lastindex([1,2,4])
@@ -344,8 +369,8 @@ julia> lastindex(rand(3,4,5), 2)
 4
 ```
 """
-lastindex(a::AbstractArray) = (@_inline_meta; last(eachindex(IndexLinear(), a)))
-lastindex(a, d) = (@_inline_meta; last(axes(a, d)))
+lastindex(a::AbstractArray) = (@inline; last(eachindex(IndexLinear(), a)))
+lastindex(a, d) = (@inline; last(axes(a, d)))
 
 """
     firstindex(collection) -> Integer
@@ -353,6 +378,11 @@ lastindex(a, d) = (@_inline_meta; last(axes(a, d)))
 
 Return the first index of `collection`. If `d` is given, return the first index of `collection` along dimension `d`.
 
+The syntaxes `A[begin]` and `A[1, begin]` lower to `A[firstindex(A)]` and
+`A[1, firstindex(A, 2)]`, respectively.
+
+See also: [`first`](@ref), [`axes`](@ref), [`lastindex`](@ref), [`nextind`](@ref).
+
 # Examples
 ```jldoctest
 julia> firstindex([1,2,4])
@@ -362,8 +392,8 @@ julia> firstindex(rand(3,4,5), 2)
 1
 ```
 """
-firstindex(a::AbstractArray) = (@_inline_meta; first(eachindex(IndexLinear(), a)))
-firstindex(a, d) = (@_inline_meta; first(axes(a, d)))
+firstindex(a::AbstractArray) = (@inline; first(eachindex(IndexLinear(), a)))
+firstindex(a, d) = (@inline; first(axes(a, d)))
 
 first(a::AbstractArray) = a[first(eachindex(a))]
 
@@ -373,6 +403,8 @@ first(a::AbstractArray) = a[first(eachindex(a))]
 Get the first element of an iterable collection. Return the start point of an
 [`AbstractRange`](@ref) even if it is empty.
 
+See also: [`only`](@ref), [`firstindex`](@ref), [`last`](@ref).
+
 # Examples
 ```jldoctest
 julia> first(2:2:10)
@@ -391,9 +423,11 @@ end
 """
     first(itr, n::Integer)
 
-Get the first `n` elements of the iterable collection `itr`, or fewer elements if `v` is not
+Get the first `n` elements of the iterable collection `itr`, or fewer elements if `itr` is not
 long enough.
 
+See also: [`startswith`](@ref), [`Iterators.take`](@ref).
+
 !!! compat "Julia 1.6"
     This method requires at least Julia 1.6.
 
@@ -425,6 +459,8 @@ Get the last element of an ordered collection, if it can be computed in O(1) tim
 accomplished by calling [`lastindex`](@ref) to get the last index. Return the end
 point of an [`AbstractRange`](@ref) even if it is empty.
 
+See also [`first`](@ref), [`endswith`](@ref).
+
 # Examples
 ```jldoctest
 julia> last(1:2:10)
@@ -439,7 +475,7 @@ last(a) = a[end]
 """
     last(itr, n::Integer)
 
-Get the last `n` elements of the iterable collection `itr`, or fewer elements if `v` is not
+Get the last `n` elements of the iterable collection `itr`, or fewer elements if `itr` is not
 long enough.
 
 !!! compat "Julia 1.6"
@@ -471,6 +507,8 @@ end
 
 Return a tuple of the memory strides in each dimension.
 
+See also: [`stride`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = fill(1, (3,4,5));
@@ -486,6 +524,8 @@ function strides end
 
 Return the distance in memory (in number of elements) between adjacent elements in dimension `k`.
 
+See also: [`strides`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = fill(1, (3,4,5));
@@ -537,14 +577,14 @@ end
 function trailingsize(inds::Indices, n)
     s = 1
     for i=n:length(inds)
-        s *= unsafe_length(inds[i])
+        s *= length(inds[i])
     end
     return s
 end
 # This version is type-stable even if inds is heterogeneous
 function trailingsize(inds::Indices)
-    @_inline_meta
-    prod(map(unsafe_length, inds))
+    @inline
+    prod(map(length, inds))
 end
 
 ## Bounds checking ##
@@ -591,18 +631,18 @@ false
 ```
 """
 function checkbounds(::Type{Bool}, A::AbstractArray, I...)
-    @_inline_meta
+    @inline
     checkbounds_indices(Bool, axes(A), I)
 end
 
 # Linear indexing is explicitly allowed when there is only one (non-cartesian) index
 function checkbounds(::Type{Bool}, A::AbstractArray, i)
-    @_inline_meta
+    @inline
     checkindex(Bool, eachindex(IndexLinear(), A), i)
 end
 # As a special extension, allow using logical arrays that match the source array exactly
 function checkbounds(::Type{Bool}, A::AbstractArray{<:Any,N}, I::AbstractArray{Bool,N}) where N
-    @_inline_meta
+    @inline
     axes(A) == axes(I)
 end
 
@@ -612,7 +652,7 @@ end
 Throw an error if the specified indices `I` are not in bounds for the given array `A`.
 """
 function checkbounds(A::AbstractArray, I...)
-    @_inline_meta
+    @inline
     checkbounds(Bool, A, I...) || throw_boundserror(A, I)
     nothing
 end
@@ -638,17 +678,17 @@ of `IA`.
 See also [`checkbounds`](@ref).
 """
 function checkbounds_indices(::Type{Bool}, IA::Tuple, I::Tuple)
-    @_inline_meta
+    @inline
     checkindex(Bool, IA[1], I[1])::Bool & checkbounds_indices(Bool, tail(IA), tail(I))
 end
 function checkbounds_indices(::Type{Bool}, ::Tuple{}, I::Tuple)
-    @_inline_meta
+    @inline
     checkindex(Bool, OneTo(1), I[1])::Bool & checkbounds_indices(Bool, (), tail(I))
 end
-checkbounds_indices(::Type{Bool}, IA::Tuple, ::Tuple{}) = (@_inline_meta; all(x->unsafe_length(x)==1, IA))
+checkbounds_indices(::Type{Bool}, IA::Tuple, ::Tuple{}) = (@inline; all(x->length(x)==1, IA))
 checkbounds_indices(::Type{Bool}, ::Tuple{}, ::Tuple{}) = true
 
-throw_boundserror(A, I) = (@_noinline_meta; throw(BoundsError(A, I)))
+throw_boundserror(A, I) = (@noinline; throw(BoundsError(A, I)))
 
 # check along a single dimension
 """
@@ -659,6 +699,8 @@ Return `true` if the given `index` is within the bounds of
 arrays can extend this method in order to provide a specialized bounds
 checking implementation.
 
+See also [`checkbounds`](@ref).
+
 # Examples
 ```jldoctest
 julia> checkindex(Bool, 1:20, 8)
@@ -680,7 +722,7 @@ end
 checkindex(::Type{Bool}, indx::AbstractUnitRange, I::AbstractVector{Bool}) = indx == axes1(I)
 checkindex(::Type{Bool}, indx::AbstractUnitRange, I::AbstractArray{Bool}) = false
 function checkindex(::Type{Bool}, inds::AbstractUnitRange, I::AbstractArray)
-    @_inline_meta
+    @inline
     b = true
     for i in I
         b &= checkindex(Bool, inds, i)
@@ -734,6 +776,7 @@ julia> similar(falses(10), Float64, 2, 4)
  2.18425e-314  2.18425e-314  2.18425e-314  2.18425e-314
 ```
 
+See also: [`undef`](@ref), [`isassigned`](@ref).
 """
 similar(a::AbstractArray{T}) where {T}                             = similar(a, T)
 similar(a::AbstractArray, ::Type{T}) where {T}                     = similar(a, T, to_shape(axes(a)))
@@ -746,6 +789,7 @@ similar(a::AbstractArray, ::Type{T}, dims::DimOrInd...) where {T}  = similar(a,
 # define this method to convert supported axes to Ints, with the expectation that an offset array
 # package will define a method with dims::Tuple{Union{Integer, UnitRange}, Vararg{Union{Integer, UnitRange}}}
 similar(a::AbstractArray, ::Type{T}, dims::Tuple{Union{Integer, OneTo}, Vararg{Union{Integer, OneTo}}}) where {T} = similar(a, T, to_shape(dims))
+similar(a::AbstractArray, ::Type{T}, dims::Tuple{Integer, Vararg{Integer}}) where {T} = similar(a, T, to_shape(dims))
 # similar creates an Array by default
 similar(a::AbstractArray, ::Type{T}, dims::Dims{N}) where {T,N}    = Array{T,N}(undef, dims)
 
@@ -789,6 +833,8 @@ similar(::Type{T}, dims::Dims) where {T<:AbstractArray} = T(undef, dims)
 
 Create an empty vector similar to `v`, optionally changing the `eltype`.
 
+See also: [`empty!`](@ref), [`isempty`](@ref), [`isassigned`](@ref).
+
 # Examples
 
 ```jldoctest
@@ -813,6 +859,7 @@ elements in `dst`.
 If `dst` and `src` are of the same type, `dst == src` should hold after
 the call. If `dst` and `src` are multidimensional arrays, they must have
 equal [`axes`](@ref).
+
 See also [`copyto!`](@ref).
 
 !!! compat "Julia 1.1"
@@ -920,11 +967,12 @@ end
 """
     copyto!(dest::AbstractArray, src) -> dest
 
-
 Copy all elements from collection `src` to array `dest`, whose length must be greater than
 or equal to the length `n` of `src`. The first `n` elements of `dest` are overwritten,
 the other elements are left untouched.
 
+See also [`copy!`](@ref Base.copy!), [`copy`](@ref).
+
 # Examples
 ```jldoctest
 julia> x = [1., 0., 3., 0., 5.];
@@ -1082,7 +1130,7 @@ function copymutable(a::AbstractArray)
 end
 copymutable(itr) = collect(itr)
 
-zero(x::AbstractArray{T}) where {T} = fill!(similar(x), zero(T))
+zero(x::AbstractArray{T}) where {T} = fill!(similar(x, typeof(zero(T))), zero(T))
 
 ## iteration support for arrays by iterating over `eachindex` in the array ##
 # Allows fast iteration by default for both IndexLinear and IndexCartesian arrays
@@ -1115,7 +1163,7 @@ end
 # convenience in cases that work.
 pointer(x::AbstractArray{T}) where {T} = unsafe_convert(Ptr{T}, x)
 function pointer(x::AbstractArray{T}, i::Integer) where T
-    @_inline_meta
+    @inline
     unsafe_convert(Ptr{T}, x) + Int(_memory_offset(x, i))::Int
 end
 
@@ -1170,10 +1218,10 @@ function getindex(A::AbstractArray, I...)
     _getindex(IndexStyle(A), A, to_indices(A, I)...)
 end
 # To avoid invalidations from multidimensional.jl: getindex(A::Array, i1::Union{Integer, CartesianIndex}, I::Union{Integer, CartesianIndex}...)
-getindex(A::Array, i1::Integer, I::Integer...) = A[to_indices(A, (i1, I...))...]
+@propagate_inbounds getindex(A::Array, i1::Integer, I::Integer...) = A[to_indices(A, (i1, I...))...]
 
 function unsafe_getindex(A::AbstractArray, I...)
-    @_inline_meta
+    @inline
     @inbounds r = getindex(A, I...)
     r
 end
@@ -1192,7 +1240,7 @@ _getindex(::IndexStyle, A::AbstractArray, I...) =
 _getindex(::IndexLinear, A::AbstractVector, i::Int) = (@_propagate_inbounds_meta; getindex(A, i))  # ambiguity resolution in case packages specialize this (to be avoided if at all possible, but see Interpolations.jl)
 _getindex(::IndexLinear, A::AbstractArray, i::Int) = (@_propagate_inbounds_meta; getindex(A, i))
 function _getindex(::IndexLinear, A::AbstractArray, I::Vararg{Int,M}) where M
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(A, I...) # generally _to_linear_index requires bounds checking
     @inbounds r = getindex(A, _to_linear_index(A, I...))
     r
@@ -1200,11 +1248,11 @@ end
 _to_linear_index(A::AbstractArray, i::Integer) = i
 _to_linear_index(A::AbstractVector, i::Integer, I::Integer...) = i
 _to_linear_index(A::AbstractArray) = first(LinearIndices(A))
-_to_linear_index(A::AbstractArray, I::Integer...) = (@_inline_meta; _sub2ind(A, I...))
+_to_linear_index(A::AbstractArray, I::Integer...) = (@inline; _sub2ind(A, I...))
 
 ## IndexCartesian Scalar indexing: Canonical method is full dimensionality of Ints
 function _getindex(::IndexCartesian, A::AbstractArray, I::Vararg{Int,M}) where M
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(A, I...) # generally _to_subscript_indices requires bounds checking
     @inbounds r = getindex(A, _to_subscript_indices(A, I...)...)
     r
@@ -1213,13 +1261,13 @@ function _getindex(::IndexCartesian, A::AbstractArray{T,N}, I::Vararg{Int, N}) w
     @_propagate_inbounds_meta
     getindex(A, I...)
 end
-_to_subscript_indices(A::AbstractArray, i::Integer) = (@_inline_meta; _unsafe_ind2sub(A, i))
-_to_subscript_indices(A::AbstractArray{T,N}) where {T,N} = (@_inline_meta; fill_to_length((), 1, Val(N)))
+_to_subscript_indices(A::AbstractArray, i::Integer) = (@inline; _unsafe_ind2sub(A, i))
+_to_subscript_indices(A::AbstractArray{T,N}) where {T,N} = (@inline; fill_to_length((), 1, Val(N)))
 _to_subscript_indices(A::AbstractArray{T,0}) where {T} = ()
 _to_subscript_indices(A::AbstractArray{T,0}, i::Integer) where {T} = ()
 _to_subscript_indices(A::AbstractArray{T,0}, I::Integer...) where {T} = ()
 function _to_subscript_indices(A::AbstractArray{T,N}, I::Integer...) where {T,N}
-    @_inline_meta
+    @inline
     J, Jrem = IteratorsMD.split(I, Val(N))
     _to_subscript_indices(A, J, Jrem)
 end
@@ -1227,15 +1275,15 @@ _to_subscript_indices(A::AbstractArray, J::Tuple, Jrem::Tuple{}) =
     __to_subscript_indices(A, axes(A), J, Jrem)
 function __to_subscript_indices(A::AbstractArray,
         ::Tuple{AbstractUnitRange,Vararg{AbstractUnitRange}}, J::Tuple, Jrem::Tuple{})
-    @_inline_meta
+    @inline
     (J..., map(first, tail(_remaining_size(J, axes(A))))...)
 end
 _to_subscript_indices(A, J::Tuple, Jrem::Tuple) = J # already bounds-checked, safe to drop
 _to_subscript_indices(A::AbstractArray{T,N}, I::Vararg{Int,N}) where {T,N} = I
 _remaining_size(::Tuple{Any}, t::Tuple) = t
-_remaining_size(h::Tuple, t::Tuple) = (@_inline_meta; _remaining_size(tail(h), tail(t)))
+_remaining_size(h::Tuple, t::Tuple) = (@inline; _remaining_size(tail(h), tail(t)))
 _unsafe_ind2sub(::Tuple{}, i) = () # _ind2sub may throw(BoundsError()) in this case
-_unsafe_ind2sub(sz, i) = (@_inline_meta; _ind2sub(sz, i))
+_unsafe_ind2sub(sz, i) = (@inline; _ind2sub(sz, i))
 
 ## Setindex! is defined similarly. We first dispatch to an internal _setindex!
 # function that allows dispatch on array storage
@@ -1267,7 +1315,7 @@ function setindex!(A::AbstractArray, v, I...)
     _setindex!(IndexStyle(A), A, v, to_indices(A, I)...)
 end
 function unsafe_setindex!(A::AbstractArray, v, I...)
-    @_inline_meta
+    @inline
     @inbounds r = setindex!(A, v, I...)
     r
 end
@@ -1285,7 +1333,7 @@ _setindex!(::IndexStyle, A::AbstractArray, v, I...) =
 ## IndexLinear Scalar indexing
 _setindex!(::IndexLinear, A::AbstractArray, v, i::Int) = (@_propagate_inbounds_meta; setindex!(A, v, i))
 function _setindex!(::IndexLinear, A::AbstractArray, v, I::Vararg{Int,M}) where M
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(A, I...)
     @inbounds r = setindex!(A, v, _to_linear_index(A, I...))
     r
@@ -1297,7 +1345,7 @@ function _setindex!(::IndexCartesian, A::AbstractArray{T,N}, v, I::Vararg{Int, N
     setindex!(A, v, I...)
 end
 function _setindex!(::IndexCartesian, A::AbstractArray, v, I::Vararg{Int,M}) where M
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(A, I...)
     @inbounds r = setindex!(A, v, _to_subscript_indices(A, I...)...)
     r
@@ -1364,7 +1412,7 @@ much more common case where aliasing does not occur. By default,
 `Base.unaliascopy(A)`.
 """
 unaliascopy(A::Array) = copy(A)
-unaliascopy(A::AbstractArray)::typeof(A) = (@_noinline_meta; _unaliascopy(A, copy(A)))
+unaliascopy(A::AbstractArray)::typeof(A) = (@noinline; _unaliascopy(A, copy(A)))
 _unaliascopy(A::T, C::T) where {T} = C
 _unaliascopy(A, C) = throw(ArgumentError("""
     an array of type `$(typename(typeof(A)).wrapper)` shares memory with another argument
@@ -1418,6 +1466,9 @@ RangeVecIntList{A<:AbstractVector{Int}} = Union{Tuple{Vararg{Union{AbstractRange
 get(A::AbstractArray, i::Integer, default) = checkbounds(Bool, A, i) ? A[i] : default
 get(A::AbstractArray, I::Tuple{}, default) = checkbounds(Bool, A) ? A[] : default
 get(A::AbstractArray, I::Dims, default) = checkbounds(Bool, A, I...) ? A[I...] : default
+get(f::Callable, A::AbstractArray, i::Integer) = checkbounds(Bool, A, i) ? A[i] : f()
+get(f::Callable, A::AbstractArray, I::Tuple{}) = checkbounds(Bool, A) ? A[] : f()
+get(f::Callable, A::AbstractArray, I::Dims) = checkbounds(Bool, A, I...) ? A[I...] : f()
 
 function get!(X::AbstractVector{T}, A::AbstractVector, I::Union{AbstractRange,AbstractVector{Int}}, default::T) where T
     # 1d is not linear indexing
@@ -1475,10 +1526,10 @@ vcat(X::T...) where {T<:Number} = T[ X[i] for i=1:length(X) ]
 hcat(X::T...) where {T}         = T[ X[j] for i=1:1, j=1:length(X) ]
 hcat(X::T...) where {T<:Number} = T[ X[j] for i=1:1, j=1:length(X) ]
 
-vcat(X::Number...) = hvcat_fill(Vector{promote_typeof(X...)}(undef, length(X)), X)
-hcat(X::Number...) = hvcat_fill(Matrix{promote_typeof(X...)}(undef, 1,length(X)), X)
-typed_vcat(::Type{T}, X::Number...) where {T} = hvcat_fill(Vector{T}(undef, length(X)), X)
-typed_hcat(::Type{T}, X::Number...) where {T} = hvcat_fill(Matrix{T}(undef, 1,length(X)), X)
+vcat(X::Number...) = hvcat_fill!(Vector{promote_typeof(X...)}(undef, length(X)), X)
+hcat(X::Number...) = hvcat_fill!(Matrix{promote_typeof(X...)}(undef, 1,length(X)), X)
+typed_vcat(::Type{T}, X::Number...) where {T} = hvcat_fill!(Vector{T}(undef, length(X)), X)
+typed_hcat(::Type{T}, X::Number...) where {T} = hvcat_fill!(Matrix{T}(undef, 1,length(X)), X)
 
 vcat(V::AbstractVector...) = typed_vcat(promote_eltype(V...), V...)
 vcat(V::AbstractVector{T}...) where {T} = typed_vcat(T, V...)
@@ -1490,7 +1541,7 @@ AbstractVecOrTuple{T} = Union{AbstractVector{<:T}, Tuple{Vararg{T}}}
 
 _typed_vcat_similar(V, ::Type{T}, n) where T = similar(V[1], T, n)
 _typed_vcat(::Type{T}, V::AbstractVecOrTuple{AbstractVector}) where T =
-    _typed_vcat!(_typed_vcat_similar(V, T, mapreduce(length, +, V)), V)
+    _typed_vcat!(_typed_vcat_similar(V, T, sum(map(length, V))), V)
 
 function _typed_vcat!(a::AbstractVector{T}, V::AbstractVecOrTuple{AbstractVector}) where T
     pos = 1
@@ -1581,11 +1632,21 @@ cat_size(A::AbstractArray) = size(A)
 cat_size(A, d) = 1
 cat_size(A::AbstractArray, d) = size(A, d)
 
+cat_length(::Any) = 1
+cat_length(a::AbstractArray) = length(a)
+
+cat_ndims(a) = 0
+cat_ndims(a::AbstractArray) = ndims(a)
+
 cat_indices(A, d) = OneTo(1)
 cat_indices(A::AbstractArray, d) = axes(A, d)
 
-cat_similar(A, ::Type{T}, shape) where T = Array{T}(undef, shape)
-cat_similar(A::AbstractArray, ::Type{T}, shape) where T = similar(A, T, shape)
+cat_similar(A, ::Type{T}, shape::Tuple) where T = Array{T}(undef, shape)
+cat_similar(A, ::Type{T}, shape::Vector) where T = Array{T}(undef, shape...)
+cat_similar(A::Array, ::Type{T}, shape::Tuple) where T = Array{T}(undef, shape)
+cat_similar(A::Array, ::Type{T}, shape::Vector) where T = Array{T}(undef, shape...)
+cat_similar(A::AbstractArray, T::Type, shape::Tuple) = similar(A, T, shape)
+cat_similar(A::AbstractArray, T::Type, shape::Vector) = similar(A, T, shape...)
 
 # These are for backwards compatibility (even though internal)
 cat_shape(dims, shape::Tuple{Vararg{Int}}) = shape
@@ -1604,7 +1665,7 @@ _cat_size_shape(dims, shape) = shape
 
 _cshp(ndim::Int, ::Tuple{}, ::Tuple{}, ::Tuple{}) = ()
 _cshp(ndim::Int, ::Tuple{}, ::Tuple{}, nshape) = nshape
-_cshp(ndim::Int, dims, ::Tuple{}, ::Tuple{}) = ntuple(b -> 1, Val(length(dims)))
+_cshp(ndim::Int, dims, ::Tuple{}, ::Tuple{}) = ntuple(Returns(1), Val(length(dims)))
 @inline _cshp(ndim::Int, dims, shape, ::Tuple{}) =
     (shape[1] + dims[1], _cshp(ndim + 1, tail(dims), tail(shape), ())...)
 @inline _cshp(ndim::Int, dims, ::Tuple{}, nshape) =
@@ -1778,6 +1839,24 @@ dimensions for every new input array and putting zero blocks elsewhere. For exam
 `cat(matrices...; dims=(1,2))` builds a block diagonal matrix, i.e. a block matrix with
 `matrices[1]`, `matrices[2]`, ... as diagonal blocks and matching zero blocks away from the
 diagonal.
+
+See also [`hcat`](@ref), [`vcat`](@ref), [`hvcat`](@ref), [`repeat`](@ref).
+
+# Examples
+```jldoctest
+julia> cat([1 2; 3 4], [pi, pi], fill(10, 2,3,1); dims=2)
+2×6×1 Array{Float64, 3}:
+[:, :, 1] =
+ 1.0  2.0  3.14159  10.0  10.0  10.0
+ 3.0  4.0  3.14159  10.0  10.0  10.0
+
+julia> cat(true, trues(2,2), trues(4)', dims=(1,2))
+4×7 Matrix{Bool}:
+ 1  0  0  0  0  0  0
+ 0  1  1  0  0  0  0
+ 0  1  1  0  0  0  0
+ 0  0  0  1  1  1  1
+```
 """
 @inline cat(A...; dims) = _cat(dims, A...)
 _cat(catdims, A::AbstractArray{T}...) where {T} = cat_t(T, A...; dims=catdims)
@@ -1810,7 +1889,7 @@ function hvcat(nbc::Integer, as...)
     mod(n,nbc) != 0 &&
         throw(ArgumentError("number of arrays $n is not a multiple of the requested number of block columns $nbc"))
     nbr = div(n,nbc)
-    hvcat(ntuple(i->nbc, nbr), as...)
+    hvcat(ntuple(Returns(nbc), nbr), as...)
 end
 
 """
@@ -1835,7 +1914,7 @@ julia> hvcat((3,3), a,b,c,d,e,f)
  1  2  3
  4  5  6
 
-julia> [a b;c d; e f]
+julia> [a b; c d; e f]
 3×2 Matrix{Int64}:
  1  2
  3  4
@@ -1921,9 +2000,13 @@ function hvcat(rows::Tuple{Vararg{Int}}, xs::T...) where T<:Number
     a
 end
 
-function hvcat_fill(a::Array, xs::Tuple)
-    k = 1
+function hvcat_fill!(a::Array, xs::Tuple)
     nr, nc = size(a,1), size(a,2)
+    len = length(xs)
+    if nr*nc != len
+        throw(ArgumentError("argument count $(len) does not match specified shape $((nr,nc))"))
+    end
+    k = 1
     for i=1:nr
         @inbounds for j=1:nc
             a[i,j] = xs[k]
@@ -1944,11 +2027,7 @@ function typed_hvcat(::Type{T}, rows::Tuple{Vararg{Int}}, xs::Number...) where T
             throw(ArgumentError("row $(i) has mismatched number of columns (expected $nc, got $(rows[i]))"))
         end
     end
-    len = length(xs)
-    if nr*nc != len
-        throw(ArgumentError("argument count $(len) does not match specified shape $((nr,nc))"))
-    end
-    hvcat_fill(Matrix{T}(undef, nr, nc), xs)
+    hvcat_fill!(Matrix{T}(undef, nr, nc), xs)
 end
 
 function typed_hvcat(::Type{T}, rows::Tuple{Vararg{Int}}, as...) where T
@@ -1962,6 +2041,465 @@ function typed_hvcat(::Type{T}, rows::Tuple{Vararg{Int}}, as...) where T
     T[rs...;]
 end
 
+## N-dimensional concatenation ##
+
+"""
+    hvncat(dim::Int, values...)
+    hvncat(dims::Tuple{Vararg{Int}}, row_first, values...)
+    hvncat(shape::Tuple{Vararg{Tuple}}, row_first, values...)
+
+Horizontal, vertical, and n-dimensional concatenation of many `values` in one call.
+
+This function is called for block matrix syntax. The first argument either specifies the
+shape of the concatenation, similar to `hvcat`, as a tuple of tuples, or the dimensions that
+specify the key number of elements along each axis, and is used to determine the output
+dimensions. The `dims` form is more performant, and is used by default when the concatenation
+operation has the same number of elements along each axis (e.g., `[a b; c d;;; e f ; g h]`).
+The `shape` form is used when the number of elements along each axis is unbalanced
+(e.g., `[a b ; c]`). Unbalanced syntax needs additional validation overhead. The `dim` form
+is an optimization for concatenation along just one dimension. `row_first` indicates how
+`values` are ordered. The meaning of the first and second elements of `shape` are also
+swapped based on `row_first`.
+
+# Examples
+```jldoctest
+julia> a, b, c, d, e, f = 1, 2, 3, 4, 5, 6
+(1, 2, 3, 4, 5, 6)
+
+julia> [a b c;;; d e f]
+1×3×2 Array{Int64, 3}:
+[:, :, 1] =
+ 1  2  3
+
+[:, :, 2] =
+ 4  5  6
+
+julia> hvncat((2,1,3), false, a,b,c,d,e,f)
+2×1×3 Array{Int64, 3}:
+[:, :, 1] =
+ 1
+ 2
+
+[:, :, 2] =
+ 3
+ 4
+
+[:, :, 3] =
+ 5
+ 6
+
+julia> [a b;;; c d;;; e f]
+1×2×3 Array{Int64, 3}:
+[:, :, 1] =
+ 1  2
+
+[:, :, 2] =
+ 3  4
+
+[:, :, 3] =
+ 5  6
+
+julia> hvncat(((3, 3), (3, 3), (6,)), true, a, b, c, d, e, f)
+1×3×2 Array{Int64, 3}:
+[:, :, 1] =
+ 1  2  3
+
+[:, :, 2] =
+ 4  5  6
+```
+
+# Examples for construction of the arguments:
+[a b c ; d e f ;;;
+ g h i ; j k l ;;;
+ m n o ; p q r ;;;
+ s t u ; v w x]
+=> dims = (2, 3, 4)
+
+[a b ; c ;;; d ;;;;]
+ ___   _     _
+ 2     1     1 = elements in each row (2, 1, 1)
+ _______     _
+ 3           1 = elements in each column (3, 1)
+ _____________
+ 4             = elements in each 3d slice (4,)
+ _____________
+ 4             = elements in each 4d slice (4,)
+ => shape = ((2, 1, 1), (3, 1), (4,), (4,)) with `row_first` = true
+"""
+hvncat(dimsshape::Tuple, row_first::Bool, xs...) = _hvncat(dimsshape, row_first, xs...)
+hvncat(dim::Int, xs...) = _hvncat(dim, true, xs...)
+
+_hvncat(dimsshape::Union{Tuple, Int}, row_first::Bool) = _typed_hvncat(Any, dimsshape, row_first)
+_hvncat(dimsshape::Union{Tuple, Int}, row_first::Bool, xs...) = _typed_hvncat(promote_eltypeof(xs...), dimsshape, row_first, xs...)
+_hvncat(dimsshape::Union{Tuple, Int}, row_first::Bool, xs::T...) where T<:Number = _typed_hvncat(T, dimsshape, row_first, xs...)
+_hvncat(dimsshape::Union{Tuple, Int}, row_first::Bool, xs::Number...) = _typed_hvncat(promote_typeof(xs...), dimsshape, row_first, xs...)
+_hvncat(dimsshape::Union{Tuple, Int}, row_first::Bool, xs::AbstractArray...) = _typed_hvncat(promote_eltype(xs...), dimsshape, row_first, xs...)
+_hvncat(dimsshape::Union{Tuple, Int}, row_first::Bool, xs::AbstractArray{T}...) where T = _typed_hvncat(T, dimsshape, row_first, xs...)
+
+
+typed_hvncat(T::Type, dimsshape::Tuple, row_first::Bool, xs...) = _typed_hvncat(T, dimsshape, row_first, xs...)
+typed_hvncat(T::Type, dim::Int, xs...) = _typed_hvncat(T, Val(dim), xs...)
+
+# 1-dimensional hvncat methods
+
+_typed_hvncat(::Type, ::Val{0}) = _typed_hvncat_0d_only_one()
+_typed_hvncat(T::Type, ::Val{0}, x) = fill(convert(T, x))
+_typed_hvncat(T::Type, ::Val{0}, x::Number) = fill(convert(T, x))
+_typed_hvncat(T::Type, ::Val{0}, x::AbstractArray) = convert.(T, x)
+_typed_hvncat(::Type, ::Val{0}, ::Any...) = _typed_hvncat_0d_only_one()
+_typed_hvncat(::Type, ::Val{0}, ::Number...) = _typed_hvncat_0d_only_one()
+_typed_hvncat(::Type, ::Val{0}, ::AbstractArray...) = _typed_hvncat_0d_only_one()
+
+_typed_hvncat_0d_only_one() =
+    throw(ArgumentError("a 0-dimensional array may only contain exactly one element"))
+
+_typed_hvncat(T::Type, dim::Int, ::Bool, xs...) = _typed_hvncat(T, Val(dim), xs...) # catches from _hvncat type promoters
+
+# No values to concatenate along dimension `N`: the result is an empty `N`-dimensional array.
+function _typed_hvncat(::Type{T}, ::Val{N}) where {T, N}
+    N >= 0 || throw(ArgumentError("concatenation dimension must be nonnegative"))
+    return Array{T, N}(undef, ntuple(Returns(0), Val(N)))
+end
+
+function _typed_hvncat(T::Type, ::Val{N}, xs::Number...) where N
+    N < 0 &&
+        throw(ArgumentError("concatenation dimension must be nonnegative"))
+    A = cat_similar(xs[1], T, (ntuple(x -> 1, Val(N - 1))..., length(xs)))
+    hvncat_fill!(A, false, xs)
+    return A
+end
+
+function _typed_hvncat(::Type{T}, ::Val{N}, as::AbstractArray...) where {T, N}
+    # optimization for arrays that can be concatenated by copying them linearly into the destination
+    # conditions: the elements must all have 1-length dimensions above N
+    length(as) > 0 ||
+        throw(ArgumentError("must have at least one element"))
+    N < 0 &&
+        throw(ArgumentError("concatenation dimension must be nonnegative"))
+    for a ∈ as
+        ndims(a) <= N || all(x -> size(a, x) == 1, (N + 1):ndims(a)) ||
+            return _typed_hvncat(T, (ntuple(x -> 1, Val(N - 1))..., length(as), 1), false, as...)
+            # the extra 1 is to avoid an infinite cycle
+    end
+
+    nd = N
+
+    Ndim = 0
+    for i ∈ eachindex(as)
+        Ndim += cat_size(as[i], N)
+        nd = max(nd, cat_ndims(as[i]))
+        for d ∈ 1:N - 1
+            cat_size(as[1], d) == cat_size(as[i], d) || throw(ArgumentError("mismatched size along axis $d in element $i"))
+        end
+    end
+
+    A = cat_similar(as[1], T, (ntuple(d -> size(as[1], d), N - 1)..., Ndim, ntuple(x -> 1, nd - N)...))
+    k = 1
+    for a ∈ as
+        for i ∈ eachindex(a)
+            A[k] = a[i]
+            k += 1
+        end
+    end
+    return A
+end
+
+function _typed_hvncat(::Type{T}, ::Val{N}, as...) where {T, N}
+    length(as) > 0 ||
+        throw(ArgumentError("must have at least one element"))
+    N < 0 &&
+        throw(ArgumentError("concatenation dimension must be nonnegative"))
+    nd = N
+    Ndim = 0
+    for i ∈ eachindex(as)
+        Ndim += cat_size(as[i], N)
+        nd = max(nd, cat_ndims(as[i]))
+        for d ∈ 1:N-1
+            cat_size(as[i], d) == 1 ||
+                throw(ArgumentError("all dimensions of element $i other than $N must be of length 1"))
+        end
+    end
+
+    A = Array{T, nd}(undef, ntuple(x -> 1, Val(N - 1))..., Ndim, ntuple(x -> 1, nd - N)...)
+
+    k = 1
+    for a ∈ as
+        if a isa AbstractArray
+            lena = length(a)
+            copyto!(A, k, a, 1, lena)
+            k += lena
+        else
+            A[k] = a
+            k += 1
+        end
+    end
+    return A
+end
+
+# 0-dimensional cases for balanced and unbalanced hvncat method
+
+_typed_hvncat(T::Type, ::Tuple{}, ::Bool, x...) = _typed_hvncat(T, Val(0), x...)
+_typed_hvncat(T::Type, ::Tuple{}, ::Bool, x::Number...) = _typed_hvncat(T, Val(0), x...)
+
+
+# balanced dimensions hvncat methods
+
+_typed_hvncat(T::Type, dims::Tuple{Int}, ::Bool, as...) = _typed_hvncat_1d(T, dims[1], Val(false), as...)
+_typed_hvncat(T::Type, dims::Tuple{Int}, ::Bool, as::Number...) = _typed_hvncat_1d(T, dims[1], Val(false), as...)
+
+# Concatenate `as` along a single dimension after checking that exactly `ds` values were
+# supplied: along dimension 2 when `row_first` is true, otherwise along dimension 1.
+function _typed_hvncat_1d(::Type{T}, ds::Int, ::Val{row_first}, as...) where {T, row_first}
+    lengthas = length(as)
+    ds > 0 ||
+        throw(ArgumentError("`dimsshape` argument must consist of positive integers"))
+    lengthas == ds ||
+        throw(ArgumentError("number of elements does not match `dimsshape` argument; expected $ds, got $lengthas"))
+    # `row_first` arrives as a `Val` type parameter rather than as a runtime argument
+    row_first && return _typed_hvncat(T, Val(2), as...)
+    return _typed_hvncat(T, Val(1), as...)
+end
+
+function _typed_hvncat(::Type{T}, dims::NTuple{N, Int}, row_first::Bool, xs::Number...) where {T, N}
+    all(>(0), dims) ||
+        throw(ArgumentError("`dims` argument must contain positive integers"))
+    A = Array{T, N}(undef, dims...)
+    # Both lengths are stored in locals before the check: throw blocks are being deoptimized
+    # right now, and storing the results cuts the cost from 3 allocations to 1.
+    lengtha, lengthx = length(A), length(xs)
+    lengtha == lengthx ||
+        throw(ArgumentError("argument count does not match specified shape (expected $lengtha, got $lengthx)"))
+    hvncat_fill!(A, row_first, xs)
+    return A
+end
+
+function hvncat_fill!(A::Array, row_first::Bool, xs::Tuple)
+    # Fill `A` from `xs`; both orderings stay in this one function because putting these in separate functions leads to unnecessary allocations
+    if row_first
+        nr, nc = size(A, 1), size(A, 2)
+        nrc = nr * nc # number of elements in one 2-d (rows × columns) slice
+        na = prod(size(A)[3:end]) # number of such slices across all higher dimensions
+        k = 1 # position of the next value to take from `xs`
+        for d ∈ 1:na
+            dd = nrc * (d - 1) # linear offset of slice `d` within `A`
+            for i ∈ 1:nr
+                Ai = dd + i # linear index of row `i`, column 1 of this slice
+                for j ∈ 1:nc
+                    A[Ai] = xs[k]
+                    k += 1
+                    Ai += nr # step to the next column of the same row
+                end
+            end
+        end
+    else
+        for k ∈ eachindex(xs) # copy `xs` straight into `A` by linear index
+            A[k] = xs[k]
+        end
+    end
+end
+
+function _typed_hvncat(T::Type, dims::NTuple{N, Int}, row_first::Bool, as...) where {N}
+    # function barrier after calculating the max is necessary for high performance
+    nd = max(maximum(cat_ndims(a) for a ∈ as), N)
+    return _typed_hvncat_dims(T, (dims..., ntuple(x -> 1, nd - N)...), row_first, as)
+end
+
+function _typed_hvncat_dims(::Type{T}, dims::NTuple{N, Int}, row_first::Bool, as::Tuple) where {T, N}
+    length(as) > 0 ||
+        throw(ArgumentError("must have at least one element"))
+    all(>(0), dims) ||
+        throw(ArgumentError("`dims` argument must contain positive integers"))
+
+    d1 = row_first ? 2 : 1
+    d2 = row_first ? 1 : 2
+
+    outdims = zeros(Int, N)
+
+    # discover number of rows or columns
+    for i ∈ 1:dims[d1]
+        outdims[d1] += cat_size(as[i], d1)
+    end
+
+    currentdims = zeros(Int, N)
+    blockcount = 0
+    elementcount = 0
+    for i ∈ eachindex(as)
+        elementcount += cat_length(as[i])
+        currentdims[d1] += cat_size(as[i], d1)
+        if currentdims[d1] == outdims[d1]
+            currentdims[d1] = 0
+            for d ∈ (d2, 3:N...)
+                currentdims[d] += cat_size(as[i], d)
+                if outdims[d] == 0 # unfixed dimension
+                    blockcount += 1
+                    if blockcount == dims[d]
+                        outdims[d] = currentdims[d]
+                        currentdims[d] = 0
+                        blockcount = 0
+                    else
+                        break
+                    end
+                else # fixed dimension
+                    if currentdims[d] == outdims[d] # end of dimension
+                        currentdims[d] = 0
+                    elseif currentdims[d] < outdims[d] # dimension in progress
+                        break
+                    else # exceeded dimension
+                        throw(ArgumentError("argument $i has too many elements along axis $d"))
+                    end
+                end
+            end
+        elseif currentdims[d1] > outdims[d1] # exceeded dimension
+            throw(ArgumentError("argument $i has too many elements along axis $d1"))
+        end
+    end
+
+    outlen = prod(outdims)
+    elementcount == outlen ||
+        throw(ArgumentError("mismatched number of elements; expected $(outlen), got $(elementcount)"))
+
+    # copy into final array
+    A = cat_similar(as[1], T, outdims)
+    # @assert all(==(0), currentdims)
+    outdims .= 0
+    hvncat_fill!(A, currentdims, outdims, d1, d2, as)
+    return A
+end
+
+
+# unbalanced dimensions hvncat methods
+
+function _typed_hvncat(T::Type, shape::Tuple{Tuple}, row_first::Bool, xs...)
+    length(shape[1]) > 0 ||
+        throw(ArgumentError("each level of `shape` argument must have at least one value"))
+    return _typed_hvncat_1d(T, shape[1][1], Val(row_first), xs...)
+end
+
+function _typed_hvncat(T::Type, shape::NTuple{N, Tuple}, row_first::Bool, as...) where {N}
+    # function barrier after calculating the max is necessary for high performance
+    nd = max(maximum(cat_ndims(a) for a ∈ as), N)
+    return _typed_hvncat_shape(T, (shape..., ntuple(x -> shape[end], nd - N)...), row_first, as)
+end
+
+# Unbalanced (`shape`-form) concatenation: validate `shape` against `as`, discover the output size, then fill.
+function _typed_hvncat_shape(::Type{T}, shape::NTuple{N, Tuple}, row_first, as::Tuple) where {T, N}
+    length(as) > 0 ||
+        throw(ArgumentError("must have at least one element"))
+    all(>(0), tuple((shape...)...)) ||
+        throw(ArgumentError("`shape` argument must consist of positive integers"))
+    d1 = row_first ? 2 : 1 # axis advanced first when blocks are placed by `hvncat_fill!`
+    d2 = row_first ? 1 : 2
+
+    shapev = collect(shape) # saves allocations later
+    all(!isempty, shapev) ||
+        throw(ArgumentError("each level of `shape` argument must have at least one value"))
+    length(shapev[end]) == 1 ||
+        throw(ArgumentError("last level of shape must contain only one integer"))
+    shapelength = shapev[end][1] # the last level must equal the total number of arguments
+    lengthas = length(as)
+    shapelength == lengthas || throw(ArgumentError("number of elements does not match shape; expected $(shapelength), got $lengthas"))
+    # discover dimensions
+    nd = max(N, cat_ndims(as[1]))
+    outdims = zeros(Int, nd)
+    currentdims = zeros(Int, nd)
+    blockcounts = zeros(Int, nd)
+    shapepos = ones(Int, nd)
+
+    elementcount = 0
+    for i ∈ eachindex(as)
+        elementcount += cat_length(as[i])
+        wasstartblock = false
+        for d ∈ 1:N
+            ad = (d < 3 && row_first) ? (d == 1 ? 2 : 1) : d # array axis for level `d`; levels 1 and 2 swap when `row_first`
+            dsize = cat_size(as[i], ad)
+            blockcounts[d] += 1
+
+            if d == 1 || i == 1 || wasstartblock
+                currentdims[d] += dsize
+            elseif dsize != cat_size(as[i - 1], ad)
+                throw(ArgumentError("argument $i has a mismatched number of elements along axis $ad; \
+                                    expected $(cat_size(as[i - 1], ad)), got $dsize"))
+            end
+
+            wasstartblock = blockcounts[d] == 1 # remember for next dimension
+
+            isendblock = blockcounts[d] == shapev[d][shapepos[d]]
+            if isendblock
+                if outdims[d] == 0
+                    outdims[d] = currentdims[d]
+                elseif outdims[d] != currentdims[d]
+                    throw(ArgumentError("argument $i has a mismatched number of elements along axis $ad; \
+                                        expected $(abs(outdims[d] - (currentdims[d] - dsize))), got $dsize"))
+                end
+                currentdims[d] = 0
+                blockcounts[d] = 0
+                shapepos[d] += 1
+                d > 1 && (blockcounts[d - 1] == 0 ||
+                    throw(ArgumentError("shape in level $d is inconsistent; level counts must nest \
+                                        evenly into each other")))
+            end
+        end
+    end
+
+    outlen = prod(outdims)
+    elementcount == outlen ||
+        throw(ArgumentError("mismatched number of elements; expected $(outlen), got $(elementcount)"))
+
+    if row_first # `outdims` was accumulated per level; reorder it to array axes
+        outdims[1], outdims[2] = outdims[2], outdims[1]
+    end
+
+    # @assert all(==(0), currentdims)
+    # @assert all(==(0), blockcounts)
+
+    # copy into final array
+    A = cat_similar(as[1], T, outdims)
+    hvncat_fill!(A, currentdims, blockcounts, d1, d2, as)
+    return A
+end
+
+# Copy every element of `as` into `A`, placing blocks along axis `d1` first, then `d2`, then 3:N.
+function hvncat_fill!(A::AbstractArray{T, N}, scratch1::Vector{Int}, scratch2::Vector{Int}, d1::Int, d2::Int, as::Tuple{Vararg}) where {T, N}
+    outdims = size(A)
+    offsets = scratch1 # per-axis zero-based start of the current block; NOTE(review): presumably zero-filled on entry
+    inneroffsets = scratch2 # per-axis zero-based position of the current element within its block
+    for a ∈ as
+        if isa(a, AbstractArray)
+            for ai ∈ a
+                Ai = hvncat_calcindex(offsets, inneroffsets, outdims, N)
+                A[Ai] = ai
+                # advance within `a`, carrying into the next axis once one reaches its size
+                for j ∈ 1:N
+                    inneroffsets[j] += 1
+                    inneroffsets[j] < cat_size(a, j) && break
+                    inneroffsets[j] = 0
+                end
+            end
+        else
+            Ai = hvncat_calcindex(offsets, inneroffsets, outdims, N)
+            A[Ai] = a
+        end
+        for j ∈ (d1, d2, 3:N...) # move to the next block start, wrapping an axis at `outdims[j]`
+            offsets[j] += cat_size(a, j)
+            offsets[j] < outdims[j] && break
+            offsets[j] = 0
+        end
+    end
+end
+
+# One-based linear (column-major) index into an array of size `outdims` for the zero-based
+# per-axis position `offsets .+ inneroffsets`, taken over the first `nd` axes.
+@propagate_inbounds function hvncat_calcindex(offsets::Vector{Int}, inneroffsets::Vector{Int},
+                                              outdims::Tuple{Vararg{Int}}, nd::Int)
+    Ai = inneroffsets[1] + offsets[1] + 1
+    stride = 1 # product of outdims[1:j-1], carried across iterations instead of recomputed
+    for j ∈ 2:nd
+        stride *= outdims[j - 1]
+        Ai += (inneroffsets[j] + offsets[j]) * stride
+    end
+    return Ai
+end
+
 ## Reductions and accumulates ##
 
 function isequal(A::AbstractArray, B::AbstractArray)
@@ -2012,12 +2550,12 @@ end
 # _sub2ind and _ind2sub
 # fallbacks
 function _sub2ind(A::AbstractArray, I...)
-    @_inline_meta
+    @inline
     _sub2ind(axes(A), I...)
 end
 
 function _ind2sub(A::AbstractArray, ind)
-    @_inline_meta
+    @inline
     _ind2sub(axes(A), ind)
 end
 
@@ -2025,49 +2563,49 @@ end
 _sub2ind(::Tuple{}) = 1
 _sub2ind(::DimsInteger) = 1
 _sub2ind(::Indices) = 1
-_sub2ind(::Tuple{}, I::Integer...) = (@_inline_meta; _sub2ind_recurse((), 1, 1, I...))
+_sub2ind(::Tuple{}, I::Integer...) = (@inline; _sub2ind_recurse((), 1, 1, I...))
 
 # Generic cases
-_sub2ind(dims::DimsInteger, I::Integer...) = (@_inline_meta; _sub2ind_recurse(dims, 1, 1, I...))
-_sub2ind(inds::Indices, I::Integer...) = (@_inline_meta; _sub2ind_recurse(inds, 1, 1, I...))
+_sub2ind(dims::DimsInteger, I::Integer...) = (@inline; _sub2ind_recurse(dims, 1, 1, I...))
+_sub2ind(inds::Indices, I::Integer...) = (@inline; _sub2ind_recurse(inds, 1, 1, I...))
 # In 1d, there's a question of whether we're doing cartesian indexing
 # or linear indexing. Support only the former.
 _sub2ind(inds::Indices{1}, I::Integer...) =
     throw(ArgumentError("Linear indexing is not defined for one-dimensional arrays"))
-_sub2ind(inds::Tuple{OneTo}, I::Integer...) = (@_inline_meta; _sub2ind_recurse(inds, 1, 1, I...)) # only OneTo is safe
+_sub2ind(inds::Tuple{OneTo}, I::Integer...) = (@inline; _sub2ind_recurse(inds, 1, 1, I...)) # only OneTo is safe
 _sub2ind(inds::Tuple{OneTo}, i::Integer)    = i
 
 _sub2ind_recurse(::Any, L, ind) = ind
 function _sub2ind_recurse(::Tuple{}, L, ind, i::Integer, I::Integer...)
-    @_inline_meta
+    @inline
     _sub2ind_recurse((), L, ind+(i-1)*L, I...)
 end
 function _sub2ind_recurse(inds, L, ind, i::Integer, I::Integer...)
-    @_inline_meta
+    @inline
     r1 = inds[1]
     _sub2ind_recurse(tail(inds), nextL(L, r1), ind+offsetin(i, r1)*L, I...)
 end
 
 nextL(L, l::Integer) = L*l
-nextL(L, r::AbstractUnitRange) = L*unsafe_length(r)
-nextL(L, r::Slice) = L*unsafe_length(r.indices)
+nextL(L, r::AbstractUnitRange) = L*length(r)
+nextL(L, r::Slice) = L*length(r.indices)
 offsetin(i, l::Integer) = i-1
 offsetin(i, r::AbstractUnitRange) = i-first(r)
 
-_ind2sub(::Tuple{}, ind::Integer) = (@_inline_meta; ind == 1 ? () : throw(BoundsError()))
-_ind2sub(dims::DimsInteger, ind::Integer) = (@_inline_meta; _ind2sub_recurse(dims, ind-1))
-_ind2sub(inds::Indices, ind::Integer)     = (@_inline_meta; _ind2sub_recurse(inds, ind-1))
+_ind2sub(::Tuple{}, ind::Integer) = (@inline; ind == 1 ? () : throw(BoundsError()))
+_ind2sub(dims::DimsInteger, ind::Integer) = (@inline; _ind2sub_recurse(dims, ind-1))
+_ind2sub(inds::Indices, ind::Integer)     = (@inline; _ind2sub_recurse(inds, ind-1))
 _ind2sub(inds::Indices{1}, ind::Integer) =
     throw(ArgumentError("Linear indexing is not defined for one-dimensional arrays"))
 _ind2sub(inds::Tuple{OneTo}, ind::Integer) = (ind,)
 
 _ind2sub_recurse(::Tuple{}, ind) = (ind+1,)
 function _ind2sub_recurse(indslast::NTuple{1}, ind)
-    @_inline_meta
+    @inline
     (_lookup(ind, indslast[1]),)
 end
 function _ind2sub_recurse(inds, ind)
-    @_inline_meta
+    @inline
     r1 = inds[1]
     indnext, f, l = _div(ind, r1)
     (ind-l*indnext+f, _ind2sub_recurse(tail(inds), indnext)...)
@@ -2076,7 +2614,7 @@ end
 _lookup(ind, d::Integer) = ind+1
 _lookup(ind, r::AbstractUnitRange) = ind+first(r)
 _div(ind, d::Integer) = div(ind, d), 1, d
-_div(ind, r::AbstractUnitRange) = (d = unsafe_length(r); (div(ind, d), first(r), d))
+_div(ind, r::AbstractUnitRange) = (d = length(r); (div(ind, d), first(r), d))
 
 # Vectorized forms
 function _sub2ind(inds::Indices{1}, I1::AbstractVector{T}, I::AbstractVector{T}...) where T<:Integer
@@ -2098,7 +2636,7 @@ function _sub2ind_vecs(inds, I::AbstractVector...)
 end
 
 function _sub2ind!(Iout, inds, Iinds, I)
-    @_noinline_meta
+    @noinline
     for i in Iinds
         # Iout[i] = _sub2ind(inds, map(Ij -> Ij[i], I)...)
         Iout[i] = sub2ind_vec(inds, i, I)
@@ -2106,8 +2644,8 @@ function _sub2ind!(Iout, inds, Iinds, I)
     Iout
 end
 
-sub2ind_vec(inds, i, I) = (@_inline_meta; _sub2ind(inds, _sub2ind_vec(i, I...)...))
-_sub2ind_vec(i, I1, I...) = (@_inline_meta; (I1[i], _sub2ind_vec(i, I...)...))
+sub2ind_vec(inds, i, I) = (@inline; _sub2ind(inds, _sub2ind_vec(i, I...)...))
+_sub2ind_vec(i, I1, I...) = (@inline; (I1[i], _sub2ind_vec(i, I...)...))
 _sub2ind_vec(i) = ()
 
 function _ind2sub(inds::Union{DimsInteger{N},Indices{N}}, ind::AbstractVector{<:Integer}) where N
@@ -2128,18 +2666,28 @@ end
     foreach(f, c...) -> Nothing
 
 Call function `f` on each element of iterable `c`.
-For multiple iterable arguments, `f` is called elementwise.
-`foreach` should be used instead of `map` when the results of `f` are not
+For multiple iterable arguments, `f` is called elementwise, and iteration stops when
+any iterator is finished.
+
+`foreach` should be used instead of [`map`](@ref) when the results of `f` are not
 needed, for example in `foreach(println, array)`.
 
 # Examples
 ```jldoctest
-julia> a = 1:3:7;
+julia> tri = 1:3:7; res = Int[];
 
-julia> foreach(x -> println(x^2), a)
-1
-16
-49
+julia> foreach(x -> push!(res, x^2), tri)
+
+julia> res
+3-element Vector{$(Int)}:
+  1
+ 16
+ 49
+
+julia> foreach((x, y) -> println(x, " with ", y), tri, 'a':'z')
+1 with a
+4 with b
+7 with c
 ```
 """
 foreach(f) = (f(); nothing)
@@ -2160,6 +2708,8 @@ colons go in this expression. The results are concatenated along the remaining d
 For example, if `dims` is `[1,2]` and `A` is 4-dimensional, `f` is called on `A[:,:,i,j]`
 for all `i` and `j`.
 
+See also [`eachcol`](@ref), [`eachslice`](@ref).
+
 # Examples
 ```jldoctest
 julia> a = reshape(Vector(1:16),(2,2,2,2))
@@ -2238,9 +2788,9 @@ function mapslices(f, A::AbstractArray; dims)
     end
     nextra = max(0, length(dims)-ndims(r1))
     if eltype(Rsize) == Int
-        Rsize[dims] = [size(r1)..., ntuple(d->1, nextra)...]
+        Rsize[dims] = [size(r1)..., ntuple(Returns(1), nextra)...]
     else
-        Rsize[dims] = [axes(r1)..., ntuple(d->OneTo(1), nextra)...]
+        Rsize[dims] = [axes(r1)..., ntuple(Returns(OneTo(1)), nextra)...]
     end
     R = similar(r1, tuple(Rsize...,))
 
@@ -2285,6 +2835,10 @@ end
 concatenate_setindex!(R, v, I...) = (R[I...] .= (v,); R)
 concatenate_setindex!(R, X::AbstractArray, I...) = (R[I...] = X)
 
+## 0 arguments
+
+map(f) = f()
+
 ## 1 argument
 
 function map!(f::F, dest::AbstractArray, A::AbstractArray) where F
@@ -2301,14 +2855,13 @@ map(f, A::AbstractArray) = collect_similar(A, Generator(f,A))
 mapany(f, A::AbstractArray) = map!(f, Vector{Any}(undef, length(A)), A)
 mapany(f, itr) = Any[f(x) for x in itr]
 
-# default to returning an Array for `map` on general iterators
 """
     map(f, c...) -> collection
 
 Transform collection `c` by applying `f` to each element. For multiple collection arguments,
-apply `f` elementwise.
+apply `f` elementwise, and stop when any of them is exhausted.
 
-See also: [`mapslices`](@ref)
+See also [`map!`](@ref), [`foreach`](@ref), [`mapreduce`](@ref), [`mapslices`](@ref), [`zip`](@ref), [`Iterators.map`](@ref).
 
 # Examples
 ```jldoctest
@@ -2318,14 +2871,14 @@ julia> map(x -> x * 2, [1, 2, 3])
  4
  6
 
-julia> map(+, [1, 2, 3], [10, 20, 30])
+julia> map(+, [1, 2, 3], [10, 20, 30, 400, 5000])
 3-element Vector{Int64}:
  11
  22
  33
 ```
 """
-map(f, A) = collect(Generator(f,A))
+map(f, A) = collect(Generator(f,A)) # default to returning an Array for `map` on general iterators
 
 map(f, ::AbstractDict) = error("map is not defined on dictionaries")
 map(f, ::AbstractSet) = error("map is not defined on sets")
@@ -2363,7 +2916,9 @@ end
     map!(function, destination, collection...)
 
 Like [`map`](@ref), but stores the result in `destination` rather than a new
-collection. `destination` must be at least as large as the first collection.
+collection. `destination` must be at least as large as the smallest collection.
+
+See also: [`map`](@ref), [`foreach`](@ref), [`zip`](@ref), [`copyto!`](@ref).
 
 # Examples
 ```jldoctest
@@ -2376,6 +2931,14 @@ julia> a
  2.0
  4.0
  6.0
+
+julia> map!(+, zeros(Int, 5), 100:999, 1:3)
+5-element Vector{$(Int)}:
+ 101
+ 103
+ 105
+   0
+   0
 ```
 """
 function map!(f::F, dest::AbstractArray, As::AbstractArray...) where {F}
@@ -2384,7 +2947,31 @@ function map!(f::F, dest::AbstractArray, As::AbstractArray...) where {F}
     map_n!(f, dest, As)
 end
 
-map(f) = f()
+"""
+    map(f, A::AbstractArray...) -> N-array
+
+When acting on multi-dimensional arrays of the same [`ndims`](@ref),
+they must all have the same [`axes`](@ref), and the answer will too.
+
+See also [`broadcast`](@ref), which allows mismatched sizes.
+
+# Examples
+```
+julia> map(//, [1 2; 3 4], [4 3; 2 1])
+2×2 Matrix{Rational{$Int}}:
+ 1//4  2//3
+ 3//2  4//1
+
+julia> map(+, [1 2; 3 4], zeros(2,1))
+ERROR: DimensionMismatch
+
+julia> map(+, [1 2; 3 4], [1,10,100,1000], zeros(3,1))  # iterates until 3rd is exhausted
+3-element Vector{Float64}:
+   2.0
+  13.0
+ 102.0
+```
+"""
 map(f, iters...) = collect(Generator(f, iters...))
 
 # multi-item push!, pushfirst! (built on top of type-specific 1-item version)
@@ -2476,3 +3063,46 @@ function rest(a::AbstractArray{T}, state...) where {T}
     sizehint!(v, length(a))
     return foldl(push!, Iterators.rest(a, state...), init=v)
 end
+
+
+## keepat! ##
+
+"""
+    keepat!(a::AbstractVector, inds)
+
+Remove the items at all the indices which are not given by `inds`,
+and return the modified `a`.
+Items which are kept are shifted to fill the resulting gaps.
+
+`inds` must be an iterator of sorted and unique integer indices.
+See also [`deleteat!`](@ref).
+
+!!! compat "Julia 1.7"
+    This function is available as of Julia 1.7.
+
+# Examples
+```jldoctest
+julia> keepat!([6, 5, 4, 3, 2, 1], 1:2:5)
+3-element Vector{Int64}:
+ 6
+ 4
+ 2
+```
+"""
+function keepat!(a::AbstractVector, inds)
+    local prev # index seen on the previous iteration; stays undefined until the first element of `inds` is processed
+    i = firstindex(a) # destination slot for the next kept element
+    for k in inds
+        if @isdefined(prev)
+            prev < k || throw(ArgumentError("indices must be unique and sorted"))
+        end
+        ak = a[k] # must happen even when i==k for bounds checking
+        if i != k
+            @inbounds a[i] = ak # k > i, so a[i] is inbounds
+        end
+        prev = k
+        i = nextind(a, i)
+    end
+    deleteat!(a, i:lastindex(a)) # drop every slot from `i` onward, i.e. everything past the last kept element
+    return a
+end
diff --git a/base/abstractarraymath.jl b/base/abstractarraymath.jl
index 953c190ab12efd..4dd24214a63fcb 100644
--- a/base/abstractarraymath.jl
+++ b/base/abstractarraymath.jl
@@ -36,7 +36,7 @@ julia> vec(1:3)
 1:3
 ```
 
-See also [`reshape`](@ref).
+See also [`reshape`](@ref), [`dropdims`](@ref).
 """
 vec(a::AbstractArray) = reshape(a,length(a))
 vec(a::AbstractVector) = a
@@ -48,9 +48,15 @@ _sub(t::Tuple, s::Tuple) = _sub(tail(t), tail(s))
 """
     dropdims(A; dims)
 
-Remove the dimensions specified by `dims` from array `A`.
-Elements of `dims` must be unique and within the range `1:ndims(A)`.
-`size(A,i)` must equal 1 for all `i` in `dims`.
+Return an array with the same data as `A`, but with the dimensions specified by
+`dims` removed. `size(A,d)` must equal 1 for every `d` in `dims`,
+and repeated dimensions or numbers outside `1:ndims(A)` are forbidden.
+
+The result shares the same underlying data as `A`, such that the
+result is mutable if and only if `A` is mutable, and setting elements of one
+alters the values of the other.
+
+See also: [`reshape`](@ref), [`vec`](@ref).
 
 # Examples
 ```jldoctest
@@ -60,11 +66,17 @@ julia> a = reshape(Vector(1:4),(2,2,1,1))
  1  3
  2  4
 
-julia> dropdims(a; dims=3)
+julia> b = dropdims(a; dims=3)
 2×2×1 Array{Int64, 3}:
 [:, :, 1] =
  1  3
  2  4
+
+julia> b[1,1,1] = 5; a
+2×2×1×1 Array{Int64, 4}:
+[:, :, 1, 1] =
+ 5  3
+ 2  4
 ```
 """
 dropdims(A; dims) = _dropdims(A, dims)
@@ -76,13 +88,8 @@ function _dropdims(A::AbstractArray, dims::Dims)
             dims[j] == dims[i] && throw(ArgumentError("dropped dims must be unique"))
         end
     end
-    d = ()
-    for i = 1:ndims(A)
-        if !in(i, dims)
-            d = tuple(d..., axes(A, i))
-        end
-    end
-    reshape(A, d::typeof(_sub(axes(A), dims)))
+    ax = _foldoneto((ds, d) -> d in dims ? ds : (ds..., axes(A,d)), (), Val(ndims(A)))
+    reshape(A, ax::typeof(_sub(axes(A), dims)))
 end
 _dropdims(A::AbstractArray, dim::Integer) = _dropdims(A, (Int(dim),))
 
@@ -106,6 +113,8 @@ Return a view of all the data of `A` where the index for dimension `d` equals `i
 
 Equivalent to `view(A,:,:,...,i,:,:,...)` where `i` is in position `d`.
 
+See also: [`eachslice`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = [1 2 3 4; 5 6 7 8]
@@ -117,13 +126,18 @@ julia> selectdim(A, 2, 3)
 2-element view(::Matrix{Int64}, :, 3) with eltype Int64:
  3
  7
+
+julia> selectdim(A, 2, 3:4)
+2×2 view(::Matrix{Int64}, :, 3:4) with eltype Int64:
+ 3  4
+ 7  8
 ```
 """
 @inline selectdim(A::AbstractArray, d::Integer, i) = _selectdim(A, d, i, _setindex(i, d, map(Slice, axes(A))...))
 @noinline function _selectdim(A, d, i, idxs)
     d >= 1 || throw(ArgumentError("dimension must be ≥ 1, got $d"))
     nd = ndims(A)
-    d > nd && (i == 1 || throw(BoundsError(A, (ntuple(k->Colon(),d-1)..., i))))
+    d > nd && (i == 1 || throw(BoundsError(A, (ntuple(Returns(Colon()),d-1)..., i))))
     return view(A, idxs...)
 end
 
@@ -138,6 +152,8 @@ Circularly shift, i.e. rotate, the data in an array. The second argument is a tu
 vector giving the amount to shift in each dimension, or an integer to shift only in the
 first dimension.
 
+See also: [`circshift!`](@ref), [`circcopy!`](@ref), [`bitrotate`](@ref), [`<<`](@ref).
+
 # Examples
 ```jldoctest
 julia> b = reshape(Vector(1:16), (4,4))
@@ -185,8 +201,6 @@ julia> circshift(a, -1)
  1
  1
 ```
-
-See also [`circshift!`](@ref).
 """
 function circshift(a::AbstractArray, shiftamt)
     circshift!(similar(a), a, map(Integer, (shiftamt...,)))
@@ -199,6 +213,8 @@ end
 
 Construct an array by repeating array `A` a given number of times in each dimension, specified by `counts`.
 
+See also: [`fill`](@ref), [`Iterators.repeated`](@ref), [`Iterators.cycle`](@ref).
+
 # Examples
 ```jldoctest
 julia> repeat([1, 2, 3], 2)
@@ -225,7 +241,7 @@ function repeat(A::AbstractArray, counts...)
 end
 
 """
-    repeat(A::AbstractArray; inner=ntuple(x->1, ndims(A)), outer=ntuple(x->1, ndims(A)))
+    repeat(A::AbstractArray; inner=ntuple(Returns(1), ndims(A)), outer=ntuple(Returns(1), ndims(A)))
 
 Construct an array by repeating the entries of `A`. The i-th element of `inner` specifies
 the number of times that the individual entries of the i-th dimension of `A` should be
@@ -392,7 +408,7 @@ end#module
 Create a generator that iterates over the first dimension of vector or matrix `A`,
 returning the rows as `AbstractVector` views.
 
-See also [`eachcol`](@ref) and [`eachslice`](@ref).
+See also [`eachcol`](@ref), [`eachslice`](@ref), [`mapslices`](@ref).
 
 !!! compat "Julia 1.1"
      This function requires at least Julia 1.1.
@@ -460,7 +476,7 @@ the data from the other dimensions in `A`.
 Only a single dimension in `dims` is currently supported. Equivalent to `(view(A,:,:,...,i,:,:
 ...)) for i in axes(A, dims))`, where `i` is in position `dims`.
 
-See also [`eachrow`](@ref), [`eachcol`](@ref), and [`selectdim`](@ref).
+See also [`eachrow`](@ref), [`eachcol`](@ref), [`mapslices`](@ref), and [`selectdim`](@ref).
 
 !!! compat "Julia 1.1"
      This function requires at least Julia 1.1.
@@ -491,7 +507,7 @@ julia> collect(eachslice(M, dims=2))
     length(dims) == 1 || throw(ArgumentError("only single dimensions are supported"))
     dim = first(dims)
     dim <= ndims(A) || throw(DimensionMismatch("A doesn't have $dim dimensions"))
-    inds_before = ntuple(d->(:), dim-1)
-    inds_after = ntuple(d->(:), ndims(A)-dim)
+    inds_before = ntuple(Returns(:), dim-1)
+    inds_after = ntuple(Returns(:), ndims(A)-dim)
     return (view(A, inds_before..., i, inds_after...) for i in axes(A, dim))
 end
diff --git a/base/abstractdict.jl b/base/abstractdict.jl
index cfd964759bcced..92cfb0dcfe614c 100644
--- a/base/abstractdict.jl
+++ b/base/abstractdict.jl
@@ -66,6 +66,8 @@ function iterate(v::Union{KeySet,ValueIterator}, state...)
     return (y[1][isa(v, KeySet) ? 1 : 2], y[2])
 end
 
+copy(v::KeySet) = copymutable(v)
+
 in(k, v::KeySet) = get(v.dict, k, secret_table_token) !== secret_table_token
 
 """
@@ -234,12 +236,14 @@ Dict{Int64, Int64} with 3 entries:
 ```
 """
 function mergewith!(combine, d::AbstractDict, others::AbstractDict...)
-    for other in others
-        for (k,v) in other
-            d[k] = haskey(d, k) ? combine(d[k], v) : v
-        end
+    foldl(mergewith!(combine), others; init = d)
+end
+
+function mergewith!(combine, d1::AbstractDict, d2::AbstractDict)
+    for (k, v) in d2
+        d1[k] = haskey(d1, k) ? combine(d1[k], v) : v
     end
-    return d
+    return d1
 end
 
 mergewith!(combine) = (args...) -> mergewith!(combine, args...)
@@ -249,7 +253,7 @@ merge!(combine::Callable, args...) = mergewith!(combine, args...)
 """
     keytype(type)
 
-Get the key type of an dictionary type. Behaves similarly to [`eltype`](@ref).
+Get the key type of a dictionary type. Behaves similarly to [`eltype`](@ref).
 
 # Examples
 ```jldoctest
@@ -263,7 +267,7 @@ keytype(a::AbstractDict) = keytype(typeof(a))
 """
     valtype(type)
 
-Get the value type of an dictionary type. Behaves similarly to [`eltype`](@ref).
+Get the value type of a dictionary type. Behaves similarly to [`eltype`](@ref).
 
 # Examples
 ```jldoctest
diff --git a/base/abstractset.jl b/base/abstractset.jl
index 179b9f7be5d4b0..573af07b184c85 100644
--- a/base/abstractset.jl
+++ b/base/abstractset.jl
@@ -13,6 +13,8 @@ copy!(dst::AbstractSet, src::AbstractSet) = union!(empty!(dst), src)
 
 Construct the union of sets. Maintain order with arrays.
 
+See also: [`intersect`](@ref), [`isdisjoint`](@ref), [`vcat`](@ref), [`Iterators.flatten`](@ref).
+
 # Examples
 ```jldoctest
 julia> union([1, 2], [3, 4])
@@ -43,8 +45,6 @@ Set{Int64} with 3 elements:
 """
 function union end
 
-_in(itr) = x -> x in itr
-
 union(s, sets...) = union!(emptymutable(s, promote_eltype(s, sets...)), s, sets...)
 union(s::AbstractSet) = copy(s)
 
@@ -81,7 +81,11 @@ end
 max_values(::Type) = typemax(Int)
 max_values(T::Union{map(X -> Type{X}, BitIntegerSmall_types)...}) = 1 << (8*sizeof(T))
 # saturated addition to prevent overflow with typemax(Int)
-max_values(T::Union) = max(max_values(T.a), max_values(T.b), max_values(T.a) + max_values(T.b))
+function max_values(T::Union)
+    a = max_values(T.a)::Int
+    b = max_values(T.b)::Int
+    return max(a, b, a + b)
+end
 max_values(::Type{Bool}) = 2
 max_values(::Type{Nothing}) = 1
 
@@ -101,6 +105,12 @@ end
 Construct the intersection of sets.
 Maintain order with arrays.
 
+See also: [`setdiff`](@ref), [`isdisjoint`](@ref), [`issubset`](@ref Base.issubset), [`issetequal`](@ref).
+
+!!! compat "Julia 1.8"
+    As of Julia 1.8, `intersect` returns a result whose eltype is the
+    type-promoted eltype of the inputs.
+
 # Examples
 ```jldoctest
 julia> intersect([1, 2, 3], [3, 4, 5])
@@ -117,9 +127,12 @@ Set{Int64} with 1 element:
   2
 ```
 """
-intersect(s::AbstractSet, itr, itrs...) = intersect!(intersect(s, itr), itrs...)
+function intersect(s::AbstractSet, itr, itrs...)
+    T = promote_eltype(s, itr, itrs...)
+    return intersect!(Set{T}(s), itr, itrs...)
+end
 intersect(s) = union(s)
-intersect(s::AbstractSet, itr) = mapfilter(_in(s), push!, itr, emptymutable(s))
+intersect(s::AbstractSet, itr) = mapfilter(in(s), push!, itr, emptymutable(s, promote_eltype(s, itr)))
 
 const ∩ = intersect
 
@@ -135,7 +148,7 @@ function intersect!(s::AbstractSet, itrs...)
     end
     return s
 end
-intersect!(s::AbstractSet, s2::AbstractSet) = filter!(_in(s2), s)
+intersect!(s::AbstractSet, s2::AbstractSet) = filter!(in(s2), s)
 intersect!(s::AbstractSet, itr) =
     intersect!(s, union!(emptymutable(s, eltype(itr)), itr))
 
@@ -145,6 +158,8 @@ intersect!(s::AbstractSet, itr) =
 Construct the set of elements in `s` but not in any of the iterables in `itrs`.
 Maintain order with arrays.
 
+See also [`setdiff!`](@ref), [`union`](@ref) and [`intersect`](@ref).
+
 # Examples
 ```jldoctest
 julia> setdiff([1,2,3], [3,4,5])
@@ -194,6 +209,8 @@ Construct the symmetric difference of elements in the passed in sets.
 When `s` is not an `AbstractSet`, the order is maintained.
 Note that in this case the multiplicity of elements matters.
 
+See also [`symdiff!`](@ref), [`setdiff`](@ref), [`union`](@ref) and [`intersect`](@ref).
+
 # Examples
 ```jldoctest
 julia> symdiff([1,2,3], [3,4,5], [4,5,6])
@@ -246,6 +263,8 @@ function ⊇ end
 
 Determine whether every element of `a` is also in `b`, using [`in`](@ref).
 
+See also [`⊊`](@ref), [`⊈`](@ref), [`∩`](@ref intersect), [`∪`](@ref union), [`contains`](@ref).
+
 # Examples
 ```jldoctest
 julia> issubset([1, 2], [1, 2, 3])
@@ -262,21 +281,21 @@ issubset, ⊆, ⊇
 
 const FASTIN_SET_THRESHOLD = 70
 
-function issubset(l, r)
-    if haslength(r) && (isa(l, AbstractSet) || !hasfastin(r))
-        rlen = length(r) # conditions above make this length computed only when needed
-        # check l for too many unique elements
-        if isa(l, AbstractSet) && length(l) > rlen
+function issubset(a, b)
+    if haslength(b) && (isa(a, AbstractSet) || !hasfastin(b))
+        blen = length(b) # conditions above make this length computed only when needed
+        # check a for too many unique elements
+        if isa(a, AbstractSet) && length(a) > blen
             return false
         end
-        # when `in` would be too slow and r is big enough, convert it to a Set
+        # when `in` would be too slow and b is big enough, convert it to a Set
         # this threshold was empirically determined (cf. #26198)
-        if !hasfastin(r) && rlen > FASTIN_SET_THRESHOLD
-            return issubset(l, Set(r))
+        if !hasfastin(b) && blen > FASTIN_SET_THRESHOLD
+            return issubset(a, Set(b))
         end
     end
-    for elt in l
-        elt in r || return false
+    for elt in a
+        elt in b || return false
     end
     return true
 end
@@ -294,7 +313,7 @@ hasfastin(::Type) = false
 hasfastin(::Union{Type{<:AbstractSet},Type{<:AbstractDict},Type{<:AbstractRange}}) = true
 hasfastin(x) = hasfastin(typeof(x))
 
-⊇(l, r) = r ⊆ l
+⊇(a, b) = b ⊆ a
 
 ## strict subset comparison
 
@@ -306,6 +325,8 @@ function ⊋ end
 
 Determines if `a` is a subset of, but not equal to, `b`.
 
+See also [`issubset`](@ref) (`⊆`), [`⊈`](@ref).
+
 # Examples
 ```jldoctest
 julia> (1, 2) ⊊ (1, 2, 3)
@@ -317,9 +338,9 @@ false
 """
 ⊊, ⊋
 
-⊊(l::AbstractSet, r) = length(l) < length(r) && l ⊆ r
-⊊(l, r) = Set(l) ⊊ r
-⊋(l, r) = r ⊊ l
+⊊(a::AbstractSet, b) = length(a) < length(b) && a ⊆ b
+⊊(a, b) = Set(a) ⊊ b
+⊋(a, b) = b ⊊ a
 
 function ⊈ end
 function ⊉ end
@@ -329,6 +350,8 @@ function ⊉ end
 
 Negation of `⊆` and `⊇`, i.e. checks that `a` is not a subset of `b`.
 
+See also [`issubset`](@ref) (`⊆`), [`⊊`](@ref).
+
 # Examples
 ```jldoctest
 julia> (1, 2) ⊈ (2, 3)
@@ -340,8 +363,8 @@ false
 """
 ⊈, ⊉
 
-⊈(l, r) = !⊆(l, r)
-⊉(l, r) = r ⊈ l
+⊈(a, b) = !⊆(a, b)
+⊉(a, b) = b ⊈ a
 
 ## set equality comparison
 
@@ -351,6 +374,8 @@ false
 Determine whether `a` and `b` have the same elements. Equivalent
 to `a ⊆ b && b ⊆ a` but more efficient when possible.
 
+See also: [`isdisjoint`](@ref), [`union`](@ref).
+
 # Examples
 ```jldoctest
 julia> issetequal([1, 2], [1, 2, 3])
@@ -360,54 +385,65 @@ julia> issetequal([1, 2], [2, 1])
 true
 ```
 """
-issetequal(l::AbstractSet, r::AbstractSet) = l == r
-issetequal(l::AbstractSet, r) = issetequal(l, Set(r))
+issetequal(a::AbstractSet, b::AbstractSet) = a == b
+issetequal(a::AbstractSet, b) = issetequal(a, Set(b))
 
-function issetequal(l, r::AbstractSet)
-    if haslength(l)
-        # check r for too many unique elements
-        length(l) < length(r) && return false
+function issetequal(a, b::AbstractSet)
+    if haslength(a)
+        # check b for too many unique elements
+        length(a) < length(b) && return false
     end
-    return issetequal(Set(l), r)
+    return issetequal(Set(a), b)
 end
 
-function issetequal(l, r)
-    haslength(l) && return issetequal(l, Set(r))
-    haslength(r) && return issetequal(r, Set(l))
-    return issetequal(Set(l), Set(r))
+function issetequal(a, b)
+    haslength(a) && return issetequal(a, Set(b))
+    haslength(b) && return issetequal(b, Set(a))
+    return issetequal(Set(a), Set(b))
 end
 
 ## set disjoint comparison
 """
-    isdisjoint(v1, v2) -> Bool
+    isdisjoint(a, b) -> Bool
+
+Determine whether the collections `a` and `b` are disjoint.
+Equivalent to `isempty(a ∩ b)` but more efficient when possible.
 
-Return whether the collections `v1` and `v2` are disjoint, i.e. whether
-their intersection is empty.
+See also: [`intersect`](@ref), [`isempty`](@ref), [`issetequal`](@ref).
 
 !!! compat "Julia 1.5"
     This function requires at least Julia 1.5.
+
+# Examples
+```jldoctest
+julia> isdisjoint([1, 2], [2, 3, 4])
+false
+
+julia> isdisjoint([3, 1], [2, 4])
+true
+```
 """
-function isdisjoint(l, r)
-    function _isdisjoint(l, r)
-        hasfastin(r) && return !any(in(r), l)
-        hasfastin(l) && return !any(in(l), r)
-        haslength(r) && length(r) < FASTIN_SET_THRESHOLD &&
-            return !any(in(r), l)
-        return !any(in(Set(r)), l)
+function isdisjoint(a, b)
+    function _isdisjoint(a, b)
+        hasfastin(b) && return !any(in(b), a)
+        hasfastin(a) && return !any(in(a), b)
+        haslength(b) && length(b) < FASTIN_SET_THRESHOLD &&
+            return !any(in(b), a)
+        return !any(in(Set(b)), a)
     end
-    if haslength(l) && haslength(r) && length(r) < length(l)
-        return _isdisjoint(r, l)
+    if haslength(a) && haslength(b) && length(b) < length(a)
+        return _isdisjoint(b, a)
     end
-    _isdisjoint(l, r)
+    _isdisjoint(a, b)
 end
 
 ## partial ordering of sets by containment
 
-==(l::AbstractSet, r::AbstractSet) = length(l) == length(r) && l ⊆ r
+==(a::AbstractSet, b::AbstractSet) = length(a) == length(b) && a ⊆ b
 # convenience functions for AbstractSet
 # (if needed, only their synonyms ⊊ and ⊆ must be specialized)
-<( l::AbstractSet, r::AbstractSet) = l ⊊ r
-<=(l::AbstractSet, r::AbstractSet) = l ⊆ r
+<( a::AbstractSet, b::AbstractSet) = a ⊊ b
+<=(a::AbstractSet, b::AbstractSet) = a ⊆ b
 
 ## filtering sets
 
diff --git a/base/accumulate.jl b/base/accumulate.jl
index f90f85b315d7c5..6f0b6e7d05ba37 100644
--- a/base/accumulate.jl
+++ b/base/accumulate.jl
@@ -204,6 +204,8 @@ Cumulative product of an iterator. See also
 [`cumprod!`](@ref) to use a preallocated output array, both for performance and
 to control the precision of the output (e.g. to avoid overflow).
 
+See also [`cumprod!`](@ref), [`accumulate`](@ref), [`cumsum`](@ref).
+
 !!! compat "Julia 1.5"
     `cumprod` on a non-array iterator requires at least Julia 1.5.
 
@@ -291,10 +293,10 @@ function accumulate(op, A; dims::Union{Nothing,Integer}=nothing, kw...)
         # This branch takes care of the cases not handled by `_accumulate!`.
         return collect(Iterators.accumulate(op, A; kw...))
     end
-    nt = kw.data
-    if nt isa NamedTuple{()}
+    nt = values(kw)
+    if isempty(kw)
         out = similar(A, promote_op(op, eltype(A), eltype(A)))
-    elseif nt isa NamedTuple{(:init,)}
+    elseif keys(nt) === (:init,)
         out = similar(A, promote_op(op, typeof(nt.init), eltype(A)))
     else
         throw(ArgumentError("acccumulate does not support the keyword arguments $(setdiff(keys(nt), (:init,)))"))
@@ -354,10 +356,10 @@ julia> B
 ```
 """
 function accumulate!(op, B, A; dims::Union{Integer, Nothing} = nothing, kw...)
-    nt = kw.data
-    if nt isa NamedTuple{()}
+    nt = values(kw)
+    if isempty(kw)
         _accumulate!(op, B, A, dims, nothing)
-    elseif nt isa NamedTuple{(:init,)}
+    elseif keys(kw) === (:init,)
         _accumulate!(op, B, A, dims, Some(nt.init))
     else
         throw(ArgumentError("acccumulate! does not support the keyword arguments $(setdiff(keys(nt), (:init,)))"))
diff --git a/base/array.jl b/base/array.jl
index e680d035897be6..bd9d3b87335416 100644
--- a/base/array.jl
+++ b/base/array.jl
@@ -9,7 +9,7 @@ The objects called do not have matching dimensionality. Optional argument `msg`
 descriptive error string.
 """
 struct DimensionMismatch <: Exception
-    msg::AbstractString
+    msg::String
 end
 DimensionMismatch() = DimensionMismatch("")
 
@@ -54,6 +54,8 @@ Array
 
 One-dimensional dense array with elements of type `T`, often used to represent
 a mathematical vector. Alias for [`Array{T,1}`](@ref).
+
+See also [`empty`](@ref), [`similar`](@ref) and [`zero`](@ref) for creating vectors.
 """
 const Vector{T} = Array{T,1}
 
@@ -62,12 +64,28 @@ const Vector{T} = Array{T,1}
 
 Two-dimensional dense array with elements of type `T`, often used to represent
 a mathematical matrix. Alias for [`Array{T,2}`](@ref).
+
+See also [`fill`](@ref), [`zeros`](@ref), [`undef`](@ref) and [`similar`](@ref)
+for creating matrices.
 """
 const Matrix{T} = Array{T,2}
+
 """
     VecOrMat{T}
 
-Union type of [`Vector{T}`](@ref) and [`Matrix{T}`](@ref).
+Union type of [`Vector{T}`](@ref) and [`Matrix{T}`](@ref), which allows functions to accept either a `Matrix` or a `Vector`.
+
+# Examples
+```jldoctest
+julia> Vector{Float64} <: VecOrMat{Float64}
+true
+
+julia> Matrix{Float64} <: VecOrMat{Float64}
+true
+
+julia> Array{Float64, 3} <: VecOrMat{Float64}
+false
+```
 """
 const VecOrMat{T} = Union{Vector{T}, Matrix{T}}
 
@@ -132,7 +150,7 @@ end
 size(a::Array, d::Integer) = arraysize(a, convert(Int, d))
 size(a::Vector) = (arraysize(a,1),)
 size(a::Matrix) = (arraysize(a,1), arraysize(a,2))
-size(a::Array{<:Any,N}) where {N} = (@_inline_meta; ntuple(M -> size(a, M), Val(N))::Dims)
+size(a::Array{<:Any,N}) where {N} = (@inline; ntuple(M -> size(a, M), Val(N))::Dims)
 
 asize_from(a::Array, n) = n > ndims(a) ? () : (arraysize(a,n), asize_from(a, n+1)...)
 
@@ -156,7 +174,7 @@ isbitsunion(u::Union) = allocatedinline(u)
 isbitsunion(x) = false
 
 function _unsetindex!(A::Array{T}, i::Int) where {T}
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(A, i)
     t = @_gc_preserve_begin A
     p = Ptr{Ptr{Cvoid}}(pointer(A, i))
@@ -199,7 +217,7 @@ elsize(::Type{<:Array{T}}) where {T} = aligned_sizeof(T)
 sizeof(a::Array) = Core.sizeof(a)
 
 function isassigned(a::Array, i::Int...)
-    @_inline_meta
+    @inline
     ii = (_sub2ind(size(a), i...) % UInt) - 1
     @boundscheck ii < length(a) % UInt || return false
     ccall(:jl_array_isassigned, Cint, (Any, UInt), a, ii) == 1
@@ -318,7 +336,7 @@ end
 # occurs, see discussion in #27874.
 # It is also mitigated by using a constant string.
 function _throw_argerror()
-    @_noinline_meta
+    @noinline
     throw(ArgumentError("Number of elements to copy must be nonnegative."))
 end
 
@@ -343,6 +361,8 @@ end
 Create a shallow copy of `x`: the outer structure is copied, but not all internal values.
 For example, copying an array produces a new array with identically-same elements as the
 original.
+
+See also [`copy!`](@ref Base.copy!), [`copyto!`](@ref).
 """
 copy
 
@@ -388,10 +408,10 @@ function getindex(::Type{T}, vals...) where T
     return a
 end
 
-getindex(::Type{T}) where {T} = (@_inline_meta; Vector{T}())
-getindex(::Type{T}, x) where {T} = (@_inline_meta; a = Vector{T}(undef, 1); @inbounds a[1] = x; a)
-getindex(::Type{T}, x, y) where {T} = (@_inline_meta; a = Vector{T}(undef, 2); @inbounds (a[1] = x; a[2] = y); a)
-getindex(::Type{T}, x, y, z) where {T} = (@_inline_meta; a = Vector{T}(undef, 3); @inbounds (a[1] = x; a[2] = y; a[3] = z); a)
+getindex(::Type{T}) where {T} = (@inline; Vector{T}())
+getindex(::Type{T}, x) where {T} = (@inline; a = Vector{T}(undef, 1); @inbounds a[1] = x; a)
+getindex(::Type{T}, x, y) where {T} = (@inline; a = Vector{T}(undef, 2); @inbounds (a[1] = x; a[2] = y); a)
+getindex(::Type{T}, x, y, z) where {T} = (@inline; a = Vector{T}(undef, 3); @inbounds (a[1] = x; a[2] = y; a[3] = z); a)
 
 function getindex(::Type{Any}, @nospecialize vals...)
     a = Vector{Any}(undef, length(vals))
@@ -411,14 +431,76 @@ to_dim(d::Integer) = d
 to_dim(d::OneTo) = last(d)
 
 """
-    fill(x, dims::Tuple)
-    fill(x, dims...)
+    fill(value, dims::Tuple)
+    fill(value, dims...)
+
+Create an array of size `dims` with every location set to `value`.
+
+For example, `fill(1.0, (5,5))` returns a 5×5 array of floats,
+with `1.0` in every location of the array.
 
-Create an array filled with the value `x`. For example, `fill(1.0, (5,5))` returns a 5×5
-array of floats, with each element initialized to `1.0`.
+The dimension lengths `dims` may be specified as either a tuple or a sequence of arguments.
+An `N`-length tuple or `N` arguments following the `value` specify an `N`-dimensional
+array. Thus, a common idiom for creating a zero-dimensional array with its only location
+set to `x` is `fill(x)`.
 
-`dims` may be specified as either a tuple or a sequence of arguments. For example,
-the common idiom `fill(x)` creates a zero-dimensional array containing the single value `x`.
+Every location of the returned array is set to (and is thus [`===`](@ref) to)
+the `value` that was passed; this means that if the `value` is itself modified,
+all elements of the `fill`ed array will reflect that modification because they're
+_still_ that very `value`. This is of no concern with `fill(1.0, (5,5))` as the
+`value` `1.0` is immutable and cannot itself be modified, but can be unexpected
+with mutable values like — most commonly — arrays.  For example, `fill([], 3)`
+places _the very same_ empty array in all three locations of the returned vector:
+
+```jldoctest
+julia> v = fill([], 3)
+3-element Vector{Vector{Any}}:
+ []
+ []
+ []
+
+julia> v[1] === v[2] === v[3]
+true
+
+julia> value = v[1]
+Any[]
+
+julia> push!(value, 867_5309)
+1-element Vector{Any}:
+ 8675309
+
+julia> v
+3-element Vector{Vector{Any}}:
+ [8675309]
+ [8675309]
+ [8675309]
+```
+
+To create an array of many independent inner arrays, use a [comprehension](@ref man-comprehensions) instead.
+This creates a new and distinct array on each iteration of the loop:
+
+```jldoctest
+julia> v2 = [[] for _ in 1:3]
+3-element Vector{Vector{Any}}:
+ []
+ []
+ []
+
+julia> v2[1] === v2[2] === v2[3]
+false
+
+julia> push!(v2[1], 8675309)
+1-element Vector{Any}:
+ 8675309
+
+julia> v2
+3-element Vector{Vector{Any}}:
+ [8675309]
+ []
+ []
+```
+
+See also: [`fill!`](@ref), [`zeros`](@ref), [`ones`](@ref), [`similar`](@ref).
 
 # Examples
 ```jldoctest
@@ -430,15 +512,15 @@ julia> fill(1.0, (2,3))
 julia> fill(42)
 0-dimensional Array{Int64, 0}:
 42
-```
 
-If `x` is an object reference, all elements will refer to the same object:
-```jldoctest
-julia> A = fill(zeros(2), 2);
+julia> A = fill(zeros(2), 2) # sets both elements to the same [0.0, 0.0] vector
+2-element Vector{Vector{Float64}}:
+ [0.0, 0.0]
+ [0.0, 0.0]
 
-julia> A[1][1] = 42; # modifies both A[1][1] and A[2][1]
+julia> A[1][1] = 42; # modifies the filled value to be [42.0, 0.0]
 
-julia> A
+julia> A # both A[1] and A[2] are the very same vector
 2-element Vector{Vector{Float64}}:
  [42.0, 0.0]
  [42.0, 0.0]
@@ -456,7 +538,7 @@ fill(v, dims::Tuple{}) = (a=Array{typeof(v),0}(undef, dims); fill!(a, v); a)
     zeros([T=Float64,] dims...)
 
 Create an `Array`, with element type `T`, of all zeros with size specified by `dims`.
-See also [`fill`](@ref), [`ones`](@ref).
+See also [`fill`](@ref), [`ones`](@ref), [`zero`](@ref).
 
 # Examples
 ```jldoctest
@@ -477,7 +559,7 @@ function zeros end
     ones([T=Float64,] dims...)
 
 Create an `Array`, with element type `T`, of all ones with size specified by `dims`.
-See also: [`fill`](@ref), [`zeros`](@ref).
+See also [`fill`](@ref), [`zeros`](@ref).
 
 # Examples
 ```jldoctest
@@ -587,6 +669,8 @@ Return an `Array` of all items in a collection or iterator. For dictionaries, re
 [`HasShape`](@ref IteratorSize) trait, the result will have the same shape
 and number of dimensions as the argument.
 
+Used by comprehensions to turn a generator into an `Array`.
+
 # Examples
 ```jldoctest
 julia> collect(1:2:13)
@@ -598,6 +682,13 @@ julia> collect(1:2:13)
   9
  11
  13
+
+julia> [x^2 for x in 1:8 if isodd(x)]
+4-element Vector{Int64}:
+  1
+  9
+ 25
+ 49
 ```
 """
 collect(itr) = _collect(1:1 #= Array =#, itr, IteratorEltype(itr), IteratorSize(itr))
@@ -648,10 +739,11 @@ if isdefined(Core, :Compiler)
         I = esc(itr)
         return quote
             if $I isa Generator && ($I).f isa Type
-                ($I).f
+                T = ($I).f
             else
-                Core.Compiler.return_type(_iterator_upper_bound, Tuple{typeof($I)})
+                T = Core.Compiler.return_type(_iterator_upper_bound, Tuple{typeof($I)})
             end
+            promote_typejoin_union(T)
         end
     end
 else
@@ -659,7 +751,7 @@ else
         I = esc(itr)
         return quote
             if $I isa Generator && ($I).f isa Type
-                ($I).f
+                promote_typejoin_union($I.f)
             else
                 Any
             end
@@ -667,8 +759,10 @@ else
     end
 end
 
-_array_for(::Type{T}, itr, ::HasLength) where {T} = Vector{T}(undef, Int(length(itr)::Integer))
-_array_for(::Type{T}, itr, ::HasShape{N}) where {T,N} = similar(Array{T,N}, axes(itr))
+_array_for(::Type{T}, itr, isz::HasLength) where {T} = _array_for(T, itr, isz, length(itr))
+_array_for(::Type{T}, itr, isz::HasShape{N}) where {T,N} = _array_for(T, itr, isz, axes(itr))
+_array_for(::Type{T}, itr, ::HasLength, len) where {T} = Vector{T}(undef, len)
+_array_for(::Type{T}, itr, ::HasShape{N}, axs) where {T,N} = similar(Array{T,N}, axs)
 
 function collect(itr::Generator)
     isz = IteratorSize(itr.iter)
@@ -676,12 +770,18 @@ function collect(itr::Generator)
     if isa(isz, SizeUnknown)
         return grow_to!(Vector{et}(), itr)
     else
+        shape = isz isa HasLength ? length(itr) : axes(itr)
         y = iterate(itr)
         if y === nothing
             return _array_for(et, itr.iter, isz)
         end
         v1, st = y
-        collect_to_with_first!(_array_for(typeof(v1), itr.iter, isz), v1, itr, st)
+        dest = _array_for(typeof(v1), itr.iter, isz, shape)
+        # The typeassert gives inference a helping hand on the element type and dimensionality
+        # (work-around for #28382)
+        et′ = et <: Type ? Type : et
+        RT = dest isa AbstractArray ? AbstractArray{<:et′, ndims(dest)} : Any
+        collect_to_with_first!(dest, v1, itr, st)::RT
     end
 end
 
@@ -709,7 +809,7 @@ function collect_to_with_first!(dest, v1, itr, st)
 end
 
 function setindex_widen_up_to(dest::AbstractArray{T}, el, i) where T
-    @_inline_meta
+    @inline
     new = similar(dest, promote_typejoin(T, typeof(el)))
     f = first(LinearIndices(dest))
     copyto!(new, first(LinearIndices(new)), dest, f, i-f)
@@ -745,7 +845,7 @@ function grow_to!(dest, itr)
 end
 
 function push_widen(dest, el)
-    @_inline_meta
+    @inline
     new = sizehint!(empty(dest, promote_typejoin(eltype(dest), typeof(el))), length(dest))
     if new isa AbstractSet
         # TODO: merge back these two branches when copy! is re-enabled for sets/vectors
@@ -775,7 +875,7 @@ end
 
 ## Iteration ##
 
-iterate(A::Array, i=1) = (@_inline_meta; (i % UInt) - 1 < length(A) ? (@inbounds A[i], i + 1) : nothing)
+iterate(A::Array, i=1) = (@inline; (i % UInt) - 1 < length(A) ? (@inbounds A[i], i + 1) : nothing)
 
 ## Indexing: getindex ##
 
@@ -785,6 +885,8 @@ iterate(A::Array, i=1) = (@_inline_meta; (i % UInt) - 1 < length(A) ? (@inbounds
 Retrieve the value(s) stored at the given key or index within a collection. The syntax
 `a[i,j,...]` is converted by the compiler to `getindex(a, i, j, ...)`.
 
+See also [`get`](@ref), [`keys`](@ref), [`eachindex`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = Dict("a" => 1, "b" => 2)
@@ -800,19 +902,23 @@ function getindex end
 
 # This is more complicated than it needs to be in order to get Win64 through bootstrap
 @eval getindex(A::Array, i1::Int) = arrayref($(Expr(:boundscheck)), A, i1)
-@eval getindex(A::Array, i1::Int, i2::Int, I::Int...) = (@_inline_meta; arrayref($(Expr(:boundscheck)), A, i1, i2, I...))
+@eval getindex(A::Array, i1::Int, i2::Int, I::Int...) = (@inline; arrayref($(Expr(:boundscheck)), A, i1, i2, I...))
 
-# Faster contiguous indexing using copyto! for UnitRange and Colon
-function getindex(A::Array, I::UnitRange{Int})
-    @_inline_meta
+# Faster contiguous indexing using copyto! for AbstractUnitRange and Colon
+function getindex(A::Array, I::AbstractUnitRange{<:Integer})
+    @inline
     @boundscheck checkbounds(A, I)
     lI = length(I)
-    X = similar(A, lI)
+    X = similar(A, axes(I))
     if lI > 0
-        unsafe_copyto!(X, 1, A, first(I), lI)
+        copyto!(X, firstindex(X), A, first(I), lI)
     end
     return X
 end
+
+# getindex for carrying out logical indexing for AbstractUnitRange{Bool} as Bool <: Integer
+getindex(a::Array, r::AbstractUnitRange{Bool}) = getindex(a, to_index(r))
+
 function getindex(A::Array, c::Colon)
     lI = length(A)
     X = similar(A, lI)
@@ -839,7 +945,7 @@ function setindex! end
 
 @eval setindex!(A::Array{T}, x, i1::Int) where {T} = arrayset($(Expr(:boundscheck)), A, convert(T,x)::T, i1)
 @eval setindex!(A::Array{T}, x, i1::Int, i2::Int, I::Int...) where {T} =
-    (@_inline_meta; arrayset($(Expr(:boundscheck)), A, convert(T,x)::T, i1, i2, I...))
+    (@inline; arrayset($(Expr(:boundscheck)), A, convert(T,x)::T, i1, i2, I...))
 
 # This is redundant with the abstract fallbacks but needed and helpful for bootstrap
 function setindex!(A::Array, X::AbstractArray, I::AbstractVector{Int})
@@ -858,8 +964,8 @@ function setindex!(A::Array, X::AbstractArray, I::AbstractVector{Int})
 end
 
 # Faster contiguous setindex! with copyto!
-function setindex!(A::Array{T}, X::Array{T}, I::UnitRange{Int}) where T
-    @_inline_meta
+function setindex!(A::Array{T}, X::Array{T}, I::AbstractUnitRange{Int}) where T
+    @inline
     @boundscheck checkbounds(A, I)
     lI = length(I)
     @boundscheck setindex_shape_check(X, lI)
@@ -869,7 +975,7 @@ function setindex!(A::Array{T}, X::Array{T}, I::UnitRange{Int}) where T
     return A
 end
 function setindex!(A::Array{T}, X::Array{T}, c::Colon) where T
-    @_inline_meta
+    @inline
     lI = length(A)
     @boundscheck setindex_shape_check(X, lI)
     if lI > 0
@@ -921,6 +1027,8 @@ collection to it. The result of the preceding example is equivalent to `append!(
 5, 6])`. For `AbstractSet` objects, [`union!`](@ref) can be used instead.
 
 See [`sizehint!`](@ref) for notes about the performance model.
+
+See also [`pushfirst!`](@ref).
 """
 function push! end
 
@@ -928,7 +1036,7 @@ function push!(a::Array{T,1}, item) where T
     # convert first so we don't grow the array if the assignment won't work
     itemT = convert(T, item)
     _growend!(a, 1)
-    a[end] = itemT
+    @inbounds a[end] = itemT
     return a
 end
 
@@ -970,6 +1078,9 @@ themselves in another collection. The result of the preceding example is equival
 `push!([1, 2, 3], 4, 5, 6)`.
 
 See [`sizehint!`](@ref) for notes about the performance model.
+
+See also [`vcat`](@ref) for vectors, [`union!`](@ref) for sets,
+and [`prepend!`](@ref) and [`pushfirst!`](@ref) for the opposite order.
 """
 function append!(a::Vector, items::AbstractVector)
     itemindices = eachindex(items)
@@ -1144,6 +1255,8 @@ Remove an item in `collection` and return it. If `collection` is an
 ordered container, the last item is returned; for unordered containers,
 an arbitrary element is returned.
 
+See also: [`popfirst!`](@ref), [`popat!`](@ref), [`delete!`](@ref), [`deleteat!`](@ref), [`splice!`](@ref), and [`push!`](@ref).
+
 # Examples
 ```jldoctest
 julia> A=[1, 2, 3]
@@ -1192,7 +1305,8 @@ Remove the item at the given `i` and return it. Subsequent items
 are shifted to fill the resulting gap.
 When `i` is not a valid index for `a`, return `default`, or throw an error if
 `default` is not specified.
-See also [`deleteat!`](@ref) and [`splice!`](@ref).
+
+See also: [`pop!`](@ref), [`popfirst!`](@ref), [`deleteat!`](@ref), [`splice!`](@ref).
 
 !!! compat "Julia 1.5"
     This function is available as of Julia 1.5.
@@ -1265,6 +1379,8 @@ Remove the first `item` from `collection`.
 
 This function is called `shift` in many other programming languages.
 
+See also: [`pop!`](@ref), [`popat!`](@ref), [`delete!`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = [1, 2, 3, 4, 5, 6]
@@ -1303,16 +1419,19 @@ end
 Insert an `item` into `a` at the given `index`. `index` is the index of `item` in
 the resulting `a`.
 
+See also: [`push!`](@ref), [`replace`](@ref), [`popat!`](@ref), [`splice!`](@ref).
+
 # Examples
 ```jldoctest
-julia> insert!([6, 5, 4, 2, 1], 4, 3)
-6-element Vector{Int64}:
- 6
- 5
- 4
- 3
- 2
+julia> insert!(Any[1:6;], 3, "here")
+7-element Vector{Any}:
  1
+ 2
+  "here"
+ 3
+ 4
+ 5
+ 6
 ```
 """
 function insert!(a::Array{T,1}, i::Integer, item) where T
@@ -1330,6 +1449,8 @@ end
 Remove the item at the given `i` and return the modified `a`. Subsequent items
 are shifted to fill the resulting gap.
 
+See also: [`delete!`](@ref), [`popat!`](@ref), [`splice!`](@ref).
+
 # Examples
 ```jldoctest
 julia> deleteat!([6, 5, 4, 3, 2, 1], 2)
@@ -1343,7 +1464,7 @@ julia> deleteat!([6, 5, 4, 3, 2, 1], 2)
 """
 deleteat!(a::Vector, i::Integer) = (_deleteat!(a, i, 1); a)
 
-function deleteat!(a::Vector, r::UnitRange{<:Integer})
+function deleteat!(a::Vector, r::AbstractUnitRange{<:Integer})
     n = length(a)
     isempty(r) || _deleteat!(a, first(r), length(r))
     return a
@@ -1441,6 +1562,8 @@ Subsequent items are shifted left to fill the resulting gap.
 If specified, replacement values from an ordered
 collection will be spliced in place of the removed item.
 
+See also: [`replace`](@ref), [`delete!`](@ref), [`deleteat!`](@ref), [`pop!`](@ref), [`popat!`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = [6, 5, 4, 3, 2, 1]; splice!(A, 5)
@@ -1507,7 +1630,7 @@ Remove items at specified indices, and return a collection containing
 the removed items.
 Subsequent items are shifted left to fill the resulting gaps.
 If specified, replacement values from an ordered collection will be spliced in
-place of the removed items; in this case, `indices` must be a `UnitRange`.
+place of the removed items; in this case, `indices` must be an `AbstractUnitRange`.
 
 To insert `replacement` before an index `n` without removing any items, use
 `splice!(collection, n:n-1, replacement)`.
@@ -1515,6 +1638,9 @@ To insert `replacement` before an index `n` without removing any items, use
 !!! compat "Julia 1.5"
     Prior to Julia 1.5, `indices` must always be a `UnitRange`.
 
+!!! compat "Julia 1.8"
+    Prior to Julia 1.8, `indices` must be a `UnitRange` if splicing in replacement values.
+
 # Examples
 ```jldoctest
 julia> A = [-1, -2, -3, 5, 4, 3, -1]; splice!(A, 4:3, 2)
@@ -1532,7 +1658,7 @@ julia> A
  -1
 ```
 """
-function splice!(a::Vector, r::UnitRange{<:Integer}, ins=_default_splice)
+function splice!(a::Vector, r::AbstractUnitRange{<:Integer}, ins=_default_splice)
     v = a[r]
     m = length(ins)
     if m == 0
@@ -1590,7 +1716,7 @@ end
     reverse(v [, start=1 [, stop=length(v) ]] )
 
 Return a copy of `v` reversed from start to stop.  See also [`Iterators.reverse`](@ref)
-for reverse-order iteration without making a copy.
+for reverse-order iteration without making a copy, and in-place [`reverse!`](@ref).
 
 # Examples
 ```jldoctest
@@ -1734,7 +1860,7 @@ function vcat(arrays::Vector{T}...) where T
     return arr
 end
 
-_cat(n::Integer, x::Integer...) = reshape([x...], (ntuple(x->1, n-1)..., length(x)))
+_cat(n::Integer, x::Integer...) = reshape([x...], (ntuple(Returns(1), n-1)..., length(x)))
 
 ## find ##
 
@@ -1793,6 +1919,8 @@ To search for other kinds of values, pass a predicate as the first argument.
 Indices or keys are of the same type as those returned by [`keys(A)`](@ref)
 and [`pairs(A)`](@ref).
 
+See also: [`findall`](@ref), [`findnext`](@ref), [`findlast`](@ref), [`searchsortedfirst`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = [false, false, true, false]
@@ -1934,6 +2062,8 @@ or `nothing` if not found.
 Indices are of the same type as those returned by [`keys(A)`](@ref)
 and [`pairs(A)`](@ref).
 
+See also: [`findnext`](@ref), [`findfirst`](@ref), [`findall`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = [false, false, true, true]
@@ -1979,6 +2109,8 @@ Return `nothing` if there is no `true` value in `A`.
 Indices or keys are of the same type as those returned by [`keys(A)`](@ref)
 and [`pairs(A)`](@ref).
 
+See also: [`findfirst`](@ref), [`findprev`](@ref), [`findall`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = [true, false, true, false]
@@ -2171,6 +2303,8 @@ To search for other kinds of values, pass a predicate as the first argument.
 Indices or keys are of the same type as those returned by [`keys(A)`](@ref)
 and [`pairs(A)`](@ref).
 
+See also: [`findfirst`](@ref), [`searchsorted`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = [true, false, false, true]
@@ -2228,6 +2362,8 @@ Return an array containing the first index in `b` for
 each value in `a` that is a member of `b`. The output
 array contains `nothing` wherever `a` is not a member of `b`.
 
+See also: [`sortperm`](@ref), [`findfirst`](@ref).
+
 # Examples
 ```jldoctest
 julia> a = ['a', 'b', 'c', 'b', 'd', 'a'];
@@ -2361,6 +2497,8 @@ The function `f` is passed one argument.
 !!! compat "Julia 1.4"
     Support for `a` as a tuple requires at least Julia 1.4.
 
+See also: [`filter!`](@ref), [`Iterators.filter`](@ref).
+
 # Examples
 ```jldoctest
 julia> a = 1:10
@@ -2470,19 +2608,27 @@ function _shrink!(shrinker!, v::AbstractVector, itrs)
     seen = Set{eltype(v)}()
     filter!(_grow_filter!(seen), v)
     shrinker!(seen, itrs...)
-    filter!(_in(seen), v)
+    filter!(in(seen), v)
 end
 
 intersect!(v::AbstractVector, itrs...) = _shrink!(intersect!, v, itrs)
 setdiff!(  v::AbstractVector, itrs...) = _shrink!(setdiff!, v, itrs)
 
-vectorfilter(f, v::AbstractVector) = filter(f, v) # TODO: do we want this special case?
-vectorfilter(f, v) = [x for x in v if f(x)]
+vectorfilter(T::Type, f, v) = T[x for x in v if f(x)]
 
 function _shrink(shrinker!, itr, itrs)
-    keep = shrinker!(Set(itr), itrs...)
-    vectorfilter(_shrink_filter!(keep), itr)
+    T = promote_eltype(itr, itrs...)
+    keep = shrinker!(Set{T}(itr), itrs...)
+    vectorfilter(T, _shrink_filter!(keep), itr)
 end
 
 intersect(itr, itrs...) = _shrink(intersect!, itr, itrs)
 setdiff(  itr, itrs...) = _shrink(setdiff!, itr, itrs)
+
+function intersect(v::AbstractVector, r::AbstractRange)
+    T = promote_eltype(v, r)
+    common = Iterators.filter(in(r), v)
+    seen = Set{T}(common)
+    return vectorfilter(T, _shrink_filter!(seen), common)
+end
+intersect(r::AbstractRange, v::AbstractVector) = intersect(v, r)
diff --git a/base/arrayshow.jl b/base/arrayshow.jl
index f942f877874844..0d480b64bb32d4 100644
--- a/base/arrayshow.jl
+++ b/base/arrayshow.jl
@@ -59,7 +59,8 @@ column going across the screen.
 """
 function alignment(io::IO, @nospecialize(X::AbstractVecOrMat),
         rows::AbstractVector{T}, cols::AbstractVector{V},
-        cols_if_complete::Integer, cols_otherwise::Integer, sep::Integer) where {T,V}
+        cols_if_complete::Integer, cols_otherwise::Integer, sep::Integer,
+        #= `size(X)` may not infer, set this in caller =# ncols::Integer=size(X, 2)) where {T,V}
     a = Tuple{T, V}[]
     for j in cols # need to go down each column one at a time
         l = r = 0
@@ -78,7 +79,7 @@ function alignment(io::IO, @nospecialize(X::AbstractVecOrMat),
             break
         end
     end
-    if 1 < length(a) < length(axes(X,2))
+    if 1 < length(a) < ncols
         while sum(map(sum,a)) + sep*length(a) >= cols_otherwise
             pop!(a)
         end
@@ -95,7 +96,8 @@ is specified as string sep.
 """
 function print_matrix_row(io::IO,
         @nospecialize(X::AbstractVecOrMat), A::Vector,
-        i::Integer, cols::AbstractVector, sep::AbstractString)
+        i::Integer, cols::AbstractVector, sep::AbstractString,
+        #= `axes(X)` may not infer, set this in caller =# idxlast::Integer=last(axes(X, 2)))
     for (k, j) = enumerate(cols)
         k > length(A) && break
         if isassigned(X,Int(i),Int(j)) # isassigned accepts only `Int` indices
@@ -114,7 +116,7 @@ function print_matrix_row(io::IO,
             sx = undef_ref_str
         end
         l = repeat(" ", A[k][1]-a[1]) # pad on left and right as needed
-        r = j == axes(X, 2)[end] ? "" : repeat(" ", A[k][2]-a[2])
+        r = j == idxlast ? "" : repeat(" ", A[k][2]-a[2])
         prettysx = replace_in_print_matrix(X,i,j,sx)
         print(io, l, prettysx, r)
         if k < length(A); print(io, sep); end
@@ -171,6 +173,7 @@ end
 
 function _print_matrix(io, @nospecialize(X::AbstractVecOrMat), pre, sep, post, hdots, vdots, ddots, hmod, vmod, rowsA, colsA)
     hmod, vmod = Int(hmod)::Int, Int(vmod)::Int
+    ncols, idxlast = length(colsA), last(colsA)
     if !(get(io, :limit, false)::Bool)
         screenheight = screenwidth = typemax(Int)
     else
@@ -201,26 +204,26 @@ function _print_matrix(io, @nospecialize(X::AbstractVecOrMat), pre, sep, post, h
     else
 	    colsA = [colsA;]
     end
-    A = alignment(io, X, rowsA, colsA, screenwidth, screenwidth, sepsize)
+    A = alignment(io, X, rowsA, colsA, screenwidth, screenwidth, sepsize, ncols)
     # Nine-slicing is accomplished using print_matrix_row repeatedly
     if m <= screenheight # rows fit vertically on screen
         if n <= length(A) # rows and cols fit so just print whole matrix in one piece
             for i in rowsA
                 print(io, i == first(rowsA) ? pre : presp)
-                print_matrix_row(io, X,A,i,colsA,sep)
+                print_matrix_row(io, X,A,i,colsA,sep,idxlast)
                 print(io, i == last(rowsA) ? post : postsp)
                 if i != last(rowsA); println(io); end
             end
         else # rows fit down screen but cols don't, so need horizontal ellipsis
             c = div(screenwidth-length(hdots)::Int+1,2)+1  # what goes to right of ellipsis
-            Ralign = reverse(alignment(io, X, rowsA, reverse(colsA), c, c, sepsize)) # alignments for right
+            Ralign = reverse(alignment(io, X, rowsA, reverse(colsA), c, c, sepsize, ncols)) # alignments for right
             c = screenwidth - sum(map(sum,Ralign)) - (length(Ralign)-1)*sepsize - length(hdots)::Int
-            Lalign = alignment(io, X, rowsA, colsA, c, c, sepsize) # alignments for left of ellipsis
+            Lalign = alignment(io, X, rowsA, colsA, c, c, sepsize, ncols) # alignments for left of ellipsis
             for i in rowsA
                 print(io, i == first(rowsA) ? pre : presp)
-                print_matrix_row(io, X,Lalign,i,colsA[1:length(Lalign)],sep)
+                print_matrix_row(io, X,Lalign,i,colsA[1:length(Lalign)],sep,idxlast)
                 print(io, (i - first(rowsA)) % hmod == 0 ? hdots : repeat(" ", length(hdots)::Int))
-                print_matrix_row(io, X, Ralign, i, (n - length(Ralign)) .+ colsA, sep)
+                print_matrix_row(io, X, Ralign, i, (n - length(Ralign)) .+ colsA, sep, idxlast)
                 print(io, i == last(rowsA) ? post : postsp)
                 if i != last(rowsA); println(io); end
             end
@@ -229,7 +232,7 @@ function _print_matrix(io, @nospecialize(X::AbstractVecOrMat), pre, sep, post, h
         if n <= length(A) # rows don't fit, cols do, so only vertical ellipsis
             for i in rowsA
                 print(io, i == first(rowsA) ? pre : presp)
-                print_matrix_row(io, X,A,i,colsA,sep)
+                print_matrix_row(io, X,A,i,colsA,sep,idxlast)
                 print(io, i == last(rowsA) ? post : postsp)
                 if i != rowsA[end] || i == rowsA[halfheight]; println(io); end
                 if i == rowsA[halfheight]
@@ -240,15 +243,15 @@ function _print_matrix(io, @nospecialize(X::AbstractVecOrMat), pre, sep, post, h
             end
         else # neither rows nor cols fit, so use all 3 kinds of dots
             c = div(screenwidth-length(hdots)::Int+1,2)+1
-            Ralign = reverse(alignment(io, X, rowsA, reverse(colsA), c, c, sepsize))
+            Ralign = reverse(alignment(io, X, rowsA, reverse(colsA), c, c, sepsize, ncols))
             c = screenwidth - sum(map(sum,Ralign)) - (length(Ralign)-1)*sepsize - length(hdots)::Int
-            Lalign = alignment(io, X, rowsA, colsA, c, c, sepsize)
+            Lalign = alignment(io, X, rowsA, colsA, c, c, sepsize, ncols)
             r = mod((length(Ralign)-n+1),vmod) # where to put dots on right half
             for i in rowsA
                 print(io, i == first(rowsA) ? pre : presp)
-                print_matrix_row(io, X,Lalign,i,colsA[1:length(Lalign)],sep)
+                print_matrix_row(io, X,Lalign,i,colsA[1:length(Lalign)],sep,idxlast)
                 print(io, (i - first(rowsA)) % hmod == 0 ? hdots : repeat(" ", length(hdots)::Int))
-                print_matrix_row(io, X,Ralign,i,(n-length(Ralign)).+colsA,sep)
+                print_matrix_row(io, X,Ralign,i,(n-length(Ralign)).+colsA,sep,idxlast)
                 print(io, i == last(rowsA) ? post : postsp)
                 if i != rowsA[end] || i == rowsA[halfheight]; println(io); end
                 if i == rowsA[halfheight]
@@ -271,17 +274,21 @@ end
 
 # typeinfo agnostic
 # n-dimensional arrays
-show_nd(io::IO, a::AbstractArray, print_matrix::Function, label_slices::Bool) =
-    _show_nd(io, inferencebarrier(a), print_matrix, label_slices, map(unitrange, axes(a)))
+show_nd(io::IO, a::AbstractArray, print_matrix::Function, show_full::Bool) =
+    _show_nd(io, inferencebarrier(a), print_matrix, show_full, map(unitrange, axes(a)))
 
-function _show_nd(io::IO, @nospecialize(a::AbstractArray), print_matrix::Function, label_slices::Bool, axs::Tuple{Vararg{AbstractUnitRange}})
+function _show_nd(io::IO, @nospecialize(a::AbstractArray), print_matrix::Function, show_full::Bool, axs::Tuple{Vararg{AbstractUnitRange}})
     limit::Bool = get(io, :limit, false)
     if isempty(a)
         return
     end
     tailinds = tail(tail(axs))
     nd = ndims(a)-2
-    for I in CartesianIndices(tailinds)
+    show_full || print(io, "[")
+    Is = CartesianIndices(tailinds)
+    lastidxs = first(Is).I
+    reached_last_d = false
+    for I in Is
         idxs = I.I
         if limit
             for i = 1:nd
@@ -296,7 +303,9 @@ function _show_nd(io::IO, @nospecialize(a::AbstractArray), print_matrix::Functio
                                 @goto skip
                             end
                         end
-                        print(io, "...\n\n")
+                        print(io, ";"^(i+2))
+                        print(io, " \u2026 ")
+                        show_full && print(io, "\n\n")
                         @goto skip
                     end
                     if ind[firstindex(ind)+2] < ii <= ind[end-3]
@@ -305,13 +314,29 @@ function _show_nd(io::IO, @nospecialize(a::AbstractArray), print_matrix::Functio
                 end
             end
         end
-        if label_slices
+        if show_full
             _show_nd_label(io, a, idxs)
         end
         slice = view(a, axs[1], axs[2], idxs...)
-        print_matrix(io, slice)
-        print(io, idxs == map(last,tailinds) ? "" : "\n\n")
+        if show_full
+            print_matrix(io, slice)
+            print(io, idxs == map(last,tailinds) ? "" : "\n\n")
+        else
+            idxdiff = lastidxs .- idxs .< 0
+            if any(idxdiff)
+                lastchangeindex = 2 + findlast(idxdiff)
+                print(io, ";"^lastchangeindex)
+                lastchangeindex == ndims(a) && (reached_last_d = true)
+                print(io, " ")
+            end
+            print_matrix(io, slice)
+        end
         @label skip
+        lastidxs = idxs
+    end
+    if !show_full
+        reached_last_d || print(io, ";"^(nd+2))
+        print(io, "]")
     end
 end
 
@@ -386,9 +411,9 @@ end
 preceded by `prefix`, supposed to encode the type of the elements.
 """
 _show_nonempty(io::IO, X::AbstractMatrix, prefix::String) =
-    _show_nonempty(io, inferencebarrier(X), prefix, axes(X))
+    _show_nonempty(io, inferencebarrier(X), prefix, false, axes(X))
 
-function _show_nonempty(io::IO, @nospecialize(X::AbstractMatrix), prefix::String, axs::Tuple{AbstractUnitRange,AbstractUnitRange})
+function _show_nonempty(io::IO, @nospecialize(X::AbstractMatrix), prefix::String, drop_brackets::Bool, axs::Tuple{AbstractUnitRange,AbstractUnitRange})
     @assert !isempty(X)
     limit = get(io, :limit, false)::Bool
     indr, indc = axs
@@ -407,7 +432,7 @@ function _show_nonempty(io::IO, @nospecialize(X::AbstractMatrix), prefix::String
             cdots = true
         end
     end
-    print(io, prefix, "[")
+    drop_brackets || print(io, prefix, "[")
     for rr in (rr1, rr2)
         for i in rr
             for cr in (cr1, cr2)
@@ -429,12 +454,16 @@ function _show_nonempty(io::IO, @nospecialize(X::AbstractMatrix), prefix::String
         end
         last(rr) != last(indr) && rdots && print(io, "\u2026 ; ")
     end
-    print(io, "]")
+    if !drop_brackets
+        nc > 1 || print(io, ";;")
+        print(io, "]")
+    end
+    return nothing
 end
 
 
 _show_nonempty(io::IO, X::AbstractArray, prefix::String) =
-    show_nd(io, X, (io, slice) -> _show_nonempty(io, slice, prefix), false)
+    show_nd(io, X, (io, slice) -> _show_nonempty(io, inferencebarrier(slice), prefix, true, axes(slice)), false)
 
 # a specific call path is used to show vectors (show_vector)
 _show_nonempty(::IO, ::AbstractVector, ::String) =
diff --git a/base/atomics.jl b/base/atomics.jl
index 97405d88fd4080..e6d62c3fc807b2 100644
--- a/base/atomics.jl
+++ b/base/atomics.jl
@@ -356,13 +356,13 @@ for typ in atomictypes
     rt = "$lt, $lt*"
     irt = "$ilt, $ilt*"
     @eval getindex(x::Atomic{$typ}) =
-        llvmcall($"""
+        GC.@preserve x llvmcall($"""
                  %ptr = inttoptr i$WORD_SIZE %0 to $lt*
                  %rv = load atomic $rt %ptr acquire, align $(gc_alignment(typ))
                  ret $lt %rv
                  """, $typ, Tuple{Ptr{$typ}}, unsafe_convert(Ptr{$typ}, x))
     @eval setindex!(x::Atomic{$typ}, v::$typ) =
-        llvmcall($"""
+        GC.@preserve x llvmcall($"""
                  %ptr = inttoptr i$WORD_SIZE %0 to $lt*
                  store atomic $lt %1, $lt* %ptr release, align $(gc_alignment(typ))
                  ret void
@@ -371,7 +371,7 @@ for typ in atomictypes
     # Note: atomic_cas! succeeded (i.e. it stored "new") if and only if the result is "cmp"
     if typ <: Integer
         @eval atomic_cas!(x::Atomic{$typ}, cmp::$typ, new::$typ) =
-            llvmcall($"""
+            GC.@preserve x llvmcall($"""
                      %ptr = inttoptr i$WORD_SIZE %0 to $lt*
                      %rs = cmpxchg $lt* %ptr, $lt %1, $lt %2 acq_rel acquire
                      %rv = extractvalue { $lt, i1 } %rs, 0
@@ -380,7 +380,7 @@ for typ in atomictypes
                      unsafe_convert(Ptr{$typ}, x), cmp, new)
     else
         @eval atomic_cas!(x::Atomic{$typ}, cmp::$typ, new::$typ) =
-            llvmcall($"""
+            GC.@preserve x llvmcall($"""
                      %iptr = inttoptr i$WORD_SIZE %0 to $ilt*
                      %icmp = bitcast $lt %1 to $ilt
                      %inew = bitcast $lt %2 to $ilt
@@ -403,7 +403,7 @@ for typ in atomictypes
         if rmwop in arithmetic_ops && !(typ <: ArithmeticTypes) continue end
         if typ <: Integer
             @eval $fn(x::Atomic{$typ}, v::$typ) =
-                llvmcall($"""
+                GC.@preserve x llvmcall($"""
                          %ptr = inttoptr i$WORD_SIZE %0 to $lt*
                          %rv = atomicrmw $rmw $lt* %ptr, $lt %1 acq_rel
                          ret $lt %rv
@@ -411,7 +411,7 @@ for typ in atomictypes
         else
             rmwop === :xchg || continue
             @eval $fn(x::Atomic{$typ}, v::$typ) =
-                llvmcall($"""
+                GC.@preserve x llvmcall($"""
                          %iptr = inttoptr i$WORD_SIZE %0 to $ilt*
                          %ival = bitcast $lt %1 to $ilt
                          %irv = atomicrmw $rmw $ilt* %iptr, $ilt %ival acq_rel
diff --git a/base/baseext.jl b/base/baseext.jl
index 75ef96caa94be3..8ebd599312453e 100644
--- a/base/baseext.jl
+++ b/base/baseext.jl
@@ -2,6 +2,17 @@
 
 # extensions to Core types to add features in Base
 
+"""
+    VecElement{T}
+
+A wrapper type that holds a single value of type `T`. When used in the context of an
+`NTuple{N, VecElement{T}} where {T, N}` object, it provides a hint to the runtime
+system to align that struct to be more amenable to vectorization optimization
+opportunities. In `ccall`, such an NTuple in the type signature will also use the
+vector register ABI, rather than the usual struct ABI.
+"""
+VecElement
+
 # hook up VecElement constructor to Base.convert
 VecElement{T}(arg) where {T} = VecElement{T}(convert(T, arg))
 convert(::Type{T}, arg::T) where {T<:VecElement} = arg
diff --git a/base/binaryplatforms.jl b/base/binaryplatforms.jl
index 8c9c6768090e0a..aff1de4a809936 100644
--- a/base/binaryplatforms.jl
+++ b/base/binaryplatforms.jl
@@ -141,7 +141,7 @@ function Base.setindex!(p::AbstractPlatform, v::String, k::String)
     return p
 end
 
-# Hash definitino to ensure that it's stable
+# Hash definition to ensure that it's stable
 function Base.hash(p::Platform, h::UInt)
     h += 0x506c6174666f726d % UInt
     h = hash(p.tags, h)
diff --git a/base/bitarray.jl b/base/bitarray.jl
index 6175a492cac75f..6fe94df3785160 100644
--- a/base/bitarray.jl
+++ b/base/bitarray.jl
@@ -703,7 +703,7 @@ end
 indexoffset(i) = first(i)-1
 indexoffset(::Colon) = 0
 
-@propagate_inbounds function setindex!(B::BitArray, X::AbstractArray, J0::Union{Colon,UnitRange{Int}})
+@propagate_inbounds function setindex!(B::BitArray, X::AbstractArray, J0::Union{Colon,AbstractUnitRange{Int}})
     _setindex!(IndexStyle(B), B, X, to_indices(B, (J0,))[1])
 end
 
@@ -954,7 +954,7 @@ function deleteat!(B::BitVector, i::Integer)
     return _deleteat!(B, i)
 end
 
-function deleteat!(B::BitVector, r::UnitRange{Int})
+function deleteat!(B::BitVector, r::AbstractUnitRange{Int})
     n = length(B)
     i_f = first(r)
     i_l = last(r)
@@ -1031,8 +1031,8 @@ end
 
 const _default_bit_splice = BitVector()
 
-function splice!(B::BitVector, r::Union{UnitRange{Int}, Integer}, ins::AbstractArray = _default_bit_splice)
-    _splice_int!(B, isa(r, UnitRange{Int}) ? r : Int(r), ins)
+function splice!(B::BitVector, r::Union{AbstractUnitRange{Int}, Integer}, ins::AbstractArray = _default_bit_splice)
+    _splice_int!(B, isa(r, AbstractUnitRange{Int}) ? r : Int(r), ins)
 end
 function _splice_int!(B::BitVector, r, ins)
     n = length(B)
@@ -1073,7 +1073,7 @@ function _splice_int!(B::BitVector, r, ins)
     return v
 end
 
-function splice!(B::BitVector, r::Union{UnitRange{Int}, Integer}, ins)
+function splice!(B::BitVector, r::Union{AbstractUnitRange{Int}, Integer}, ins)
     Bins = BitVector(undef, length(ins))
     i = 1
     for x in ins
@@ -1707,6 +1707,8 @@ map!(::typeof(identity), dest::BitArray, A::BitArray) = copyto!(dest, A)
 for (T, f) in ((:(Union{typeof(&), typeof(*), typeof(min)}), :(&)),
                (:(Union{typeof(|), typeof(max)}),            :(|)),
                (:(Union{typeof(xor), typeof(!=)}),           :xor),
+               (:(typeof(nand)),                             :nand),
+               (:(typeof(nor)),                              :nor),
                (:(Union{typeof(>=), typeof(^)}),             :((p, q) -> p | ~q)),
                (:(typeof(<=)),                               :((p, q) -> ~p | q)),
                (:(typeof(==)),                               :((p, q) -> ~xor(p, q))),
diff --git a/base/bool.jl b/base/bool.jl
index 92a27543d2fbc1..d42cfb0603279d 100644
--- a/base/bool.jl
+++ b/base/bool.jl
@@ -14,6 +14,8 @@ typemax(::Type{Bool}) = true
 Boolean not. Implements [three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic),
 returning [`missing`](@ref) if `x` is `missing`.
 
+See also [`~`](@ref) for bitwise not.
+
 # Examples
 ```jldoctest
 julia> !true
@@ -70,6 +72,74 @@ julia> [true; true; false] .⊻ [true; false; false]
 """
 xor(x::Bool, y::Bool) = (x != y)
 
+"""
+    nand(x, y)
+    ⊼(x, y)
+
+Bitwise nand (not and) of `x` and `y`. Implements
+[three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic),
+returning [`missing`](@ref) if one of the arguments is `missing`.
+
+The infix operation `a ⊼ b` is a synonym for `nand(a,b)`, and
+`⊼` can be typed by tab-completing `\\nand` or `\\barwedge` in the Julia REPL.
+
+# Examples
+```jldoctest
+julia> nand(true, false)
+true
+
+julia> nand(true, true)
+false
+
+julia> nand(true, missing)
+missing
+
+julia> false ⊼ false
+true
+
+julia> [true; true; false] .⊼ [true; false; false]
+3-element BitVector:
+ 0
+ 1
+ 1
+```
+"""
+nand(x...) = ~(&)(x...)
+
+"""
+    nor(x, y)
+    ⊽(x, y)
+
+Bitwise nor (not or) of `x` and `y`. Implements
+[three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic),
+returning [`missing`](@ref) if one of the arguments is `missing`.
+
+The infix operation `a ⊽ b` is a synonym for `nor(a,b)`, and
+`⊽` can be typed by tab-completing `\\nor` or `\\barvee` in the Julia REPL.
+
+# Examples
+```jldoctest
+julia> nor(true, false)
+false
+
+julia> nor(true, true)
+false
+
+julia> nor(true, missing)
+false
+
+julia> false ⊽ false
+true
+
+julia> [true; true; false] .⊽ [true; false; false]
+3-element BitVector:
+ 0
+ 0
+ 1
+```
+"""
+nor(x...) = ~(|)(x...)
+
 >>(x::Bool, c::UInt) = Int(x) >> c
 <<(x::Bool, c::UInt) = Int(x) << c
 >>>(x::Bool, c::UInt) = Int(x) >>> c
diff --git a/base/boot.jl b/base/boot.jl
index 200c7561f5ec64..4503bc9ab74c6a 100644
--- a/base/boot.jl
+++ b/base/boot.jl
@@ -171,7 +171,7 @@ export
     # key types
     Any, DataType, Vararg, NTuple,
     Tuple, Type, UnionAll, TypeVar, Union, Nothing, Cvoid,
-    AbstractArray, DenseArray, NamedTuple,
+    AbstractArray, DenseArray, NamedTuple, Pair,
     # special objects
     Function, Method,
     Module, Symbol, Task, Array, UndefInitializer, undef, WeakRef, VecElement,
@@ -187,11 +187,12 @@ export
     InterruptException, InexactError, OutOfMemoryError, ReadOnlyMemoryError,
     OverflowError, StackOverflowError, SegmentationFault, UndefRefError, UndefVarError,
     TypeError, ArgumentError, MethodError, AssertionError, LoadError, InitError,
-    UndefKeywordError,
+    UndefKeywordError, ConcurrencyViolationError,
     # AST representation
     Expr, QuoteNode, LineNumberNode, GlobalRef,
     # object model functions
-    fieldtype, getfield, setfield!, nfields, throw, tuple, ===, isdefined, eval, ifelse,
+    fieldtype, getfield, setfield!, swapfield!, modifyfield!, replacefield!,
+    nfields, throw, tuple, ===, isdefined, eval, ifelse,
     # sizeof    # not exported, to avoid conflicting with Base.sizeof
     # type reflection
     <:, typeof, isa, typeassert,
@@ -266,20 +267,15 @@ struct ErrorException <: Exception
     msg::AbstractString
 end
 
-macro _inline_meta()
-    Expr(:meta, :inline)
-end
-
-macro _noinline_meta()
-    Expr(:meta, :noinline)
-end
+macro inline()   Expr(:meta, :inline)   end
+macro noinline() Expr(:meta, :noinline) end
 
 struct BoundsError <: Exception
     a::Any
     i::Any
     BoundsError() = new()
-    BoundsError(@nospecialize(a)) = (@_noinline_meta; new(a))
-    BoundsError(@nospecialize(a), i) = (@_noinline_meta; new(a,i))
+    BoundsError(@nospecialize(a)) = (@noinline; new(a))
+    BoundsError(@nospecialize(a), i) = (@noinline; new(a,i))
 end
 struct DivideError         <: Exception end
 struct OutOfMemoryError    <: Exception end
@@ -290,12 +286,15 @@ struct UndefRefError       <: Exception end
 struct UndefVarError <: Exception
     var::Symbol
 end
+struct ConcurrencyViolationError <: Exception
+    msg::AbstractString
+end
 struct InterruptException <: Exception end
 struct DomainError <: Exception
     val
     msg::AbstractString
-    DomainError(@nospecialize(val)) = (@_noinline_meta; new(val, ""))
-    DomainError(@nospecialize(val), @nospecialize(msg)) = (@_noinline_meta; new(val, msg))
+    DomainError(@nospecialize(val)) = (@noinline; new(val, ""))
+    DomainError(@nospecialize(val), @nospecialize(msg)) = (@noinline; new(val, msg))
 end
 struct TypeError <: Exception
     # `func` is the name of the builtin function that encountered a type error,
@@ -316,7 +315,7 @@ struct InexactError <: Exception
     func::Symbol
     T  # Type
     val
-    InexactError(f::Symbol, @nospecialize(T), @nospecialize(val)) = (@_noinline_meta; new(f, T, val))
+    InexactError(f::Symbol, @nospecialize(T), @nospecialize(val)) = (@noinline; new(f, T, val))
 end
 struct OverflowError <: Exception
     msg::AbstractString
@@ -606,26 +605,26 @@ eval(Core, :(NamedTuple{names,T}(args::T) where {names, T <: Tuple} =
 
 import .Intrinsics: eq_int, trunc_int, lshr_int, sub_int, shl_int, bitcast, sext_int, zext_int, and_int
 
-throw_inexacterror(f::Symbol, ::Type{T}, val) where {T} = (@_noinline_meta; throw(InexactError(f, T, val)))
+throw_inexacterror(f::Symbol, ::Type{T}, val) where {T} = (@noinline; throw(InexactError(f, T, val)))
 
 function is_top_bit_set(x)
-    @_inline_meta
+    @inline
     eq_int(trunc_int(UInt8, lshr_int(x, sub_int(shl_int(sizeof(x), 3), 1))), trunc_int(UInt8, 1))
 end
 
 function is_top_bit_set(x::Union{Int8,UInt8})
-    @_inline_meta
+    @inline
     eq_int(lshr_int(x, 7), trunc_int(typeof(x), 1))
 end
 
 function check_top_bit(::Type{To}, x) where {To}
-    @_inline_meta
+    @inline
     is_top_bit_set(x) && throw_inexacterror(:check_top_bit, To, x)
     x
 end
 
 function checked_trunc_sint(::Type{To}, x::From) where {To,From}
-    @_inline_meta
+    @inline
     y = trunc_int(To, x)
     back = sext_int(From, y)
     eq_int(x, back) || throw_inexacterror(:trunc, To, x)
@@ -633,7 +632,7 @@ function checked_trunc_sint(::Type{To}, x::From) where {To,From}
 end
 
 function checked_trunc_uint(::Type{To}, x::From) where {To,From}
-    @_inline_meta
+    @inline
     y = trunc_int(To, x)
     back = zext_int(From, y)
     eq_int(x, back) || throw_inexacterror(:trunc, To, x)
@@ -810,4 +809,16 @@ _parse = nothing
 # support for deprecated uses of internal _apply function
 _apply(x...) = Core._apply_iterate(Main.Base.iterate, x...)
 
+struct Pair{A, B}
+    first::A
+    second::B
+    # if we didn't inline this, it's probably because the callsite was actually dynamic
+    # to avoid potentially compiling many copies of this, we mark the arguments with `@nospecialize`
+    # but also mark the whole function with `@inline` to ensure we will inline it whenever possible
+    # (even if `convert(::Type{A}, a::A)` for some reason was expensive)
+    Pair(a, b) = new{typeof(a), typeof(b)}(a, b)
+    Pair{A, B}(a::A, b::B) where {A, B} = new(a, b)
+    Pair{Any, Any}(@nospecialize(a::Any), @nospecialize(b::Any)) = new(a, b)
+end
+
 ccall(:jl_set_istopmod, Cvoid, (Any, Bool), Core, true)
diff --git a/base/broadcast.jl b/base/broadcast.jl
index de3263a64011b8..90479189ffee4d 100644
--- a/base/broadcast.jl
+++ b/base/broadcast.jl
@@ -8,10 +8,10 @@ Module containing the broadcasting implementation.
 module Broadcast
 
 using .Base.Cartesian
-using .Base: Indices, OneTo, tail, to_shape, isoperator, promote_typejoin, @pure,
+using .Base: Indices, OneTo, tail, to_shape, isoperator, promote_typejoin, promote_typejoin_union, @pure,
              _msk_end, unsafe_bitgetindex, bitcache_chunks, bitcache_size, dumpbitcache, unalias
 import .Base: copy, copyto!, axes
-export broadcast, broadcast!, BroadcastStyle, broadcast_axes, broadcastable, dotview, @__dot__, broadcast_preserving_zero_d, BroadcastFunction
+export broadcast, broadcast!, BroadcastStyle, broadcast_axes, broadcastable, dotview, @__dot__, BroadcastFunction
 
 ## Computing the result's axes: deprecated name
 const broadcast_axes = axes
@@ -179,6 +179,21 @@ function Broadcasted{Style}(f::F, args::Args, axes=nothing) where {Style, F, Arg
     Broadcasted{Style, typeof(axes), Core.Typeof(f), Args}(f, args, axes)
 end
 
+struct AndAnd end
+const andand = AndAnd()  # `const`, matching `oror`: a non-const global binding is untyped
+broadcasted(::AndAnd, a, b) = broadcasted((a, b) -> a && b, a, b)
+function broadcasted(::AndAnd, a, bc::Broadcasted)
+    bcf = flatten(bc)
+    broadcasted((a, args...) -> a && bcf.f(args...), a, bcf.args...)  # `bcf.f` runs only where `a` is true
+end
+struct OrOr end
+const oror = OrOr()
+broadcasted(::OrOr, a, b) = broadcasted((a, b) -> a || b, a, b)
+function broadcasted(::OrOr, a, bc::Broadcasted)
+    bcf = flatten(bc)
+    broadcasted((a, args...) -> a || bcf.f(args...), a, bcf.args...)
+end
+
 Base.convert(::Type{Broadcasted{NewStyle}}, bc::Broadcasted{Style,Axes,F,Args}) where {NewStyle,Style,Axes,F,Args} =
     Broadcasted{NewStyle,Axes,F,Args}(bc.f, bc.args, bc.axes)
 
@@ -551,7 +566,7 @@ an `Int`.
 """
 Base.@propagate_inbounds newindex(arg, I::CartesianIndex) = CartesianIndex(_newindex(axes(arg), I.I))
 Base.@propagate_inbounds newindex(arg, I::Integer) = CartesianIndex(_newindex(axes(arg), (I,)))
-Base.@propagate_inbounds _newindex(ax::Tuple, I::Tuple) = (ifelse(Base.unsafe_length(ax[1])==1, ax[1][1], I[1]), _newindex(tail(ax), tail(I))...)
+Base.@propagate_inbounds _newindex(ax::Tuple, I::Tuple) = (ifelse(length(ax[1]) == 1, ax[1][1], I[1]), _newindex(tail(ax), tail(I))...)
 Base.@propagate_inbounds _newindex(ax::Tuple{}, I::Tuple) = ()
 Base.@propagate_inbounds _newindex(ax::Tuple, I::Tuple{}) = (ax[1][1], _newindex(tail(ax), ())...)
 Base.@propagate_inbounds _newindex(ax::Tuple{}, I::Tuple{}) = ()
@@ -684,7 +699,7 @@ Base.RefValue{String}("hello")
 """
 broadcastable(x::Union{Symbol,AbstractString,Function,UndefInitializer,Nothing,RoundingMode,Missing,Val,Ptr,AbstractPattern,Pair}) = Ref(x)
 broadcastable(::Type{T}) where {T} = Ref{Type{T}}(T)
-broadcastable(x::Union{AbstractArray,Number,Ref,Tuple,Broadcasted}) = x
+broadcastable(x::Union{AbstractArray,Number,AbstractChar,Ref,Tuple,Broadcasted}) = x
 # Default to collecting iterables — which will error for non-iterables
 broadcastable(x) = collect(x)
 broadcastable(::Union{AbstractDict, NamedTuple}) = throw(ArgumentError("broadcasting over dictionaries and `NamedTuple`s is reserved"))
@@ -698,50 +713,6 @@ eltypes(t::Tuple{Any}) = Tuple{_broadcast_getindex_eltype(t[1])}
 eltypes(t::Tuple{Any,Any}) = Tuple{_broadcast_getindex_eltype(t[1]), _broadcast_getindex_eltype(t[2])}
 eltypes(t::Tuple) = Tuple{_broadcast_getindex_eltype(t[1]), eltypes(tail(t)).types...}
 
-function promote_typejoin_union(::Type{T}) where T
-    if T === Union{}
-        return Union{}
-    elseif T isa UnionAll
-        return Any # TODO: compute more precise bounds
-    elseif T isa Union
-        return promote_typejoin(promote_typejoin_union(T.a), promote_typejoin_union(T.b))
-    elseif T <: Tuple
-        return typejoin_union_tuple(T)
-    else
-        return T
-    end
-end
-
-@pure function typejoin_union_tuple(T::Type)
-    u = Base.unwrap_unionall(T)
-    u isa Union && return typejoin(
-            typejoin_union_tuple(Base.rewrap_unionall(u.a, T)),
-            typejoin_union_tuple(Base.rewrap_unionall(u.b, T)))
-    p = (u::DataType).parameters
-    lr = length(p)::Int
-    if lr == 0
-        return Tuple{}
-    end
-    c = Vector{Any}(undef, lr)
-    for i = 1:lr
-        pi = p[i]
-        U = Core.Compiler.unwrapva(pi)
-        if U === Union{}
-            ci = Union{}
-        elseif U isa Union
-            ci = typejoin(U.a, U.b)
-        else
-            ci = U
-        end
-        if i == lr && Core.Compiler.isvarargtype(pi)
-            c[i] = isdefined(pi, :N) ? Vararg{ci, pi.N} : Vararg{ci}
-        else
-            c[i] = ci
-        end
-    end
-    return Base.rewrap_unionall(Tuple{c...}, T)
-end
-
 # Inferred eltype of result of broadcast(f, args...)
 combine_eltypes(f, args::Tuple) =
     promote_typejoin_union(Base._return_type(f, eltypes(args)))
@@ -1106,19 +1077,20 @@ end
 
 ## scalar-range broadcast operations ##
 # DefaultArrayStyle and \ are not available at the time of range.jl
-broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::OrdinalRange) = r
-broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::StepRangeLen) = r
-broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::LinRange) = r
+broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::AbstractRange) = r
 
-broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::OrdinalRange) = range(-first(r), step=-step(r), length=length(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::AbstractRange) = range(-first(r), step=-step(r), length=length(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::OrdinalRange) = range(-first(r), -last(r), step=-step(r))
 broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::StepRangeLen) = StepRangeLen(-r.ref, -r.step, length(r), r.offset)
 broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::LinRange) = LinRange(-r.start, -r.stop, length(r))
 
-broadcasted(::DefaultArrayStyle{1}, ::typeof(+), x::Real, r::AbstractUnitRange) = range(x + first(r), length=length(r))
-broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::AbstractUnitRange, x::Real) = range(first(r) + x, length=length(r))
 # For #18336 we need to prevent promotion of the step type:
 broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::AbstractRange, x::Number) = range(first(r) + x, step=step(r), length=length(r))
 broadcasted(::DefaultArrayStyle{1}, ::typeof(+), x::Number, r::AbstractRange) = range(x + first(r), step=step(r), length=length(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::OrdinalRange, x::Real) = range(first(r) + x, last(r) + x, step=step(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(+), x::Real, r::OrdinalRange) = range(x + first(r), x + last(r), step=step(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::AbstractUnitRange, x::Real) = range(first(r) + x, last(r) + x)
+broadcasted(::DefaultArrayStyle{1}, ::typeof(+), x::Real, r::AbstractUnitRange) = range(x + first(r), x + last(r))
 broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::StepRangeLen{T}, x::Number) where T =
     StepRangeLen{typeof(T(r.ref)+x)}(r.ref + x, r.step, length(r), r.offset)
 broadcasted(::DefaultArrayStyle{1}, ::typeof(+), x::Number, r::StepRangeLen{T}) where T =
@@ -1127,9 +1099,11 @@ broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::LinRange, x::Number) = LinRa
 broadcasted(::DefaultArrayStyle{1}, ::typeof(+), x::Number, r::LinRange) = LinRange(x + r.start, x + r.stop, length(r))
 broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r1::AbstractRange, r2::AbstractRange) = r1 + r2
 
-broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::AbstractUnitRange, x::Number) = range(first(r)-x, length=length(r))
-broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::AbstractRange, x::Number) = range(first(r)-x, step=step(r), length=length(r))
-broadcasted(::DefaultArrayStyle{1}, ::typeof(-), x::Number, r::AbstractRange) = range(x-first(r), step=-step(r), length=length(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::AbstractRange, x::Number) = range(first(r) - x, step=step(r), length=length(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(-), x::Number, r::AbstractRange) = range(x - first(r), step=-step(r), length=length(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::OrdinalRange, x::Real) = range(first(r) - x, last(r) - x, step=step(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(-), x::Real, r::OrdinalRange) = range(x - first(r), x - last(r), step=-step(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::AbstractUnitRange, x::Real) = range(first(r) - x, last(r) - x)
 broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::StepRangeLen{T}, x::Number) where T =
     StepRangeLen{typeof(T(r.ref)-x)}(r.ref - x, r.step, length(r), r.offset)
 broadcasted(::DefaultArrayStyle{1}, ::typeof(-), x::Number, r::StepRangeLen{T}) where T =
@@ -1138,15 +1112,20 @@ broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::LinRange, x::Number) = LinRa
 broadcasted(::DefaultArrayStyle{1}, ::typeof(-), x::Number, r::LinRange) = LinRange(x - r.start, x - r.stop, length(r))
 broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r1::AbstractRange, r2::AbstractRange) = r1 - r2
 
-broadcasted(::DefaultArrayStyle{1}, ::typeof(*), x::Number, r::AbstractRange) = range(x*first(r), step=x*step(r), length=length(r))
+# at present Base.range_start_step_length(1,0,5) is an error, so for 0 .* (-2:2) we explicitly construct StepRangeLen:
+broadcasted(::DefaultArrayStyle{1}, ::typeof(*), x::Number, r::AbstractRange) = StepRangeLen(x*first(r), x*step(r), length(r))
 broadcasted(::DefaultArrayStyle{1}, ::typeof(*), x::Number, r::StepRangeLen{T}) where {T} =
     StepRangeLen{typeof(x*T(r.ref))}(x*r.ref, x*r.step, length(r), r.offset)
 broadcasted(::DefaultArrayStyle{1}, ::typeof(*), x::Number, r::LinRange) = LinRange(x * r.start, x * r.stop, r.len)
-# separate in case of noncommutative multiplication
-broadcasted(::DefaultArrayStyle{1}, ::typeof(*), r::AbstractRange, x::Number) = range(first(r)*x, step=step(r)*x, length=length(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(*), x::AbstractFloat, r::OrdinalRange) =
+    Base.range_start_step_length(x*first(r), x*step(r), length(r))  # 0.2 .* (-2:2) needs TwicePrecision
+# separate in case of noncommutative multiplication:
+broadcasted(::DefaultArrayStyle{1}, ::typeof(*), r::AbstractRange, x::Number) = StepRangeLen(first(r)*x, step(r)*x, length(r))
 broadcasted(::DefaultArrayStyle{1}, ::typeof(*), r::StepRangeLen{T}, x::Number) where {T} =
     StepRangeLen{typeof(T(r.ref)*x)}(r.ref*x, r.step*x, length(r), r.offset)
 broadcasted(::DefaultArrayStyle{1}, ::typeof(*), r::LinRange, x::Number) = LinRange(r.start * x, r.stop * x, r.len)
+broadcasted(::DefaultArrayStyle{1}, ::typeof(*), r::OrdinalRange, x::AbstractFloat) =
+    Base.range_start_step_length(first(r)*x, step(r)*x, length(r))
 
 broadcasted(::DefaultArrayStyle{1}, ::typeof(/), r::AbstractRange, x::Number) = range(first(r)/x, step=step(r)/x, length=length(r))
 broadcasted(::DefaultArrayStyle{1}, ::typeof(/), r::StepRangeLen{T}, x::Number) where {T} =
@@ -1257,15 +1236,9 @@ function __dot__(x::Expr)
         tmp = x.head === :(<:) ? :.<: : :.>:
         Expr(:call, tmp, dotargs...)
     else
-        if x.head === :&& || x.head === :||
-            error("""
-                Using `&&` and `||` is disallowed in `@.` expressions.
-                Use `&` or `|` for elementwise logical operations.
-                """)
-        end
-        head = string(x.head)
-        if last(head) == '=' && first(head) != '.'
-            Expr(Symbol('.',head), dotargs...)
+        head = String(x.head)::String
+        if last(head) == '=' && first(head) != '.' || head == "&&" || head == "||"
+            Expr(Symbol('.', head), dotargs...)
         else
             Expr(x.head, dotargs...)
         end
@@ -1300,7 +1273,13 @@ macro __dot__(x)
     esc(__dot__(x))
 end
 
-@inline broadcasted_kwsyntax(f, args...; kwargs...) = broadcasted((args...)->f(args...; kwargs...), args...)
+@inline function broadcasted_kwsyntax(f, args...; kwargs...)
+    if isempty(kwargs) # some BroadcastStyles dispatch on `f`, so try to preserve its type
+        return broadcasted(f, args...)
+    else
+        return broadcasted((args...) -> f(args...; kwargs...), args...)
+    end
+end
 @inline function broadcasted(f, args...)
     args′ = map(broadcastable, args)
     broadcasted(combine_styles(args′...), f, args′...)
diff --git a/base/channels.jl b/base/channels.jl
index fc6e1381b64b51..31cbd98a2abb26 100644
--- a/base/channels.jl
+++ b/base/channels.jl
@@ -295,9 +295,10 @@ function close_chnl_on_taskdone(t::Task, c::Channel)
 end
 
 struct InvalidStateException <: Exception
-    msg::AbstractString
+    msg::String
     state::Symbol
 end
+showerror(io::IO, ex::InvalidStateException) = print(io, "InvalidStateException: ", ex.msg)
 
 """
     put!(c::Channel, v)
@@ -422,6 +423,7 @@ n_avail(c::Channel) = isbuffered(c) ? length(c.data) : length(c.cond_put.waitq)
 isempty(c::Channel) = isbuffered(c) ? isempty(c.data) : isempty(c.cond_put.waitq)
 
 lock(c::Channel) = lock(c.cond_take)
+lock(f, c::Channel) = lock(f, c.cond_take)
 unlock(c::Channel) = unlock(c.cond_take)
 trylock(c::Channel) = trylock(c.cond_take)
 
diff --git a/base/char.jl b/base/char.jl
index a4071d37e41b27..0584471cb6a337 100644
--- a/base/char.jl
+++ b/base/char.jl
@@ -108,8 +108,9 @@ end
     ismalformed(c::AbstractChar) -> Bool
 
 Return `true` if `c` represents malformed (non-Unicode) data according to the
-encoding used by `c`.  Defaults to `false` for non-`Char` types.  See also
-[`show_invalid`](@ref).
+encoding used by `c`. Defaults to `false` for non-`Char` types.
+
+See also [`show_invalid`](@ref).
 """
 ismalformed(c::AbstractChar) = false
 
@@ -117,8 +118,9 @@ ismalformed(c::AbstractChar) = false
     isoverlong(c::AbstractChar) -> Bool
 
 Return `true` if `c` represents an overlong UTF-8 sequence. Defaults
-to `false` for non-`Char` types.  See also [`decode_overlong`](@ref)
-and [`show_invalid`](@ref).
+to `false` for non-`Char` types.
+
+See also [`decode_overlong`](@ref) and [`show_invalid`](@ref).
 """
 isoverlong(c::AbstractChar) = false
 
diff --git a/base/checked.jl b/base/checked.jl
index 840015861923fc..ad92a44e1e5bca 100644
--- a/base/checked.jl
+++ b/base/checked.jl
@@ -6,14 +6,14 @@ module Checked
 
 export checked_neg, checked_abs, checked_add, checked_sub, checked_mul,
        checked_div, checked_rem, checked_fld, checked_mod, checked_cld,
-       add_with_overflow, sub_with_overflow, mul_with_overflow
+       checked_length, add_with_overflow, sub_with_overflow, mul_with_overflow
 
 import Core.Intrinsics:
        checked_sadd_int, checked_ssub_int, checked_smul_int, checked_sdiv_int,
        checked_srem_int,
        checked_uadd_int, checked_usub_int, checked_umul_int, checked_udiv_int,
        checked_urem_int
-import ..no_op_err, ..@_inline_meta, ..@_noinline_meta
+import ..no_op_err, ..@inline, ..@noinline, ..checked_length
 
 # define promotion behavior for checked operations
 checked_add(x::Integer, y::Integer) = checked_add(promote(x,y)...)
@@ -86,7 +86,7 @@ The overflow protection may impose a perceptible performance penalty.
 function checked_neg(x::T) where T<:Integer
     checked_sub(T(0), x)
 end
-throw_overflowerr_negation(x) = (@_noinline_meta;
+throw_overflowerr_negation(x) = (@noinline;
     throw(OverflowError(Base.invokelatest(string, "checked arithmetic: cannot compute -x for x = ", x, "::", typeof(x)))))
 if BrokenSignedInt != Union{}
 function checked_neg(x::BrokenSignedInt)
@@ -150,7 +150,7 @@ end
 end
 
 
-throw_overflowerr_binaryop(op, x, y) = (@_noinline_meta;
+throw_overflowerr_binaryop(op, x, y) = (@noinline;
     throw(OverflowError(Base.invokelatest(string, x, " ", op, " ", y, " overflowed for type ", typeof(x)))))
 
 """
@@ -161,7 +161,7 @@ Calculates `x+y`, checking for overflow errors where applicable.
 The overflow protection may impose a perceptible performance penalty.
 """
 function checked_add(x::T, y::T) where T<:Integer
-    @_inline_meta
+    @inline
     z, b = add_with_overflow(x, y)
     b && throw_overflowerr_binaryop(:+, x, y)
     z
@@ -218,7 +218,7 @@ Calculates `x-y`, checking for overflow errors where applicable.
 The overflow protection may impose a perceptible performance penalty.
 """
 function checked_sub(x::T, y::T) where T<:Integer
-    @_inline_meta
+    @inline
     z, b = sub_with_overflow(x, y)
     b && throw_overflowerr_binaryop(:-, x, y)
     z
@@ -283,7 +283,7 @@ Calculates `x*y`, checking for overflow errors where applicable.
 The overflow protection may impose a perceptible performance penalty.
 """
 function checked_mul(x::T, y::T) where T<:Integer
-    @_inline_meta
+    @inline
     z, b = mul_with_overflow(x, y)
     b && throw_overflowerr_binaryop(:*, x, y)
     z
@@ -349,4 +349,12 @@ The overflow protection may impose a perceptible performance penalty.
 """
 checked_cld(x::T, y::T) where {T<:Integer} = cld(x, y) # Base.cld already checks
 
+"""
+    Base.checked_length(r)
+
+Calculates `length(r)`, but may check for overflow errors where applicable when
+the result doesn't fit into `Union{Integer(eltype(r)),Int}`.
+"""
+checked_length(r) = length(r) # for most things, length doesn't error
+
 end
diff --git a/base/client.jl b/base/client.jl
index d9df2f04922dd8..7e5f1ab5c5d58e 100644
--- a/base/client.jl
+++ b/base/client.jl
@@ -98,13 +98,13 @@ function display_error(io::IO, er, bt)
     showerror(IOContext(io, :limit => true), er, bt, backtrace = bt!==nothing)
     println(io)
 end
-function display_error(io::IO, stack::Vector)
+function display_error(io::IO, stack::ExceptionStack)
     printstyled(io, "ERROR: "; bold=true, color=Base.error_color())
     bt = Any[ (x[1], scrub_repl_backtrace(x[2])) for x in stack ]
     show_exception_stack(IOContext(io, :limit => true), bt)
     println(io)
 end
-display_error(stack::Vector) = display_error(stderr, stack)
+display_error(stack::ExceptionStack) = display_error(stderr, stack)
 display_error(er, bt=nothing) = display_error(stderr, er, bt)
 
 function eval_user_input(errio, @nospecialize(ast), show_value::Bool)
@@ -143,7 +143,7 @@ function eval_user_input(errio, @nospecialize(ast), show_value::Bool)
                 @error "SYSTEM: display_error(errio, lasterr) caused an error"
             end
             errcount += 1
-            lasterr = catch_stack()
+            lasterr = current_exceptions()
             if errcount > 2
                 @error "It is likely that something important is broken, and Julia will not be able to continue normally" errcount
                 break
@@ -257,7 +257,7 @@ function exec_options(opts)
         try
             load_julia_startup()
         catch
-            invokelatest(display_error, catch_stack())
+            invokelatest(display_error, current_exceptions())
             !(repl || is_interactive) && exit(1)
         end
     end
@@ -291,7 +291,7 @@ function exec_options(opts)
         try
             include(Main, PROGRAM_FILE)
         catch
-            invokelatest(display_error, catch_stack())
+            invokelatest(display_error, current_exceptions())
             if !is_interactive::Bool
                 exit(1)
             end
@@ -494,7 +494,7 @@ function _start()
     try
         exec_options(JLOptions())
     catch
-        invokelatest(display_error, catch_stack())
+        invokelatest(display_error, current_exceptions())
         exit(1)
     end
     if is_interactive && get(stdout, :color, false)
diff --git a/base/cmd.jl b/base/cmd.jl
index 809bc0f3c0a573..0c2a22e6cf852f 100644
--- a/base/cmd.jl
+++ b/base/cmd.jl
@@ -63,7 +63,7 @@ while changing the settings of the optional keyword arguments:
   array or tuple of `"var"=>val` pairs. In order to modify (rather than replace) the
   existing environment, initialize `env` with `copy(ENV)` and then set `env["var"]=val` as
   desired.  To add to an environment block within a `Cmd` object without replacing all
-  elements, use `addenv()` which will return a `Cmd` object with the updated environment.
+  elements, use [`addenv()`](@ref) which will return a `Cmd` object with the updated environment.
 * `dir::AbstractString`: Specify a working directory for the command (instead
   of the current directory).
 
@@ -103,6 +103,8 @@ shell_escape(cmd::Cmd; special::AbstractString="") =
     shell_escape(cmd.exec..., special=special)
 shell_escape_posixly(cmd::Cmd) =
     shell_escape_posixly(cmd.exec...)
+shell_escape_csh(cmd::Cmd) =
+    shell_escape_csh(cmd.exec...)
 escape_microsoft_c_args(cmd::Cmd) =
     escape_microsoft_c_args(cmd.exec...)
 escape_microsoft_c_args(io::IO, cmd::Cmd) =
@@ -165,6 +167,7 @@ rawhandle(x::OS_HANDLE) = x
 if OS_HANDLE !== RawFD
     rawhandle(x::RawFD) = Libc._get_osfhandle(x)
 end
+setup_stdio(stdio::Union{DevNull,OS_HANDLE,RawFD}, ::Bool) = (stdio, false)
 
 const Redirectable = Union{IO, FileRedirect, RawFD, OS_HANDLE}
 const StdIOSet = NTuple{3, Redirectable}
@@ -236,9 +239,11 @@ Set environment variables to use when running the given `command`. `env` is eith
 dictionary mapping strings to strings, an array of strings of the form `"var=val"`, or
 zero or more `"var"=>val` pair arguments. In order to modify (rather than replace) the
 existing environment, create `env` through `copy(ENV)` and then setting `env["var"]=val`
-as desired, or use `addenv`.
+as desired, or use [`addenv`](@ref).
 
 The `dir` keyword argument can be used to specify a working directory for the command.
+
+See also [`Cmd`](@ref), [`addenv`](@ref), [`ENV`](@ref), [`pwd`](@ref).
 """
 setenv(cmd::Cmd, env; dir="") = Cmd(cmd; env=byteenv(env), dir=dir)
 setenv(cmd::Cmd, env::Pair{<:AbstractString}...; dir="") =
@@ -248,10 +253,12 @@ setenv(cmd::Cmd; dir="") = Cmd(cmd; dir=dir)
 """
     addenv(command::Cmd, env...; inherit::Bool = true)
 
-Merge new environment mappings into the given `Cmd` object, returning a new `Cmd` object.
+Merge new environment mappings into the given [`Cmd`](@ref) object, returning a new `Cmd` object.
 Duplicate keys are replaced.  If `command` does not contain any environment values set already,
 it inherits the current environment at time of `addenv()` call if `inherit` is `true`.
 
+See also [`Cmd`](@ref), [`setenv`](@ref), [`ENV`](@ref).
+
 !!! compat "Julia 1.6"
     This function requires Julia 1.6 or later.
 """
diff --git a/base/combinatorics.jl b/base/combinatorics.jl
index d966065852b3af..daa534e068af66 100644
--- a/base/combinatorics.jl
+++ b/base/combinatorics.jl
@@ -91,7 +91,7 @@ function isperm(P::Tuple)
     end
 end
 
-isperm(P::Any16) = _isperm(P)
+isperm(P::Any32) = _isperm(P)
 
 # swap columns i and j of a, in-place
 function swapcols!(a::AbstractMatrix, i, j)
@@ -228,8 +228,15 @@ invpermute!(a, p::AbstractVector) = invpermute!!(a, copymutable(p))
 Return the inverse permutation of `v`.
 If `B = A[v]`, then `A == B[invperm(v)]`.
 
+See also [`sortperm`](@ref), [`invpermute!`](@ref), [`isperm`](@ref), [`permutedims`](@ref).
+
 # Examples
 ```jldoctest
+julia> p = (2, 3, 1);
+
+julia> invperm(p)
+(3, 1, 2)
+
 julia> v = [2; 4; 3; 1];
 
 julia> invperm(v)
@@ -286,7 +293,7 @@ function invperm(P::Tuple)
     end
 end
 
-invperm(P::Any16) = Tuple(invperm(collect(P)))
+invperm(P::Any32) = Tuple(invperm(collect(P)))
 
 #XXX This function should be moved to Combinatorics.jl but is currently used by Base.DSP.
 """
diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl
index 6eefc453111c98..492afa9b8ec1c6 100644
--- a/base/compiler/abstractinterpretation.jl
+++ b/base/compiler/abstractinterpretation.jl
@@ -35,73 +35,15 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
         add_remark!(interp, sv, "Skipped call in throw block")
         return CallMeta(Any, false)
     end
-    valid_worlds = WorldRange()
-    # NOTE this is valid as far as any "constant" lattice element doesn't represent `Union` type
-    splitunions = 1 < unionsplitcost(argtypes) <= InferenceParams(interp).MAX_UNION_SPLITTING
-    mts = Core.MethodTable[]
-    fullmatch = Bool[]
-    if splitunions
-        split_argtypes = switchtupleunion(argtypes)
-        applicable = Any[]
-        applicable_argtypes = Vector{Any}[] # arrays like `argtypes`, including constants, for each match
-        infos = MethodMatchInfo[]
-        for arg_n in split_argtypes
-            sig_n = argtypes_to_type(arg_n)
-            mt = ccall(:jl_method_table_for, Any, (Any,), sig_n)
-            if mt === nothing
-                add_remark!(interp, sv, "Could not identify method table for call")
-                return CallMeta(Any, false)
-            end
-            mt = mt::Core.MethodTable
-            matches = findall(sig_n, method_table(interp); limit=max_methods)
-            if matches === missing
-                add_remark!(interp, sv, "For one of the union split cases, too many methods matched")
-                return CallMeta(Any, false)
-            end
-            push!(infos, MethodMatchInfo(matches))
-            for m in matches
-                push!(applicable, m)
-                push!(applicable_argtypes, arg_n)
-            end
-            valid_worlds = intersect(valid_worlds, matches.valid_worlds)
-            thisfullmatch = _any(match->(match::MethodMatch).fully_covers, matches)
-            found = false
-            for (i, mt′) in enumerate(mts)
-                if mt′ === mt
-                    fullmatch[i] &= thisfullmatch
-                    found = true
-                    break
-                end
-            end
-            if !found
-                push!(mts, mt)
-                push!(fullmatch, thisfullmatch)
-            end
-        end
-        info = UnionSplitInfo(infos)
-    else
-        mt = ccall(:jl_method_table_for, Any, (Any,), atype)
-        if mt === nothing
-            add_remark!(interp, sv, "Could not identify method table for call")
-            return CallMeta(Any, false)
-        end
-        mt = mt::Core.MethodTable
-        matches = findall(atype, method_table(interp, sv); limit=max_methods)
-        if matches === missing
-            # this means too many methods matched
-            # (assume this will always be true, so we don't compute / update valid age in this case)
-            add_remark!(interp, sv, "Too many methods matched")
-            return CallMeta(Any, false)
-        end
-        push!(mts, mt)
-        push!(fullmatch, _any(match->(match::MethodMatch).fully_covers, matches))
-        info = MethodMatchInfo(matches)
-        applicable = matches.matches
-        valid_worlds = matches.valid_worlds
-        applicable_argtypes = nothing
+
+    matches = find_matching_methods(argtypes, atype, method_table(interp, sv), InferenceParams(interp).MAX_UNION_SPLITTING, max_methods)
+    if isa(matches, FailedMethodMatch)
+        add_remark!(interp, sv, matches.reason)
+        return CallMeta(Any, false)
     end
+
+    (; valid_worlds, applicable, info) = matches
     update_valid_age!(sv, valid_worlds)
-    applicable = applicable::Array{Any,1}
     napplicable = length(applicable)
     rettype = Bottom
     edges = MethodInstance[]
@@ -137,14 +79,18 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
         if splitunions
             splitsigs = switchtupleunion(sig)
             for sig_n in splitsigs
-                rt, edgecycle, edge = abstract_call_method(interp, method, sig_n, svec(), multiple_matches, sv)
+                result = abstract_call_method(interp, method, sig_n, svec(), multiple_matches, sv)
+                rt, edge = result.rt, result.edge
                 if edge !== nothing
                     push!(edges, edge)
                 end
-                this_argtypes = applicable_argtypes === nothing ? argtypes : applicable_argtypes[i]
-                const_rt, const_result = abstract_call_method_with_const_args(interp, rt, f, this_argtypes, match, sv, edgecycle, false)
-                if const_rt !== rt && const_rt ⊑ rt
-                    rt = const_rt
+                this_argtypes = isa(matches, MethodMatches) ? argtypes : matches.applicable_argtypes[i]
+                const_result = abstract_call_method_with_const_args(interp, result, f, this_argtypes, match, sv, false)
+                if const_result !== nothing
+                    const_rt, const_result = const_result
+                    if const_rt !== rt && const_rt ⊑ rt
+                        rt = const_rt
+                    end
                 end
                 push!(const_results, const_result)
                 if const_result !== nothing
@@ -156,16 +102,20 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
                 end
             end
         else
-            this_rt, edgecycle, edge = abstract_call_method(interp, method, sig, match.sparams, multiple_matches, sv)
+            result = abstract_call_method(interp, method, sig, match.sparams, multiple_matches, sv)
+            this_rt, edge = result.rt, result.edge
             if edge !== nothing
                 push!(edges, edge)
             end
             # try constant propagation with argtypes for this match
             # this is in preparation for inlining, or improving the return result
-            this_argtypes = applicable_argtypes === nothing ? argtypes : applicable_argtypes[i]
-            const_this_rt, const_result = abstract_call_method_with_const_args(interp, this_rt, f, this_argtypes, match, sv, edgecycle, false)
-            if const_this_rt !== this_rt && const_this_rt ⊑ this_rt
-                this_rt = const_this_rt
+            this_argtypes = isa(matches, MethodMatches) ? argtypes : matches.applicable_argtypes[i]
+            const_result = abstract_call_method_with_const_args(interp, result, f, this_argtypes, match, sv, false)
+            if const_result !== nothing
+                const_this_rt, const_result = const_result
+                if const_this_rt !== this_rt && const_this_rt ⊑ this_rt
+                    this_rt = const_this_rt
+                end
             end
             push!(const_results, const_result)
             if const_result !== nothing
@@ -184,7 +134,7 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
             end
             condval = maybe_extract_const_bool(this_conditional)
             for i = 1:length(argtypes)
-                fargs[i] isa Slot || continue
+                fargs[i] isa SlotNumber || continue
                 if this_conditional isa InterConditional && this_conditional.slot == i
                     vtype = this_conditional.vtype
                     elsetype = this_conditional.elsetype
@@ -209,20 +159,17 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
         info = ConstCallInfo(info, const_results)
     end
 
-    if rettype isa LimitedAccuracy
-        union!(sv.pclimitations, rettype.causes)
-        rettype = rettype.typ
-    end
+    rettype = collect_limitations!(rettype, sv)
     # if we have argument refinement information, apply that now to get the result
     if is_lattice_bool(rettype) && conditionals !== nothing && fargs !== nothing
         slot = 0
         vtype = elsetype = Any
         condval = maybe_extract_const_bool(rettype)
         for i in 1:length(fargs)
-            # find the first argument which supports refinment,
-            # and intersect all equvalent arguments with it
+            # find the first argument which supports refinement,
+            # and intersect all equivalent arguments with it
             arg = fargs[i]
-            arg isa Slot || continue # can't refine
+            arg isa SlotNumber || continue # can't refine
             old = argtypes[i]
             old isa Type || continue # unlikely to refine
             id = slot_id(arg)
@@ -273,7 +220,7 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
         # and avoid keeping track of a more complex result type.
         rettype = Any
     end
-    add_call_backedges!(interp, rettype, edges, fullmatch, mts, atype, sv)
+    add_call_backedges!(interp, rettype, edges, matches, atype, sv)
     if !isempty(sv.pclimitations) # remove self, if present
         delete!(sv.pclimitations, sv)
         for caller in sv.callers_in_cycle
@@ -284,27 +231,110 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
     return CallMeta(rettype, info)
 end
 
-widenwrappedconditional(@nospecialize(typ))   = widenconditional(typ)
-widenwrappedconditional(typ::LimitedAccuracy) = LimitedAccuracy(widenconditional(typ.typ), typ.causes)
+struct FailedMethodMatch  # failure result returned by `find_matching_methods`
+    reason::String  # message the caller passes on to `add_remark!`
+end
 
-function add_call_backedges!(interp::AbstractInterpreter,
-                             @nospecialize(rettype),
-                             edges::Vector{MethodInstance},
-                             fullmatch::Vector{Bool}, mts::Vector{Core.MethodTable}, @nospecialize(atype),
-                             sv::InferenceState)
-    if rettype === Any
-        # for `NativeInterpreter`, we don't add backedges when a new method couldn't refine
-        # (widen) this type
-        return
+struct MethodMatches  # result of `find_matching_methods` when the call was not union-split
+    applicable::Vector{Any}  # filled from `matches.matches` of the `findall` lookup
+    info::MethodMatchInfo  # `MethodMatchInfo` wrapping the same lookup result
+    valid_worlds::WorldRange  # `valid_worlds` of the lookup result
+    mt::Core.MethodTable  # table returned by `jl_method_table_for` for `atype`
+    fullmatch::Bool  # true if any match has `fully_covers` set
+end
+
+struct UnionSplitMethodMatches  # result of `find_matching_methods` when the argtypes were union-split
+    applicable::Vector{Any}  # matches of every split signature, appended in lookup order
+    applicable_argtypes::Vector{Vector{Any}}  # split argtypes for each entry of `applicable` (same indices)
+    info::UnionSplitInfo  # wraps one `MethodMatchInfo` per split signature
+    valid_worlds::WorldRange  # accumulated with `intersect` over all split lookups
+    mts::Vector{Core.MethodTable}  # distinct method tables seen across the splits
+    fullmatches::Vector{Bool}  # per entry of `mts`: the split full-match flags combined with `&`
+end
+
+function find_matching_methods(argtypes::Vector{Any}, @nospecialize(atype), method_table::MethodTableView,
+                               union_split::Int, max_methods::Int)  # returns `MethodMatches`, `UnionSplitMethodMatches` or `FailedMethodMatch`
+    # NOTE this is valid as long as no "constant" lattice element represents a `Union` type
+    if 1 < unionsplitcost(argtypes) <= union_split  # split only while the cost stays within `union_split`
+        split_argtypes = switchtupleunion(argtypes)
+        infos = MethodMatchInfo[]
+        applicable = Any[]
+        applicable_argtypes = Vector{Any}[] # arrays like `argtypes`, including constants, for each match
+        valid_worlds = WorldRange()
+        mts = Core.MethodTable[]
+        fullmatches = Bool[]
+        for i in 1:length(split_argtypes)  # one method lookup per split signature
+            arg_n = split_argtypes[i]::Vector{Any}
+            sig_n = argtypes_to_type(arg_n)
+            mt = ccall(:jl_method_table_for, Any, (Any,), sig_n)
+            mt === nothing && return FailedMethodMatch("Could not identify method table for call")
+            mt = mt::Core.MethodTable
+            matches = findall(sig_n, method_table; limit = max_methods)
+            if matches === missing  # a single failing split aborts the whole lookup
+                return FailedMethodMatch("For one of the union split cases, too many methods matched")
+            end
+            push!(infos, MethodMatchInfo(matches))
+            for m in matches
+                push!(applicable, m)
+                push!(applicable_argtypes, arg_n)  # kept index-aligned with `applicable`
+            end
+            valid_worlds = intersect(valid_worlds, matches.valid_worlds)
+            thisfullmatch = _any(match->(match::MethodMatch).fully_covers, matches)
+            found = false
+            for (i, mt′) in enumerate(mts)  # this `i` is a new loop-local variable; the outer `i` is not modified
+                if mt′ === mt
+                    fullmatches[i] &= thisfullmatch  # table already recorded: AND the flags together
+                    found = true
+                    break
+                end
+            end
+            if !found  # first split signature that resolves to this method table
+                push!(mts, mt)
+                push!(fullmatches, thisfullmatch)
+            end
+        end
+        return UnionSplitMethodMatches(applicable,
+                                       applicable_argtypes,
+                                       UnionSplitInfo(infos),
+                                       valid_worlds,
+                                       mts,
+                                       fullmatches)
+    else  # no union splitting: a single lookup on `atype`
+        mt = ccall(:jl_method_table_for, Any, (Any,), atype)
+        if mt === nothing
+            return FailedMethodMatch("Could not identify method table for call")
+        end
+        mt = mt::Core.MethodTable
+        matches = findall(atype, method_table; limit = max_methods)
+        if matches === missing
+            # this means too many methods matched
+            # (assume this will always be true, so we don't compute / update valid age in this case)
+            return FailedMethodMatch("Too many methods matched")
+        end
+        fullmatch = _any(match->(match::MethodMatch).fully_covers, matches)
+        return MethodMatches(matches.matches,
+                             MethodMatchInfo(matches),
+                             matches.valid_worlds,
+                             mt,
+                             fullmatch)
     end
+end
+
+function add_call_backedges!(interp::AbstractInterpreter, @nospecialize(rettype), edges::Vector{MethodInstance},
+                             matches::Union{MethodMatches,UnionSplitMethodMatches}, @nospecialize(atype),
+                             sv::InferenceState)  # adds backedges for `sv`; does nothing when `rettype === Any`
+    # for `NativeInterpreter`, we don't add backedges when a new method couldn't refine (widen) this type
+    rettype === Any && return
     for edge in edges
         add_backedge!(edge, sv)
     end
-    for (thisfullmatch, mt) in zip(fullmatch, mts)
-        if !thisfullmatch
-            # also need an edge to the method table in case something gets
-            # added that did not intersect with any existing method
-            add_mt_backedge!(mt, atype, sv)
+    # also need an edge to the method table in case something gets
+    # added that did not intersect with any existing method
+    if isa(matches, MethodMatches)  # single lookup: one table with one full-match flag
+        matches.fullmatch || add_mt_backedge!(matches.mt, atype, sv)
+    else  # `UnionSplitMethodMatches`: one full-match flag per recorded table
+        for (thisfullmatch, mt) in zip(matches.fullmatches, matches.mts)
+            thisfullmatch || add_mt_backedge!(mt, atype, sv)
         end
     end
 end
@@ -315,7 +345,7 @@ const RECURSION_MSG = "Bounded recursion detected. Call was widened to force con
 function abstract_call_method(interp::AbstractInterpreter, method::Method, @nospecialize(sig), sparams::SimpleVector, hardlimit::Bool, sv::InferenceState)
     if method.name === :depwarn && isdefined(Main, :Base) && method.module === Main.Base
         add_remark!(interp, sv, "Refusing to infer into `depwarn`")
-        return Any, false, nothing
+        return MethodCallResult(Any, false, false, nothing)
     end
     topmost = nothing
     # Limit argument type tuple growth of functions:
@@ -323,6 +353,7 @@ function abstract_call_method(interp::AbstractInterpreter, method::Method, @nosp
     # and from the same method.
     # Returns the topmost occurrence of that repeated edge.
     edgecycle = false
+    edgelimited = false
     # The `method_for_inference_heuristics` will expand the given method's generator if
     # necessary in order to retrieve this field from the generated `CodeInfo`, if it exists.
     # The other `CodeInfo`s we inspect will already have this field inflated, so we just
@@ -383,7 +414,7 @@ function abstract_call_method(interp::AbstractInterpreter, method::Method, @nosp
                     # we have a self-cycle in the call-graph, but not in the inference graph (typically):
                     # break this edge now (before we record it) by returning early
                     # (non-typically, this means that we lose the ability to detect a guaranteed StackOverflow in some cases)
-                    return Any, true, nothing
+                    return MethodCallResult(Any, true, true, nothing)
                 end
                 topmost = nothing
                 edgecycle = true
@@ -432,7 +463,7 @@ function abstract_call_method(interp::AbstractInterpreter, method::Method, @nosp
                 # since it's very unlikely that we'll try to inline this,
                 # or want make an invoke edge to its calling convention return type.
                 # (non-typically, this means that we lose the ability to detect a guaranteed StackOverflow in some cases)
-                return Any, true, nothing
+                return MethodCallResult(Any, true, true, nothing)
             end
             add_remark!(interp, sv, RECURSION_MSG)
             topmost = topmost::InferenceState
@@ -440,6 +471,7 @@ function abstract_call_method(interp::AbstractInterpreter, method::Method, @nosp
             poison_callstack(sv, parentframe === nothing ? topmost : parentframe)
             sig = newsig
             sparams = svec()
+            edgelimited = true
         end
     end
 
@@ -471,76 +503,113 @@ function abstract_call_method(interp::AbstractInterpreter, method::Method, @nosp
 
     rt, edge = typeinf_edge(interp, method, sig, sparams, sv)
     if edge === nothing
-        edgecycle = true
+        edgecycle = edgelimited = true
     end
-    return rt, edgecycle, edge
+    return MethodCallResult(rt, edgecycle, edgelimited, edge)
 end
 
-function abstract_call_method_with_const_args(interp::AbstractInterpreter, @nospecialize(rettype),
+# keeps result and context information of abstract method call, will be used by succeeding constant-propagation
+struct MethodCallResult
+    rt  # untyped field: result from `typeinf_edge`, or `Any` on the early bail-outs of `abstract_call_method`
+    edgecycle::Bool  # set by `abstract_call_method` on a detected cycle, or when `typeinf_edge` returns no edge
+    edgelimited::Bool  # set when the call signature was replaced by a limited one, or when no edge was returned
+    edge::Union{Nothing,MethodInstance}  # `nothing` when no edge was produced
+    function MethodCallResult(@nospecialize(rt),  # inner constructor so that `rt` is taken `@nospecialize`
+                              edgecycle::Bool,
+                              edgelimited::Bool,
+                              edge::Union{Nothing,MethodInstance})
+        return new(rt, edgecycle, edgelimited, edge)
+    end
+end
+
+function abstract_call_method_with_const_args(interp::AbstractInterpreter, result::MethodCallResult,
                                               @nospecialize(f), argtypes::Vector{Any}, match::MethodMatch,
-                                              sv::InferenceState, edgecycle::Bool,
-                                              va_override::Bool)
-    mi = maybe_get_const_prop_profitable(interp, rettype, f, argtypes, match, sv, edgecycle)
-    mi === nothing && return Any, nothing
+                                              sv::InferenceState, va_override::Bool)  # returns `nothing`, or `(result, inf_result)` on success
+    mi = maybe_get_const_prop_profitable(interp, result, f, argtypes, match, sv)
+    mi === nothing && return nothing  # no `MethodInstance` was selected for const-prop
     # try constant prop'
     inf_cache = get_inference_cache(interp)
     inf_result = cache_lookup(mi, argtypes, inf_cache)
     if inf_result === nothing
         # if there might be a cycle, check to make sure we don't end up
         # calling ourselves here.
-        if edgecycle && _any(InfStackUnwind(sv)) do infstate
-                return match.method === infstate.linfo.def && any(infstate.result.overridden_by_const)
+        let result = result # prevent capturing: `result` is reassigned further down, so rebind it for the closure
+            if result.edgecycle && _any(InfStackUnwind(sv)) do infstate
+                    # if the type complexity limiting didn't decide to limit the call signature (`result.edgelimited = false`)
+                    # we can relax the cycle detection by comparing `MethodInstance`s and allow inference to
+                    # propagate different constant elements if the recursion is finite over the lattice
+                    return (result.edgelimited ? match.method === infstate.linfo.def : mi === infstate.linfo) &&
+                            any(infstate.result.overridden_by_const)
+                end
+                add_remark!(interp, sv, "[constprop] Edge cycle encountered")
+                return nothing
             end
-            add_remark!(interp, sv, "[constprop] Edge cycle encountered")
-            return Any, nothing
         end
         inf_result = InferenceResult(mi, argtypes, va_override)
-        frame = InferenceState(inf_result, #=cache=#false, interp)
-        frame === nothing && return Any, nothing # this is probably a bad generated function (unsound), but just ignore it
+        if !any(inf_result.overridden_by_const)  # no entry of `overridden_by_const` is set: bail out
+            add_remark!(interp, sv, "[constprop] Could not handle constant info in matching_cache_argtypes")
+            return nothing
+        end
+        frame = InferenceState(inf_result, #=cache=#:local, interp)
+        frame === nothing && return nothing # this is probably a bad generated function (unsound), but just ignore it
         frame.parent = sv
-        push!(inf_cache, inf_result)
-        typeinf(interp, frame) || return Any, nothing
+        typeinf(interp, frame) || return nothing  # bail out when `typeinf` returns false
     end
     result = inf_result.result
     # if constant inference hits a cycle, just bail out
-    isa(result, InferenceState) && return Any, nothing
+    isa(result, InferenceState) && return nothing
     add_backedge!(mi, sv)
     return result, inf_result
 end
 
 # if there's a possibility we could get a better result (hopefully without doing too much work)
 # returns `MethodInstance` with constant arguments, returns nothing otherwise
-function maybe_get_const_prop_profitable(interp::AbstractInterpreter, @nospecialize(rettype),
+function maybe_get_const_prop_profitable(interp::AbstractInterpreter, result::MethodCallResult,
                                          @nospecialize(f), argtypes::Vector{Any}, match::MethodMatch,
-                                         sv::InferenceState, edgecycle::Bool)
-    const_prop_entry_heuristic(interp, rettype, sv, edgecycle) || return nothing
+                                         sv::InferenceState)  # every visible rejection except the arity check leaves a "[constprop] ..." remark
+    if !InferenceParams(interp).ipo_constant_propagation  # parameter check comes first, before any heuristic
+        add_remark!(interp, sv, "[constprop] Disabled by parameter")
+        return nothing
+    end
     method = match.method
+    force = force_const_prop(interp, f, method)
+    force || const_prop_entry_heuristic(interp, result, sv) || return nothing  # `force` skips the entry heuristic
     nargs::Int = method.nargs
     method.isva && (nargs -= 1)
-    if length(argtypes) < nargs
+    length(argtypes) < nargs && return nothing  # fewer argtypes than `nargs` (vararg slot excluded): give up silently
+    if !(const_prop_argument_heuristic(interp, argtypes) || const_prop_rettype_heuristic(interp, result.rt))  # need at least one to pass
+        add_remark!(interp, sv, "[constprop] Disabled by argument and rettype heuristics")
         return nothing
     end
-    const_prop_argument_heuristic(interp, argtypes) || const_prop_rettype_heuristic(interp, rettype) || return nothing
     allconst = is_allconst(argtypes)
-    force = force_const_prop(interp, f, method)
-    force || const_prop_function_heuristic(interp, f, argtypes, nargs, allconst) || return nothing
+    if !force  # the function heuristic is consulted only when const-prop is not forced
+        if !const_prop_function_heuristic(interp, f, argtypes, nargs, allconst)
+            add_remark!(interp, sv, "[constprop] Disabled by function heuristic")
+            return nothing
+        end
+    end
     force |= allconst
-    mi = specialize_method(match, !force)
+    mi = specialize_method(match; preexisting=!force)  # `preexisting` is false when forced or when `allconst` holds
     if mi === nothing
         add_remark!(interp, sv, "[constprop] Failed to specialize")
         return nothing
     end
     mi = mi::MethodInstance
-    if !force && !const_prop_methodinstance_heuristic(interp, method, mi)
-        add_remark!(interp, sv, "[constprop] Disabled by heuristic")
+    if !force && !const_prop_methodinstance_heuristic(interp, match, mi, argtypes, sv)  # skipped when `force` is set
+        add_remark!(interp, sv, "[constprop] Disabled by method instance heuristic")
         return nothing
     end
     return mi
 end
 
-function const_prop_entry_heuristic(interp::AbstractInterpreter, @nospecialize(rettype), sv::InferenceState, edgecycle::Bool)
-    call_result_unused(sv) && edgecycle && return false
-    return is_improvable(rettype) && InferenceParams(interp).ipo_constant_propagation
+function const_prop_entry_heuristic(interp::AbstractInterpreter, result::MethodCallResult, sv::InferenceState)  # returns Bool; adds a remark whenever it returns false
+    if call_result_unused(sv) && result.edgecycle  # result unused and the call is flagged `edgecycle`: reject
+        add_remark!(interp, sv, "[constprop] Disabled by entry heuristic (edgecycle with unused result)")
+        return false
+    end
+    is_improvable(result.rt) && return true  # accept only when `is_improvable` holds for the current result type
+    add_remark!(interp, sv, "[constprop] Disabled by entry heuristic (unimprovable return type)")
+    return false
 end
 
 # see if propagating constants may be worthwhile
@@ -630,7 +699,10 @@ end
 # This is a heuristic to avoid trying to const prop through complicated functions
 # where we would spend a lot of time, but are probably unlikely to get an improved
 # result anyway.
-function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, method::Method, mi::MethodInstance)
+function const_prop_methodinstance_heuristic(
+    interp::AbstractInterpreter, match::MethodMatch, mi::MethodInstance,
+    argtypes::Vector{Any}, sv::InferenceState)
+    method = match.method
     if method.is_for_opaque_closure
         # Not inlining an opaque closure can be very expensive, so be generous
         # with the const-prop-ability. It is quite possible that we can't infer
@@ -648,7 +720,8 @@ function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, method
     if isdefined(code, :inferred) && !cache_inlineable
         cache_inf = code.inferred
         if !(cache_inf === nothing)
-            cache_inlineable = inlining_policy(interp)(cache_inf) !== nothing
+            src = inlining_policy(interp, cache_inf, get_curr_ssaflag(sv), mi, argtypes)
+            cache_inlineable = src !== nothing
         end
     end
     if !cache_inlineable
@@ -778,9 +851,11 @@ function abstract_iteration(interp::AbstractInterpreter, @nospecialize(itft), @n
             return ret, AbstractIterationInfo(calls)
         end
         if Nothing <: stateordonet_widened || length(ret) >= InferenceParams(interp).MAX_TUPLE_SPLAT
+            stateordonet = stateordonet_widened
             break
         end
         if !isa(stateordonet_widened, DataType) || !(stateordonet_widened <: Tuple) || isvatuple(stateordonet_widened) || length(stateordonet_widened.parameters) != 2
+            stateordonet = stateordonet_widened
             break
         end
         nstatetype = getfield_tfunc(stateordonet, Const(2))
@@ -798,27 +873,40 @@ function abstract_iteration(interp::AbstractInterpreter, @nospecialize(itft), @n
     end
     # From here on, we start asking for results on the widened types, rather than
     # the precise (potentially const) state type
-    statetype = widenconst(statetype)
-    valtype = widenconst(valtype)
+    # statetype and valtype are reinitialized in the first iteration below from the
+    # (widened) stateordonet, which has not yet been fully analyzed in the loop above
+    statetype = Bottom
+    valtype = Bottom
+    may_have_terminated = Nothing <: stateordonet
     while valtype !== Any
-        stateordonet = abstract_call_known(interp, iteratef, nothing, Any[Const(iteratef), itertype, statetype], sv).rt
-        stateordonet = widenconst(stateordonet)
-        nounion = typesubtract(stateordonet, Nothing, 0)
-        if !isa(nounion, DataType) || !(nounion <: Tuple) || isvatuple(nounion) || length(nounion.parameters) != 2
+        nounion = typeintersect(stateordonet, Tuple{Any,Any})
+        if nounion !== Union{} && !isa(nounion, DataType)
+            # nounion is of a type we cannot handle
             valtype = Any
             break
         end
-        if nounion.parameters[1] <: valtype && nounion.parameters[2] <: statetype
+        if nounion === Union{} || (nounion.parameters[1] <: valtype && nounion.parameters[2] <: statetype)
+            # reached a fixpoint or iterator failed/gave invalid answer
             if typeintersect(stateordonet, Nothing) === Union{}
-                # Reached a fixpoint, but Nothing is not possible => iterator is infinite or failing
-                return Any[Bottom], nothing
+                # ... but cannot terminate
+                if !may_have_terminated
+                    #  ... and cannot have terminated prior to this loop
+                    return Any[Bottom], nothing
+                else
+                    # iterator may have terminated prior to this loop, but not during it
+                    valtype = Bottom
+                end
             end
             break
         end
         valtype = tmerge(valtype, nounion.parameters[1])
         statetype = tmerge(statetype, nounion.parameters[2])
+        stateordonet = abstract_call_known(interp, iteratef, nothing, Any[Const(iteratef), itertype, statetype], sv).rt
+        stateordonet = widenconst(stateordonet)
+    end
+    if valtype !== Union{}
+        push!(ret, Vararg{valtype})
     end
-    push!(ret, Vararg{valtype})
     return ret, nothing
 end
 
@@ -921,7 +1009,7 @@ end
 function is_method_pure(method::Method, @nospecialize(sig), sparams::SimpleVector)
     if isdefined(method, :generator)
         method.generator.expand_early || return false
-        mi = specialize_method(method, sig, sparams, false)
+        mi = specialize_method(method, sig, sparams)
         isa(mi, MethodInstance) || return false
         staged = get_staged(mi)
         (staged isa CodeInfo && (staged::CodeInfo).pure) || return false
@@ -983,10 +1071,10 @@ function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, fargs::U
                 # try to simulate this as a real conditional (`cnd ? x : y`), so that the penalty for using `ifelse` instead isn't too high
                 a = ssa_def_slot(fargs[3], sv)
                 b = ssa_def_slot(fargs[4], sv)
-                if isa(a, Slot) && slot_id(cnd.var) == slot_id(a)
+                if isa(a, SlotNumber) && slot_id(cnd.var) == slot_id(a)
                     tx = (cnd.vtype ⊑ tx ? cnd.vtype : tmeet(tx, widenconst(cnd.vtype)))
                 end
-                if isa(b, Slot) && slot_id(cnd.var) == slot_id(b)
+                if isa(b, SlotNumber) && slot_id(cnd.var) == slot_id(b)
                     ty = (cnd.elsetype ⊑ ty ? cnd.elsetype : tmeet(ty, widenconst(cnd.elsetype)))
                 end
                 return tmerge(tx, ty)
@@ -1007,7 +1095,7 @@ function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, fargs::U
         # perform very limited back-propagation of type information for `is` and `isa`
         if f === isa
             a = ssa_def_slot(fargs[2], sv)
-            if isa(a, Slot)
+            if isa(a, SlotNumber)
                 aty = widenconst(argtypes[2])
                 if rt === Const(false)
                     return Conditional(a, Union{}, aty)
@@ -1030,7 +1118,7 @@ function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, fargs::U
             aty = argtypes[2]
             bty = argtypes[3]
             # if doing a comparison to a singleton, consider returning a `Conditional` instead
-            if isa(aty, Const) && isa(b, Slot)
+            if isa(aty, Const) && isa(b, SlotNumber)
                 if rt === Const(false)
                     aty = Union{}
                 elseif rt === Const(true)
@@ -1040,7 +1128,7 @@ function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, fargs::U
                 end
                 return Conditional(b, aty, bty)
             end
-            if isa(bty, Const) && isa(a, Slot)
+            if isa(bty, Const) && isa(a, SlotNumber)
                 if rt === Const(false)
                     bty = Union{}
                 elseif rt === Const(true)
@@ -1051,10 +1139,10 @@ function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, fargs::U
                 return Conditional(a, bty, aty)
             end
             # narrow the lattice slightly (noting the dependency on one of the slots), to promote more effective smerge
-            if isa(b, Slot)
+            if isa(b, SlotNumber)
                 return Conditional(b, rt === Const(false) ? Union{} : bty, rt === Const(true) ? Union{} : bty)
             end
-            if isa(a, Slot)
+            if isa(a, SlotNumber)
                 return Conditional(a, rt === Const(false) ? Union{} : aty, rt === Const(true) ? Union{} : aty)
             end
         elseif f === Core.Compiler.not_int
@@ -1109,7 +1197,8 @@ function abstract_call_unionall(argtypes::Vector{Any})
 end
 
 function abstract_invoke(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::InferenceState)
-    ft = widenconst(argtype_by_index(argtypes, 2))
+    ft′ = argtype_by_index(argtypes, 2)
+    ft = widenconst(ft′)
     ft === Bottom && return CallMeta(Bottom, false)
     (types, isexact, isconcrete, istype) = instanceof_tfunc(argtype_by_index(argtypes, 3))
     types === Bottom && return CallMeta(Bottom, false)
@@ -1119,19 +1208,37 @@ function abstract_invoke(interp::AbstractInterpreter, argtypes::Vector{Any}, sv:
     nargtype === Bottom && return CallMeta(Bottom, false)
     nargtype isa DataType || return CallMeta(Any, false) # other cases are not implemented below
     isdispatchelem(ft) || return CallMeta(Any, false) # check that we might not have a subtype of `ft` at runtime, before doing supertype lookup below
-    types = rewrap_unionall(Tuple{ft, unwrap_unionall(types).parameters...}, types)
+    ft = ft::DataType
+    types = rewrap_unionall(Tuple{ft, unwrap_unionall(types).parameters...}, types)::Type
     nargtype = Tuple{ft, nargtype.parameters...}
     argtype = Tuple{ft, argtype.parameters...}
     result = findsup(types, method_table(interp))
-    if result === nothing
-        return CallMeta(Any, false)
-    end
+    result === nothing && return CallMeta(Any, false)
     method, valid_worlds = result
     update_valid_age!(sv, valid_worlds)
-    (ti, env) = ccall(:jl_type_intersection_with_env, Any, (Any, Any), nargtype, method.sig)::SimpleVector
-    rt, edge = typeinf_edge(interp, method, ti, env, sv)
+    (ti, env::SimpleVector) = ccall(:jl_type_intersection_with_env, Any, (Any, Any), nargtype, method.sig)::SimpleVector
+    (; rt, edge) = result = abstract_call_method(interp, method, ti, env, false, sv)
     edge !== nothing && add_backedge!(edge::MethodInstance, sv)
-    return CallMeta(rt, InvokeCallInfo(MethodMatch(ti, env, method, argtype <: method.sig)))
+    match = MethodMatch(ti, env, method, argtype <: method.sig)
+    # try constant propagation with manual inlinings of some of the heuristics
+    # since some checks within `abstract_call_method_with_const_args` seem a bit costly
+    const_prop_entry_heuristic(interp, result, sv) || return CallMeta(rt, InvokeCallInfo(match, nothing))
+    argtypes′ = argtypes[4:end]
+    const_prop_argument_heuristic(interp, argtypes′) || const_prop_rettype_heuristic(interp, rt) || return CallMeta(rt, InvokeCallInfo(match, nothing))
+    pushfirst!(argtypes′, ft)
+    # # typeintersect might have narrowed signature, but the accuracy gain doesn't seem worth the cost involved with the lattice comparisons
+    # for i in 1:length(argtypes′)
+    #     t, a = ti.parameters[i], argtypes′[i]
+    #     argtypes′[i] = t ⊑ a ? t : a
+    # end
+    const_result = abstract_call_method_with_const_args(interp, result, argtype_to_function(ft′), argtypes′, match, sv, false)
+    if const_result !== nothing
+        const_rt, const_result = const_result
+        if const_rt !== rt && const_rt ⊑ rt
+            return CallMeta(collect_limitations!(const_rt, sv), InvokeCallInfo(match, const_result))
+        end
+    end
+    return CallMeta(collect_limitations!(rt, sv), InvokeCallInfo(match, nothing))
 end
 
 # call where the function is known exactly
@@ -1147,6 +1254,8 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
             return abstract_apply(interp, argtypes, sv, max_methods)
         elseif f === invoke
             return abstract_invoke(interp, argtypes, sv)
+        elseif f === modifyfield!
+            return abstract_modifyfield!(interp, argtypes, sv)
         end
         return CallMeta(abstract_call_builtin(interp, f, fargs, argtypes, sv, max_methods), false)
     elseif f === Core.kwfunc
@@ -1231,29 +1340,30 @@ end
 function abstract_call_opaque_closure(interp::AbstractInterpreter, closure::PartialOpaque, argtypes::Vector{Any}, sv::InferenceState)
     pushfirst!(argtypes, closure.env)
     sig = argtypes_to_type(argtypes)
-    rt, edgecycle, edge = abstract_call_method(interp, closure.source::Method, sig, Core.svec(), false, sv)
+    (; rt, edge) = result = abstract_call_method(interp, closure.source, sig, Core.svec(), false, sv)
     edge !== nothing && add_backedge!(edge, sv)
     tt = closure.typ
-    sigT = unwrap_unionall(tt).parameters[1]
-    match = MethodMatch(sig, Core.svec(), closure.source::Method, sig <: rewrap_unionall(sigT, tt))
+    sigT = (unwrap_unionall(tt)::DataType).parameters[1]
+    match = MethodMatch(sig, Core.svec(), closure.source, sig <: rewrap_unionall(sigT, tt))
     info = OpaqueClosureCallInfo(match)
-    if !edgecycle
-        const_rettype, result = abstract_call_method_with_const_args(interp, rt, closure, argtypes,
-            match, sv, edgecycle, closure.isva)
-        if const_rettype ⊑ rt
-           rt = const_rettype
-        end
-        if result !== nothing
-            info = ConstCallInfo(info, Union{Nothing,InferenceResult}[result])
+    if !result.edgecycle
+        const_result = abstract_call_method_with_const_args(interp, result, closure, argtypes,
+            match, sv, closure.isva)
+        if const_result !== nothing
+            const_rettype, const_result = const_result
+            if const_rettype ⊑ rt
+               rt = const_rettype
+            end
+            info = ConstCallInfo(info, Union{Nothing,InferenceResult}[const_result])
         end
     end
-    return CallMeta(rt, info)
+    return CallMeta(collect_limitations!(rt, sv), info)
 end
 
 function most_general_argtypes(closure::PartialOpaque)
     ret = Any[]
     cc = widenconst(closure)
-    argt = unwrap_unionall(cc).parameters[1]
+    argt = (unwrap_unionall(cc)::DataType).parameters[1]
     if !isa(argt, DataType) || argt.name !== typename(Tuple)
         argt = Tuple
     end
@@ -1265,17 +1375,12 @@ function abstract_call(interp::AbstractInterpreter, fargs::Union{Nothing,Vector{
                        sv::InferenceState, max_methods::Int = InferenceParams(interp).MAX_METHODS)
     #print("call ", e.args[1], argtypes, "\n\n")
     ft = argtypes[1]
-    if isa(ft, Const)
-        f = ft.val
-    elseif isconstType(ft)
-        f = ft.parameters[1]
-    elseif isa(ft, DataType) && isdefined(ft, :instance)
-        f = ft.instance
-    elseif isa(ft, PartialOpaque)
+    f = argtype_to_function(ft)
+    if isa(ft, PartialOpaque)
         return abstract_call_opaque_closure(interp, ft, argtypes[2:end], sv)
-    elseif isa(unwrap_unionall(ft), DataType) && unwrap_unionall(ft).name === typename(Core.OpaqueClosure)
-        return CallMeta(rewrap_unionall(unwrap_unionall(ft).parameters[2], ft), false)
-    else
+    elseif (uft = unwrap_unionall(ft); isa(uft, DataType) && uft.name === typename(Core.OpaqueClosure))
+        return CallMeta(rewrap_unionall((uft::DataType).parameters[2], ft), false)
+    elseif f === nothing
         # non-constant function, but the number of arguments is known
         # and the ft is not a Builtin or IntrinsicFunction
         if typeintersect(widenconst(ft), Union{Builtin, Core.OpaqueClosure}) != Union{}
@@ -1287,6 +1392,18 @@ function abstract_call(interp::AbstractInterpreter, fargs::Union{Nothing,Vector{
     return abstract_call_known(interp, f, fargs, argtypes, sv, max_methods)
 end
 
+function argtype_to_function(@nospecialize(ft))
+    if isa(ft, Const)
+        return ft.val
+    elseif isconstType(ft)
+        return ft.parameters[1]
+    elseif isa(ft, DataType) && isdefined(ft, :instance)
+        return ft.instance
+    else
+        return nothing
+    end
+end
+
 function sp_type_rewrap(@nospecialize(T), linfo::MethodInstance, isreturn::Bool)
     isref = false
     if T === Bottom
@@ -1358,7 +1475,7 @@ function abstract_eval_special_value(interp::AbstractInterpreter, @nospecialize(
         return Const((e::QuoteNode).value)
     elseif isa(e, SSAValue)
         return abstract_eval_ssavalue(e::SSAValue, sv.src)
-    elseif isa(e, Slot) || isa(e, Argument)
+    elseif isa(e, SlotNumber) || isa(e, Argument)
         return (vtypes[slot_id(e)]::VarState).typ
     elseif isa(e, GlobalRef)
         return abstract_eval_global(e.mod, e.name)
@@ -1395,10 +1512,18 @@ end
 
 function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e), vtypes::VarTable, sv::InferenceState)
     if !isa(e, Expr)
+        if isa(e, PhiNode)
+            rt = Union{}
+            for val in e.values
+                rt = tmerge(rt, abstract_eval_special_value(interp, val, vtypes, sv))
+            end
+            return rt
+        end
         return abstract_eval_special_value(interp, e, vtypes, sv)
     end
     e = e::Expr
-    if e.head === :call
+    ehead = e.head
+    if ehead === :call
         ea = e.args
         argtypes = collect_argtypes(interp, ea, vtypes, sv)
         if argtypes === nothing
@@ -1408,15 +1533,15 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e),
             sv.stmt_info[sv.currpc] = callinfo.info
             t = callinfo.rt
         end
-    elseif e.head === :new
+    elseif ehead === :new
         t = instanceof_tfunc(abstract_eval_value(interp, e.args[1], vtypes, sv))[1]
-        if isconcretetype(t) && !t.mutable
+        if isconcretetype(t) && !ismutabletype(t)
             args = Vector{Any}(undef, length(e.args)-1)
             ats = Vector{Any}(undef, length(e.args)-1)
             anyconst = false
             allconst = true
             for i = 2:length(e.args)
-                at = abstract_eval_value(interp, e.args[i], vtypes, sv)
+                at = widenconditional(abstract_eval_value(interp, e.args[i], vtypes, sv))
                 if !anyconst
                     anyconst = has_nontrivial_const_info(at)
                 end
@@ -1445,20 +1570,20 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e),
                 end
             end
         end
-    elseif e.head === :splatnew
+    elseif ehead === :splatnew
         t = instanceof_tfunc(abstract_eval_value(interp, e.args[1], vtypes, sv))[1]
-        if length(e.args) == 2 && isconcretetype(t) && !t.mutable
+        if length(e.args) == 2 && isconcretetype(t) && !ismutabletype(t)
             at = abstract_eval_value(interp, e.args[2], vtypes, sv)
             n = fieldcount(t)
-            if isa(at, Const) && (val = at.val; isa(val, Tuple)) && n == length(val) &&
-                let t = t, val = val; _all(i->val[i] isa fieldtype(t, i), 1:n); end
-                t = Const(ccall(:jl_new_structt, Any, (Any, Any), t, val))
-            elseif isa(at, PartialStruct) && at ⊑ Tuple && n == length(at.fields) &&
-                let t = t, at = at; _all(i->at.fields[i] ⊑ fieldtype(t, i), 1:n); end
-                t = PartialStruct(t, at.fields)
+            if isa(at, Const) && isa(at.val, Tuple) && n == length(at.val::Tuple) &&
+                let t = t; _all(i->getfield(at.val::Tuple, i) isa fieldtype(t, i), 1:n); end
+                t = Const(ccall(:jl_new_structt, Any, (Any, Any), t, at.val))
+            elseif isa(at, PartialStruct) && at ⊑ Tuple && n == length(at.fields::Vector{Any}) &&
+                let t = t, at = at; _all(i->(at.fields::Vector{Any})[i] ⊑ fieldtype(t, i), 1:n); end
+                t = PartialStruct(t, at.fields::Vector{Any})
             end
         end
-    elseif e.head === :new_opaque_closure
+    elseif ehead === :new_opaque_closure
         t = Union{}
         if length(e.args) >= 5
             ea = e.args
@@ -1477,7 +1602,7 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e),
                 end
             end
         end
-    elseif e.head === :foreigncall
+    elseif ehead === :foreigncall
         abstract_eval_value(interp, e.args[1], vtypes, sv)
         t = sp_type_rewrap(e.args[2], sv.linfo, true)
         for i = 3:length(e.args)
@@ -1485,25 +1610,25 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e),
                 t = Bottom
             end
         end
-    elseif e.head === :cfunction
+    elseif ehead === :cfunction
         t = e.args[1]
         isa(t, Type) || (t = Any)
         abstract_eval_cfunction(interp, e, vtypes, sv)
-    elseif e.head === :method
+    elseif ehead === :method
         t = (length(e.args) == 1) ? Any : Nothing
-    elseif e.head === :copyast
+    elseif ehead === :copyast
         t = abstract_eval_value(interp, e.args[1], vtypes, sv)
         if t isa Const && t.val isa Expr
             # `copyast` makes copies of Exprs
             t = Expr
         end
-    elseif e.head === :invoke
+    elseif ehead === :invoke || ehead === :invoke_modify
         error("type inference data-flow error: tried to double infer a function")
-    elseif e.head === :isdefined
+    elseif ehead === :isdefined
         sym = e.args[1]
         t = Bool
-        if isa(sym, Slot)
-            vtyp = vtypes[slot_id(sym)]
+        if isa(sym, SlotNumber)
+            vtyp = vtypes[slot_id(sym)]::VarState
             if vtyp.typ === Bottom
                 t = Const(false) # never assigned previously
             elseif !vtyp.undef
@@ -1518,7 +1643,7 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e),
                 t = Const(true)
             end
         elseif isa(sym, Expr) && sym.head === :static_parameter
-            n = sym.args[1]
+            n = sym.args[1]::Int
             if 1 <= n <= length(sv.sptypes)
                 spty = sv.sptypes[n]
                 if isa(spty, Const)
@@ -1553,7 +1678,7 @@ function abstract_eval_global(M::Module, s::Symbol)
 end
 
 function abstract_eval_ssavalue(s::SSAValue, src::CodeInfo)
-    typ = src.ssavaluetypes[s.id]
+    typ = (src.ssavaluetypes::Vector{Any})[s.id]
     if typ === NOT_FOUND
         return Bottom
     end
@@ -1634,32 +1759,31 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
     @assert !frame.inferred
     frame.dont_work_on_me = true # mark that this function is currently on the stack
     W = frame.ip
-    s = frame.stmt_types
+    states = frame.stmt_types
     n = frame.nstmts
     nargs = frame.nargs
     def = frame.linfo.def
     isva = isa(def, Method) && def.isva
     nslots = nargs - isva
     slottypes = frame.slottypes
+    ssavaluetypes = frame.src.ssavaluetypes::Vector{Any}
     while frame.pc´´ <= n
         # make progress on the active ip set
-        local pc::Int = frame.pc´´ # current program-counter
+        local pc::Int = frame.pc´´
         while true # inner loop optimizes the common case where it can run straight from pc to pc + 1
             #print(pc,": ",s[pc],"\n")
             local pc´::Int = pc + 1 # next program-counter (after executing instruction)
             if pc == frame.pc´´
-                # need to update pc´´ to point at the new lowest instruction in W
-                min_pc = _bits_findnext(W.bits, pc + 1)
-                frame.pc´´ = min_pc == -1 ? n + 1 : min_pc
+                # want to update pc´´ to point at the new lowest instruction in W
+                frame.pc´´ = pc´
             end
             delete!(W, pc)
             frame.currpc = pc
-            frame.cur_hand = frame.handler_at[pc]
             edges = frame.stmt_edges[pc]
             edges === nothing || empty!(edges)
             frame.stmt_info[pc] = nothing
             stmt = frame.src.code[pc]
-            changes = s[pc]::VarTable
+            changes = states[pc]::VarTable
             t = nothing
 
             hd = isa(stmt, Expr) ? stmt.head : nothing
@@ -1670,13 +1794,17 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
             elseif isa(stmt, GotoNode)
                 pc´ = (stmt::GotoNode).label
             elseif isa(stmt, GotoIfNot)
-                condt = abstract_eval_value(interp, stmt.cond, changes, frame)
+                condx = stmt.cond
+                condt = abstract_eval_value(interp, condx, changes, frame)
                 if condt === Bottom
                     empty!(frame.pclimitations)
-                end
-                if condt === Bottom
                     break
                 end
+                if !(isa(condt, Const) || isa(condt, Conditional)) && isa(condx, SlotNumber)
+                    # if this non-`Conditional` object is a slot, we form and propagate
+                    # the conditional constraint on it
+                    condt = Conditional(condx, Const(true), Const(false))
+                end
                 condval = maybe_extract_const_bool(condt)
                 l = stmt.dest::Int
                 if !isempty(frame.pclimitations)
@@ -1692,20 +1820,19 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                     pc´ = l
                 else
                     # general case
-                    frame.handler_at[l] = frame.cur_hand
                     changes_else = changes
                     if isa(condt, Conditional)
                         changes_else = conditional_changes(changes_else, condt.elsetype, condt.var)
                         changes      = conditional_changes(changes,      condt.vtype,    condt.var)
                     end
-                    newstate_else = stupdate!(s[l], changes_else)
+                    newstate_else = stupdate!(states[l], changes_else)
                     if newstate_else !== nothing
                         # add else branch to active IP list
                         if l < frame.pc´´
                             frame.pc´´ = l
                         end
                         push!(W, l)
-                        s[l] = newstate_else
+                        states[l] = newstate_else
                     end
                 end
             elseif isa(stmt, ReturnNode)
@@ -1740,7 +1867,7 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                     for (caller, caller_pc) in frame.cycle_backedges
                         # notify backedges of updated type information
                         typeassert(caller.stmt_types[caller_pc], VarTable) # we must have visited this statement before
-                        if !(caller.src.ssavaluetypes[caller_pc] === Any)
+                        if !((caller.src.ssavaluetypes::Vector{Any})[caller_pc] === Any)
                             # no reason to revisit if that call-site doesn't affect the final result
                             if caller_pc < caller.pc´´
                                 caller.pc´´ = caller_pc
@@ -1750,41 +1877,41 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                     end
                 end
             elseif hd === :enter
+                stmt = stmt::Expr
                 l = stmt.args[1]::Int
-                frame.cur_hand = Pair{Any,Any}(l, frame.cur_hand)
                 # propagate type info to exception handler
-                old = s[l]
+                old = states[l]
                 newstate_catch = stupdate!(old, changes)
                 if newstate_catch !== nothing
                     if l < frame.pc´´
                         frame.pc´´ = l
                     end
                     push!(W, l)
-                    s[l] = newstate_catch
+                    states[l] = newstate_catch
                 end
-                typeassert(s[l], VarTable)
-                frame.handler_at[l] = frame.cur_hand
+                typeassert(states[l], VarTable)
             elseif hd === :leave
-                for i = 1:((stmt.args[1])::Int)
-                    frame.cur_hand = (frame.cur_hand::Pair{Any,Any}).second
-                end
             else
                 if hd === :(=)
+                    stmt = stmt::Expr
                     t = abstract_eval_statement(interp, stmt.args[2], changes, frame)
                     if t === Bottom
                         break
                     end
-                    frame.src.ssavaluetypes[pc] = t
+                    ssavaluetypes[pc] = t
                     lhs = stmt.args[1]
-                    if isa(lhs, Slot)
+                    if isa(lhs, SlotNumber)
                         changes = StateUpdate(lhs, VarState(t, false), changes, false)
                     end
                 elseif hd === :method
+                    stmt = stmt::Expr
                     fname = stmt.args[1]
-                    if isa(fname, Slot)
+                    if isa(fname, SlotNumber)
                         changes = StateUpdate(fname, VarState(Any, false), changes, false)
                     end
-                elseif hd === :inbounds || hd === :meta || hd === :loopinfo || hd === :code_coverage_effect
+                elseif hd === :code_coverage_effect ||
+                       (hd !== :boundscheck && # :boundscheck can be narrowed to Bool
+                        hd !== nothing && is_meta_expr_head(hd))
                     # these do not generate code
                 else
                     t = abstract_eval_statement(interp, stmt, changes, frame)
@@ -1794,19 +1921,25 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                     if !isempty(frame.ssavalue_uses[pc])
                         record_ssa_assign(pc, t, frame)
                     else
-                        frame.src.ssavaluetypes[pc] = t
+                        ssavaluetypes[pc] = t
                     end
                 end
-                if frame.cur_hand !== nothing && isa(changes, StateUpdate)
-                    # propagate new type info to exception handler
-                    # the handling for Expr(:enter) propagates all changes from before the try/catch
-                    # so this only needs to propagate any changes
-                    l = frame.cur_hand.first::Int
-                    if stupdate1!(s[l]::VarTable, changes::StateUpdate) !== false
-                        if l < frame.pc´´
-                            frame.pc´´ = l
+                if isa(changes, StateUpdate)
+                    let cur_hand = frame.handler_at[pc], l, enter
+                        while cur_hand != 0
+                            enter = frame.src.code[cur_hand]
+                            l = (enter::Expr).args[1]::Int
+                            # propagate new type info to exception handler
+                            # the handling for Expr(:enter) propagates all changes from before the try/catch
+                            # so this only needs to propagate any changes
+                            if stupdate1!(states[l]::VarTable, changes::StateUpdate) !== false
+                                if l < frame.pc´´
+                                    frame.pc´´ = l
+                                end
+                                push!(W, l)
+                            end
+                            cur_hand = frame.handler_at[cur_hand]
                         end
-                        push!(W, l)
                     end
                 end
             end
@@ -1815,24 +1948,23 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
 
             if t === nothing
                 # mark other reached expressions as `Any` to indicate they don't throw
-                frame.src.ssavaluetypes[pc] = Any
+                ssavaluetypes[pc] = Any
             end
 
             pc´ > n && break # can't proceed with the fast-path fall-through
-            frame.handler_at[pc´] = frame.cur_hand
-            newstate = stupdate!(s[pc´], changes)
+            newstate = stupdate!(states[pc´], changes)
             if isa(stmt, GotoNode) && frame.pc´´ < pc´
                 # if we are processing a goto node anyways,
                 # (such as a terminator for a loop, if-else, or try block),
                 # consider whether we should jump to an older backedge first,
                 # to try to traverse the statements in approximate dominator order
                 if newstate !== nothing
-                    s[pc´] = newstate
+                    states[pc´] = newstate
                 end
                 push!(W, pc´)
-                pc = frame.pc´´
+                break
             elseif newstate !== nothing
-                s[pc´] = newstate
+                states[pc´] = newstate
                 pc = pc´
             elseif pc´ in W
                 pc = pc´
@@ -1840,13 +1972,19 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                 break
             end
         end
+        frame.pc´´ = _bits_findnext(W.bits, frame.pc´´)::Int # next program-counter
     end
     frame.dont_work_on_me = false
     nothing
 end
 
-function conditional_changes(changes::VarTable, @nospecialize(typ), var::Slot)
-    if typ ⊑ (changes[slot_id(var)]::VarState).typ
+function conditional_changes(changes::VarTable, @nospecialize(typ), var::SlotNumber)
+    oldtyp = (changes[slot_id(var)]::VarState).typ
+    # approximate test for `typ ∩ oldtyp` being better than `oldtyp`
+    # since we probably formed these types with `typesubtract`, the comparison is likely simple
+    if ignorelimited(typ) ⊑ ignorelimited(oldtyp)
+        # typ is better unlimited, but we may still need to compute the tmeet with the limit "causes" since we ignored those in the comparison
+        oldtyp isa LimitedAccuracy && (typ = tmerge(typ, LimitedAccuracy(Bottom, oldtyp.causes)))
         return StateUpdate(var, VarState(typ, false), changes, true)
     end
     return changes
diff --git a/base/compiler/bootstrap.jl b/base/compiler/bootstrap.jl
index f351429aff7eb7..f4adb47abf3d14 100644
--- a/base/compiler/bootstrap.jl
+++ b/base/compiler/bootstrap.jl
@@ -5,10 +5,20 @@
 # especially try to make sure any recursive and leaf functions have concrete signatures,
 # since we won't be able to specialize & infer them at runtime
 
-let fs = Any[typeinf_ext, typeinf, typeinf_edge, pure_eval_call, run_passes],
-    world = get_world_counter(),
+time() = ccall(:jl_clock_now, Float64, ())
+
+let
+    world = get_world_counter()
     interp = NativeInterpreter(world)
 
+    fs = Any[
+        # we first create caches for the optimizer, because they contain many loop constructions
+        # and they're better not run in the interpreter, even during bootstrapping
+        run_passes,
+        # then we create caches for inference entries
+        typeinf_ext, typeinf, typeinf_edge,
+    ]
+    # tfuncs can't be inferred from the inference entries above, so here we infer them manually
     for x in T_FFUNC_VAL
         push!(fs, x[3])
     end
@@ -20,6 +30,7 @@ let fs = Any[typeinf_ext, typeinf, typeinf_edge, pure_eval_call, run_passes],
             println(stderr, "WARNING: tfunc missing for ", reinterpret(IntrinsicFunction, Int32(i)))
         end
     end
+    starttime = time()
     for f in fs
         for m in _methods_by_ftype(Tuple{typeof(f), Vararg{Any}}, 10, typemax(UInt))
             # remove any TypeVars from the intersection
@@ -32,4 +43,6 @@ let fs = Any[typeinf_ext, typeinf, typeinf_edge, pure_eval_call, run_passes],
             typeinf_type(interp, m.method, Tuple{typ...}, m.sparams)
         end
     end
+    endtime = time()
+    println("Core.Compiler ──── ", sub_float(endtime,starttime), " seconds")
 end
diff --git a/base/compiler/compiler.jl b/base/compiler/compiler.jl
index 3a2ee778cbe58d..3f3883eb4647f3 100644
--- a/base/compiler/compiler.jl
+++ b/base/compiler/compiler.jl
@@ -10,6 +10,9 @@ import Core: print, println, show, write, unsafe_write, stdout, stderr,
 
 const getproperty = Core.getfield
 const setproperty! = Core.setfield!
+const swapproperty! = Core.swapfield!
+const modifyproperty! = Core.modifyfield!
+const replaceproperty! = Core.replacefield!
 
 ccall(:jl_set_istopmod, Cvoid, (Any, Bool), Compiler, false)
 
@@ -19,9 +22,10 @@ eval(m, x) = Core.eval(m, x)
 include(x) = Core.include(Compiler, x)
 include(mod, x) = Core.include(mod, x)
 
-#############
-# from Base #
-#############
+# The @inline/@noinline macros that can be applied to a function declaration are not available
+# until after array.jl, and so we will mark them within a function body instead.
+macro inline()   Expr(:meta, :inline)   end
+macro noinline() Expr(:meta, :noinline) end
 
 # essential files and libraries
 include("essentials.jl")
@@ -80,9 +84,9 @@ using .Iterators: Flatten, Filter, product  # for generators
 include("namedtuple.jl")
 
 ntuple(f, ::Val{0}) = ()
-ntuple(f, ::Val{1}) = (@_inline_meta; (f(1),))
-ntuple(f, ::Val{2}) = (@_inline_meta; (f(1), f(2)))
-ntuple(f, ::Val{3}) = (@_inline_meta; (f(1), f(2), f(3)))
+ntuple(f, ::Val{1}) = (@inline; (f(1),))
+ntuple(f, ::Val{2}) = (@inline; (f(1), f(2)))
+ntuple(f, ::Val{3}) = (@inline; (f(1), f(2), f(3)))
 ntuple(f, ::Val{n}) where {n} = ntuple(f, n::Int)
 ntuple(f, n) = (Any[f(i) for i = 1:n]...,)
 
@@ -134,4 +138,3 @@ Core.eval(Core, :(_parse = Compiler.fl_parse))
 
 end # baremodule Compiler
 ))
-
diff --git a/base/compiler/inferenceresult.jl b/base/compiler/inferenceresult.jl
index 327ab85d104f39..483e2f38d9ee84 100644
--- a/base/compiler/inferenceresult.jl
+++ b/base/compiler/inferenceresult.jl
@@ -13,33 +13,35 @@ end
 # for the provided `linfo` and `given_argtypes`. The purpose of this function is
 # to return a valid value for `cache_lookup(linfo, argtypes, cache).argtypes`,
 # so that we can construct cache-correct `InferenceResult`s in the first place.
-function matching_cache_argtypes(linfo::MethodInstance, given_argtypes::Vector, va_override)
+function matching_cache_argtypes(linfo::MethodInstance, given_argtypes::Vector, va_override::Bool)
     @assert isa(linfo.def, Method) # ensure the next line works
     nargs::Int = linfo.def.nargs
-    @assert length(given_argtypes) >= (nargs - 1)
     given_argtypes = anymap(widenconditional, given_argtypes)
-    if va_override || linfo.def.isva
+    isva = va_override || linfo.def.isva
+    if isva || isvarargtype(given_argtypes[end])
         isva_given_argtypes = Vector{Any}(undef, nargs)
-        for i = 1:(nargs - 1)
+        for i = 1:(nargs - isva)
             isva_given_argtypes[i] = argtype_by_index(given_argtypes, i)
         end
-        if length(given_argtypes) >= nargs || !isvarargtype(given_argtypes[end])
-            isva_given_argtypes[nargs] = tuple_tfunc(given_argtypes[nargs:end])
-        else
-            isva_given_argtypes[nargs] = tuple_tfunc(given_argtypes[end:end])
+        if isva
+            if length(given_argtypes) < nargs && isvarargtype(given_argtypes[end])
+                last = length(given_argtypes)
+            else
+                last = nargs
+            end
+            isva_given_argtypes[nargs] = tuple_tfunc(given_argtypes[last:end])
         end
         given_argtypes = isva_given_argtypes
     end
+    @assert length(given_argtypes) == nargs
     cache_argtypes, overridden_by_const = matching_cache_argtypes(linfo, nothing, va_override)
-    if nargs === length(given_argtypes)
-        for i in 1:nargs
-            given_argtype = given_argtypes[i]
-            cache_argtype = cache_argtypes[i]
-            if !is_argtype_match(given_argtype, cache_argtype, overridden_by_const[i])
-                # prefer the argtype we were given over the one computed from `linfo`
-                cache_argtypes[i] = given_argtype
-                overridden_by_const[i] = true
-            end
+    for i in 1:nargs
+        given_argtype = given_argtypes[i]
+        cache_argtype = cache_argtypes[i]
+        if !is_argtype_match(given_argtype, cache_argtype, overridden_by_const[i])
+            # prefer the argtype we were given over the one computed from `linfo`
+            cache_argtypes[i] = given_argtype
+            overridden_by_const[i] = true
         end
     end
     return cache_argtypes, overridden_by_const
diff --git a/base/compiler/inferencestate.jl b/base/compiler/inferencestate.jl
index cb5d2009a9171e..51cee89d239eb5 100644
--- a/base/compiler/inferencestate.jl
+++ b/base/compiler/inferencestate.jl
@@ -28,9 +28,7 @@ mutable struct InferenceState
     pc´´::LineNum
     nstmts::Int
     # current exception handler info
-    cur_hand #::Union{Nothing, Pair{LineNum, prev_handler}}
-    handler_at::Vector{Any}
-    n_handlers::Int
+    handler_at::Vector{LineNum}
     # ssavalue sparsity and restart info
     ssavalue_uses::Vector{BitSet}
     throw_blocks::BitSet
@@ -55,10 +53,9 @@ mutable struct InferenceState
 
     # src is assumed to be a newly-allocated CodeInfo, that can be modified in-place to contain intermediate results
     function InferenceState(result::InferenceResult, src::CodeInfo,
-                            cached::Bool, interp::AbstractInterpreter)
-        linfo = result.linfo
-        code = src.code::Array{Any,1}
-        toplevel = !isa(linfo.def, Method)
+                            cache::Symbol, interp::AbstractInterpreter)
+        (; def) = linfo = result.linfo
+        code = src.code::Vector{Any}
 
         sp = sptypes_from_meth_instance(linfo::MethodInstance)
 
@@ -87,43 +84,120 @@ mutable struct InferenceState
         throw_blocks = find_throw_blocks(code)
 
         # exception handlers
-        cur_hand = nothing
-        handler_at = Any[ nothing for i=1:n ]
-        n_handlers = 0
-
-        W = BitSet()
-        push!(W, 1) #initial pc to visit
-
-        if !toplevel
-            meth = linfo.def
-            inmodule = meth.module
-        else
-            inmodule = linfo.def::Module
-        end
+        ip = BitSet()
+        handler_at = compute_trycatch(src.code, ip)
+        push!(ip, 1)
 
+        mod = isa(def, Method) ? def.module : def
         valid_worlds = WorldRange(src.min_world,
             src.max_world == typemax(UInt) ? get_world_counter() : src.max_world)
+
+        @assert cache === :no || cache === :local || cache === :global
         frame = new(
             InferenceParams(interp), result, linfo,
-            sp, slottypes, inmodule, 0,
+            sp, slottypes, mod, 0,
             IdSet{InferenceState}(), IdSet{InferenceState}(),
             src, get_world_counter(interp), valid_worlds,
             nargs, s_types, s_edges, stmt_info,
-            Union{}, W, 1, n,
-            cur_hand, handler_at, n_handlers,
+            Union{}, ip, 1, n, handler_at,
             ssavalue_uses, throw_blocks,
             Vector{Tuple{InferenceState,LineNum}}(), # cycle_backedges
             Vector{InferenceState}(), # callers_in_cycle
             #=parent=#nothing,
-            cached, false, false,
+            cache === :global, false, false,
             CachedMethodTable(method_table(interp)),
             interp)
         result.result = frame
-        cached && push!(get_inference_cache(interp), result)
+        cache !== :no && push!(get_inference_cache(interp), result)
         return frame
     end
 end
 
+function compute_trycatch(code::Vector{Any}, ip::BitSet)
+    # The goal initially is to record the frame like this for the state at exit:
+    # 1: (enter 3) # == 0
+    # 2: (expr)    # == 1
+    # 3: (leave 1) # == 1
+    # 4: (expr)    # == 0
+    # then we can find all trys by walking backwards from :enter statements,
+    # and all catches by looking at the statement after the :enter
+    n = length(code)
+    empty!(ip)
+    ip.offset = 0 # for _bits_findnext
+    push!(ip, n + 1)
+    handler_at = fill(0, n)
+
+    # start from all :enter statements and record the location of the try
+    for pc = 1:n
+        stmt = code[pc]
+        if isexpr(stmt, :enter)
+            l = stmt.args[1]::Int
+            handler_at[pc + 1] = pc
+            push!(ip, pc + 1)
+            handler_at[l] = pc
+            push!(ip, l)
+        end
+    end
+
+    # now forward those marks to all :leave statements
+    pc´´ = 0
+    while true
+        # make progress on the active ip set
+        pc = _bits_findnext(ip.bits, pc´´)::Int
+        pc > n && break
+        while true # inner loop optimizes the common case where it can run straight from pc to pc + 1
+            pc´ = pc + 1 # next program-counter (after executing instruction)
+            if pc == pc´´
+                pc´´ = pc´
+            end
+            delete!(ip, pc)
+            cur_hand = handler_at[pc]
+            @assert cur_hand != 0 "unbalanced try/catch"
+            stmt = code[pc]
+            if isa(stmt, GotoNode)
+                pc´ = stmt.label
+            elseif isa(stmt, GotoIfNot)
+                l = stmt.dest::Int
+                if handler_at[l] != cur_hand
+                    @assert handler_at[l] == 0 "unbalanced try/catch"
+                    handler_at[l] = cur_hand
+                    if l < pc´´
+                        pc´´ = l
+                    end
+                    push!(ip, l)
+                end
+            elseif isa(stmt, ReturnNode)
+                @assert !isdefined(stmt, :val) "unbalanced try/catch"
+                break
+            elseif isa(stmt, Expr)
+                head = stmt.head
+                if head === :enter
+                    cur_hand = pc
+                elseif head === :leave
+                    l = stmt.args[1]::Int
+                    for i = 1:l
+                        cur_hand = handler_at[cur_hand]
+                    end
+                    cur_hand == 0 && break
+                end
+            end
+
+            pc´ > n && break # can't proceed with the fast-path fall-through
+            if handler_at[pc´] != cur_hand
+                @assert handler_at[pc´] == 0 "unbalanced try/catch"
+                handler_at[pc´] = cur_hand
+            elseif !in(pc´, ip)
+                break  # already visited
+            end
+            pc = pc´
+        end
+    end
+
+    @assert first(ip) == n + 1
+    return handler_at
+end
+
+
 """
     Iterate through all callers of the given InferenceState in the abstract
     interpretation stack (including the given InferenceState itself), vising
@@ -149,12 +223,12 @@ end
 
 method_table(interp::AbstractInterpreter, sv::InferenceState) = sv.method_table
 
-function InferenceState(result::InferenceResult, cached::Bool, interp::AbstractInterpreter)
+function InferenceState(result::InferenceResult, cache::Symbol, interp::AbstractInterpreter)
     # prepare an InferenceState object for inferring lambda
     src = retrieve_code_info(result.linfo)
     src === nothing && return nothing
     validate_code_in_debug_mode(result.linfo, src, "lowered")
-    return InferenceState(result, src, cached, interp)
+    return InferenceState(result, src, cache, interp)
 end
 
 function sptypes_from_meth_instance(linfo::MethodInstance)
@@ -185,7 +259,7 @@ function sptypes_from_meth_instance(linfo::MethodInstance)
             while temp isa UnionAll
                 temp = temp.body
             end
-            sigtypes = temp.parameters
+            sigtypes = (temp::DataType).parameters
             for j = 1:length(sigtypes)
                 tj = sigtypes[j]
                 if isType(tj) && tj.parameters[1] === Pi
@@ -302,3 +376,5 @@ function print_callstack(sv::InferenceState)
         sv = sv.parent
     end
 end
+
+get_curr_ssaflag(sv::InferenceState) = sv.src.ssaflags[sv.currpc]
diff --git a/base/compiler/methodtable.jl b/base/compiler/methodtable.jl
index 1a0b4076f3ed94..93020ae6a2639e 100644
--- a/base/compiler/methodtable.jl
+++ b/base/compiler/methodtable.jl
@@ -28,6 +28,17 @@ struct InternalMethodTable <: MethodTableView
     world::UInt
 end
 
+"""
+    struct OverlayMethodTable <: MethodTableView
+
+Overlays the internal method table such that specific queries can be redirected to an
+external table, e.g., to override existing methods.
+"""
+struct OverlayMethodTable <: MethodTableView
+    world::UInt
+    mt::Core.MethodTable
+end
+
 """
     struct CachedMethodTable <: MethodTableView
 
@@ -43,25 +54,44 @@ CachedMethodTable(table::T) where T =
         table)
 
 """
-    findall(sig::Type{<:Tuple}, view::MethodTableView; limit=typemax(Int))
+    findall(sig::Type, view::MethodTableView; limit=typemax(Int))
 
 Find all methods in the given method table `view` that are applicable to the
 given signature `sig`. If no applicable methods are found, an empty result is
-returned. If the number of applicable methods exeeded the specified limit,
+returned. If the number of applicable methods exceeded the specified limit,
 `missing` is returned.
 """
-function findall(@nospecialize(sig::Type{<:Tuple}), table::InternalMethodTable; limit::Int=typemax(Int))
+function findall(@nospecialize(sig::Type), table::InternalMethodTable; limit::Int=typemax(Int))
+    _min_val = RefValue{UInt}(typemin(UInt))
+    _max_val = RefValue{UInt}(typemax(UInt))
+    _ambig = RefValue{Int32}(0)
+    ms = _methods_by_ftype(sig, nothing, limit, table.world, false, _min_val, _max_val, _ambig)
+    if ms === false
+        return missing
+    end
+    return MethodLookupResult(ms::Vector{Any}, WorldRange(_min_val[], _max_val[]), _ambig[] != 0)
+end
+
+function findall(@nospecialize(sig::Type), table::OverlayMethodTable; limit::Int=typemax(Int))
     _min_val = RefValue{UInt}(typemin(UInt))
     _max_val = RefValue{UInt}(typemax(UInt))
     _ambig = RefValue{Int32}(0)
-    ms = _methods_by_ftype(sig, limit, table.world, false, _min_val, _max_val, _ambig)
+    ms = _methods_by_ftype(sig, table.mt, limit, table.world, false, _min_val, _max_val, _ambig)
+    if ms === false
+        return missing
+    elseif isempty(ms)
+        # fall back to the internal method table
+        _min_val[] = typemin(UInt)
+        _max_val[] = typemax(UInt)
+        ms = _methods_by_ftype(sig, nothing, limit, table.world, false, _min_val, _max_val, _ambig)
+    end
     if ms === false
         return missing
     end
     return MethodLookupResult(ms::Vector{Any}, WorldRange(_min_val[], _max_val[]), _ambig[] != 0)
 end
 
-function findall(@nospecialize(sig::Type{<:Tuple}), table::CachedMethodTable; limit::Int=typemax(Int))
+function findall(@nospecialize(sig::Type), table::CachedMethodTable; limit::Int=typemax(Int))
     box = Core.Box(sig)
     return get!(table.cache, sig) do
         findall(box.contents, table.table; limit=limit)
@@ -69,7 +99,7 @@ function findall(@nospecialize(sig::Type{<:Tuple}), table::CachedMethodTable; li
 end
 
 """
-    findsup(sig::Type{<:Tuple}, view::MethodTableView)::Union{Tuple{MethodMatch, WorldRange}, Nothing}
+    findsup(sig::Type, view::MethodTableView)::Union{Tuple{MethodMatch, WorldRange}, Nothing}
 
 Find the (unique) method `m` such that `sig <: m.sig`, while being more
 specific than any other method with the same property. In other words, find
@@ -82,7 +112,7 @@ Such a method `m` need not exist. It is possible that no method is an
 upper bound of `sig`, or it is possible that among the upper bounds, there
 is no least element. In both cases `nothing` is returned.
 """
-function findsup(@nospecialize(sig::Type{<:Tuple}), table::InternalMethodTable)
+function findsup(@nospecialize(sig::Type), table::InternalMethodTable)
     min_valid = RefValue{UInt}(typemin(UInt))
     max_valid = RefValue{UInt}(typemax(UInt))
     result = ccall(:jl_gf_invoke_lookup_worlds, Any, (Any, UInt, Ptr{Csize_t}, Ptr{Csize_t}),
@@ -92,4 +122,4 @@ function findsup(@nospecialize(sig::Type{<:Tuple}), table::InternalMethodTable)
 end
 
 # This query is not cached
-findsup(sig::Type{<:Tuple}, table::CachedMethodTable) = findsup(sig, table.table)
+findsup(@nospecialize(sig::Type), table::CachedMethodTable) = findsup(sig, table.table)
diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl
index fd3b084858053e..7965dcbb2d7797 100644
--- a/base/compiler/optimize.jl
+++ b/base/compiler/optimize.jl
@@ -21,32 +21,48 @@ function push!(et::EdgeTracker, ci::CodeInstance)
     push!(et, ci.def)
 end
 
-struct InliningState{S <: Union{EdgeTracker, Nothing}, T, P}
+struct InliningState{S <: Union{EdgeTracker, Nothing}, T, I<:AbstractInterpreter}
     params::OptimizationParams
     et::S
     mi_cache::T
-    policy::P
+    interp::I
 end
 
-function default_inlining_policy(@nospecialize(src))
+function inlining_policy(interp::AbstractInterpreter, @nospecialize(src), stmt_flag::UInt8,
+                         mi::MethodInstance, argtypes::Vector{Any})
     if isa(src, CodeInfo) || isa(src, Vector{UInt8})
         src_inferred = ccall(:jl_ir_flag_inferred, Bool, (Any,), src)
-        src_inlineable = ccall(:jl_ir_flag_inlineable, Bool, (Any,), src)
+        src_inlineable = is_stmt_inline(stmt_flag) || ccall(:jl_ir_flag_inlineable, Bool, (Any,), src)
         return src_inferred && src_inlineable ? src : nothing
+    elseif isa(src, OptimizationState) && isdefined(src, :ir)
+        return (is_stmt_inline(stmt_flag) || src.src.inlineable) ? src.ir : nothing
+    elseif src === nothing && is_stmt_inline(stmt_flag)
+        # if this statement is forced to be inlined, make an additional effort to find the
+        # inferred source in the local cache
+        # we still won't find a source for a recursive call because the "single-level" inlining
+        # seems to be more trouble and complexity than it's worth
+        inf_result = cache_lookup(mi, argtypes, get_inference_cache(interp))
+        inf_result === nothing && return nothing
+        src = inf_result.src
+        if isa(src, CodeInfo)
+            src_inferred = ccall(:jl_ir_flag_inferred, Bool, (Any,), src)
+            return src_inferred ? src : nothing
+        elseif isa(src, OptimizationState)
+            return isdefined(src, :ir) ? src.ir : nothing
+        else
+            return nothing
+        end
     end
-    if isa(src, OptimizationState) && isdefined(src, :ir)
-        return src.src.inlineable ? src.ir : nothing
-    end
-    return nothing
 end
 
+include("compiler/ssair/driver.jl")
+
 mutable struct OptimizationState
     linfo::MethodInstance
     src::CodeInfo
-    ir::Any # Union{Nothing, IRCode}
+    ir::Union{Nothing, IRCode}
     stmt_info::Vector{Any}
     mod::Module
-    nargs::Int
     sptypes::Vector{Any} # static parameters
     slottypes::Vector{Any}
     const_api::Bool
@@ -56,9 +72,9 @@ mutable struct OptimizationState
         inlining = InliningState(params,
             EdgeTracker(s_edges, frame.valid_worlds),
             WorldView(code_cache(interp), frame.world),
-            inlining_policy(interp))
+            interp)
         return new(frame.linfo,
-                   frame.src, nothing, frame.stmt_info, frame.mod, frame.nargs,
+                   frame.src, nothing, frame.stmt_info, frame.mod,
                    frame.sptypes, frame.slottypes, false,
                    inlining)
     end
@@ -68,6 +84,8 @@ mutable struct OptimizationState
         nssavalues = src.ssavaluetypes
         if nssavalues isa Int
             src.ssavaluetypes = Any[ Any for i = 1:nssavalues ]
+        else
+            nssavalues = length(src.ssavaluetypes)
         end
         nslots = length(src.slotflags)
         slottypes = src.slottypes
@@ -76,26 +94,19 @@ mutable struct OptimizationState
         end
         stmt_info = Any[nothing for i = 1:nssavalues]
         # cache some useful state computations
-        toplevel = !isa(linfo.def, Method)
-        if !toplevel
-            meth = linfo.def
-            inmodule = meth.module
-            nargs = meth.nargs
-        else
-            inmodule = linfo.def::Module
-            nargs = 0
-        end
+        def = linfo.def
+        mod = isa(def, Method) ? def.module : def
         # Allow using the global MI cache, but don't track edges.
         # This method is mostly used for unit testing the optimizer
         inlining = InliningState(params,
             nothing,
             WorldView(code_cache(interp), get_world_counter()),
-            inlining_policy(interp))
+            interp)
         return new(linfo,
-                   src, nothing, stmt_info, inmodule, nargs,
+                   src, nothing, stmt_info, mod,
                    sptypes_from_meth_instance(linfo), slottypes, false,
                    inlining)
-        end
+    end
 end
 
 function OptimizationState(linfo::MethodInstance, params::OptimizationParams, interp::AbstractInterpreter)
@@ -105,20 +116,17 @@ function OptimizationState(linfo::MethodInstance, params::OptimizationParams, in
 end
 
 function ir_to_codeinf!(opt::OptimizationState)
-    replace_code_newstyle!(opt.src, opt.ir, opt.nargs - 1)
+    (; linfo, src) = opt
+    optdef = linfo.def
+    replace_code_newstyle!(src, opt.ir::IRCode, isa(optdef, Method) ? Int(optdef.nargs) : 0)
     opt.ir = nothing
-    let src = opt.src::CodeInfo
-        widen_all_consts!(src)
-        src.inferred = true
-        # finish updating the result struct
-        validate_code_in_debug_mode(opt.linfo, src, "optimized")
-        return src
-    end
+    widen_all_consts!(src)
+    src.inferred = true
+    # finish updating the result struct
+    validate_code_in_debug_mode(linfo, src, "optimized")
+    return src
 end
 
-include("compiler/ssair/driver.jl")
-
-
 #############
 # constants #
 #############
@@ -132,9 +140,15 @@ const SLOT_ASSIGNEDONCE = 16 # slot is assigned to only once
 const SLOT_USEDUNDEF    = 32 # slot has uses that might raise UndefVarError
 # const SLOT_CALLED      = 64
 
-# This statement was marked as @inbounds by the user. If replaced by inlining,
-# any contained boundschecks may be removed
-const IR_FLAG_INBOUNDS       = 0x01
+# NOTE make sure to sync the flag definitions below with julia.h and `jl_code_info_set_ir` in method.c
+
+# This statement is marked as @inbounds by user.
+# If replaced by inlining, any contained boundschecks may be removed.
+const IR_FLAG_INBOUNDS       = 0x01 << 0
+# This statement is marked as @inline by user
+const IR_FLAG_INLINE         = 0x01 << 1
+# This statement is marked as @noinline by user
+const IR_FLAG_NOINLINE       = 0x01 << 2
 # This statement may be removed if its result is unused. In particular it must
 # thus be both pure and effect free.
 const IR_FLAG_EFFECT_FREE    = 0x01 << 4
@@ -175,11 +189,14 @@ function isinlineable(m::Method, me::OptimizationState, params::OptimizationPara
         end
     end
     if !inlineable
-        inlineable = inline_worthy(me.ir, params, union_penalties, cost_threshold + bonus)
+        inlineable = inline_worthy(me.ir::IRCode, params, union_penalties, cost_threshold + bonus)
     end
     return inlineable
 end
 
+is_stmt_inline(stmt_flag::UInt8)   = stmt_flag & IR_FLAG_INLINE   != 0
+is_stmt_noinline(stmt_flag::UInt8) = stmt_flag & IR_FLAG_NOINLINE != 0
+
 # These affect control flow within the function (so may not be removed
 # if there is no usage within the function), but don't affect the purity
 # of the function as a whole.
@@ -197,10 +214,10 @@ function stmt_affects_purity(@nospecialize(stmt), ir)
     return true
 end
 
-# Convert IRCode back to CodeInfo and compute inlining cost and sideeffects
-function finish(interp::AbstractInterpreter, opt::OptimizationState, params::OptimizationParams, ir, @nospecialize(result))
-    def = opt.linfo.def
-    nargs = Int(opt.nargs) - 1
+# compute inlining cost and sideeffects
+function finish(interp::AbstractInterpreter, opt::OptimizationState, params::OptimizationParams, ir::IRCode, @nospecialize(result))
+    (; src, linfo) = opt
+    (; def, specTypes) = linfo
 
     force_noinline = _any(@nospecialize(x) -> isexpr(x, :meta) && x.args[1] === :noinline, ir.meta)
 
@@ -222,7 +239,7 @@ function finish(interp::AbstractInterpreter, opt::OptimizationState, params::Opt
                 end
             end
             if proven_pure
-                for fl in opt.src.slotflags
+                for fl in src.slotflags
                     if (fl & SLOT_USEDUNDEF) != 0
                         proven_pure = false
                         break
@@ -231,7 +248,7 @@ function finish(interp::AbstractInterpreter, opt::OptimizationState, params::Opt
             end
         end
         if proven_pure
-            opt.src.pure = true
+            src.pure = true
         end
 
         if proven_pure
@@ -244,7 +261,7 @@ function finish(interp::AbstractInterpreter, opt::OptimizationState, params::Opt
             if !(isa(result, Const) && !is_inlineable_constant(result.val))
                 opt.const_api = true
             end
-            force_noinline || (opt.src.inlineable = true)
+            force_noinline || (src.inlineable = true)
         end
     end
 
@@ -253,7 +270,7 @@ function finish(interp::AbstractInterpreter, opt::OptimizationState, params::Opt
     # determine and cache inlineability
     union_penalties = false
     if !force_noinline
-        sig = unwrap_unionall(opt.linfo.specTypes)
+        sig = unwrap_unionall(specTypes)
         if isa(sig, DataType) && sig.name === Tuple.name
             for P in sig.parameters
                 P = unwrap_unionall(P)
@@ -265,25 +282,25 @@ function finish(interp::AbstractInterpreter, opt::OptimizationState, params::Opt
         else
             force_noinline = true
         end
-        if !opt.src.inlineable && result === Union{}
+        if !src.inlineable && result === Union{}
             force_noinline = true
         end
     end
     if force_noinline
-        opt.src.inlineable = false
+        src.inlineable = false
     elseif isa(def, Method)
-        if opt.src.inlineable && isdispatchtuple(opt.linfo.specTypes)
+        if src.inlineable && isdispatchtuple(specTypes)
             # obey @inline declaration if a dispatch barrier would not help
         else
             bonus = 0
             if result ⊑ Tuple && !isconcretetype(widenconst(result))
                 bonus = params.inline_tupleret_bonus
             end
-            if opt.src.inlineable
+            if src.inlineable
                 # For functions declared @inline, increase the cost threshold 20x
                 bonus += params.inline_cost_threshold*19
             end
-            opt.src.inlineable = isinlineable(def, opt, params, union_penalties, bonus)
+            src.inlineable = isinlineable(def, opt, params, union_penalties, bonus)
         end
     end
 
@@ -292,11 +309,114 @@ end
 
 # run the optimization work
 function optimize(interp::AbstractInterpreter, opt::OptimizationState, params::OptimizationParams, @nospecialize(result))
-    nargs = Int(opt.nargs) - 1
-    @timeit "optimizer" ir = run_passes(opt.src, nargs, opt)
+    @timeit "optimizer" ir = run_passes(opt.src, opt)
     finish(interp, opt, params, ir, result)
 end
 
+function run_passes(ci::CodeInfo, sv::OptimizationState)
+    preserve_coverage = coverage_enabled(sv.mod)
+    ir = convert_to_ircode(ci, copy_exprargs(ci.code), preserve_coverage, sv)
+    ir = slot2reg(ir, ci, sv)
+    #@Base.show ("after_construct", ir)
+    # TODO: Domsorting can produce an updated domtree - no need to recompute here
+    @timeit "compact 1" ir = compact!(ir)
+    @timeit "Inlining" ir = ssa_inlining_pass!(ir, ir.linetable, sv.inlining, ci.propagate_inbounds)
+    #@timeit "verify 2" verify_ir(ir)
+    ir = compact!(ir)
+    #@Base.show ("before_sroa", ir)
+    @timeit "SROA" ir = getfield_elim_pass!(ir)
+    #@Base.show ir.new_nodes
+    #@Base.show ("after_sroa", ir)
+    ir = adce_pass!(ir)
+    #@Base.show ("after_adce", ir)
+    @timeit "type lift" ir = type_lift_pass!(ir)
+    @timeit "compact 3" ir = compact!(ir)
+    #@Base.show ir
+    if JLOptions().debug_level == 2
+        @timeit "verify 3" (verify_ir(ir); verify_linetable(ir.linetable))
+    end
+    return ir
+end
+
+function convert_to_ircode(ci::CodeInfo, code::Vector{Any}, coverage::Bool, sv::OptimizationState)
+    # Go through and add an unreachable node after every
+    # Union{} call. Then reindex labels.
+    idx = 1
+    oldidx = 1
+    changemap = fill(0, length(code))
+    labelmap = coverage ? fill(0, length(code)) : changemap
+    prevloc = zero(eltype(ci.codelocs))
+    stmtinfo = sv.stmt_info
+    ssavaluetypes = ci.ssavaluetypes::Vector{Any}
+    while idx <= length(code)
+        codeloc = ci.codelocs[idx]
+        if coverage && codeloc != prevloc && codeloc != 0
+            # insert a side-effect instruction before the current instruction in the same basic block
+            insert!(code, idx, Expr(:code_coverage_effect))
+            insert!(ci.codelocs, idx, codeloc)
+            insert!(ssavaluetypes, idx, Nothing)
+            insert!(stmtinfo, idx, nothing)
+            changemap[oldidx] += 1
+            if oldidx < length(labelmap)
+                labelmap[oldidx + 1] += 1
+            end
+            idx += 1
+            prevloc = codeloc
+        end
+        if code[idx] isa Expr && ssavaluetypes[idx] === Union{}
+            if !(idx < length(code) && isa(code[idx + 1], ReturnNode) && !isdefined((code[idx + 1]::ReturnNode), :val))
+                # insert unreachable in the same basic block after the current instruction (splitting it)
+                insert!(code, idx + 1, ReturnNode())
+                insert!(ci.codelocs, idx + 1, ci.codelocs[idx])
+                insert!(ssavaluetypes, idx + 1, Union{})
+                insert!(stmtinfo, idx + 1, nothing)
+                if oldidx < length(changemap)
+                    changemap[oldidx + 1] += 1
+                    coverage && (labelmap[oldidx + 1] += 1)
+                end
+                idx += 1
+            end
+        end
+        idx += 1
+        oldidx += 1
+    end
+    renumber_ir_elements!(code, changemap, labelmap)
+
+    meta = Any[]
+    for i = 1:length(code)
+        code[i] = remove_meta!(code[i], meta)
+    end
+    strip_trailing_junk!(ci, code, stmtinfo)
+    cfg = compute_basic_blocks(code)
+    types = Any[]
+    stmts = InstructionStream(code, types, stmtinfo, ci.codelocs, ci.ssaflags)
+    ir = IRCode(stmts, cfg, collect(LineInfoNode, ci.linetable::Union{Vector{LineInfoNode},Vector{Any}}), sv.slottypes, meta, sv.sptypes)
+    return ir
+end
+
+function remove_meta!(@nospecialize(stmt), meta::Vector{Any})
+    if isa(stmt, Expr)
+        head = stmt.head
+        if head === :meta
+            args = stmt.args
+            if length(args) > 0
+                push!(meta, stmt)
+            end
+            return nothing
+        end
+    end
+    return stmt
+end
+
+function slot2reg(ir::IRCode, ci::CodeInfo, sv::OptimizationState)
+    # need `ci` for the slot metadata, IR for the code
+    svdef = sv.linfo.def
+    nargs = isa(svdef, Method) ? Int(svdef.nargs) : 0
+    @timeit "domtree 1" domtree = construct_domtree(ir.cfg.blocks)
+    defuse_insts = scan_slot_def_use(nargs, ci, ir.stmts.inst)
+    @timeit "construct_ssa" ir = construct_ssa!(ci, ir, domtree, defuse_insts, sv.slottypes) # consumes `ir`
+    return ir
+end
 
 # whether `f` is pure for inference
 function is_pure_intrinsic_infer(f::IntrinsicFunction)
@@ -355,6 +475,11 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp
                 # tuple iteration/destructuring makes that impossible
                 # return plus_saturate(argcost, isknowntype(extyp) ? 1 : params.inline_nonleaf_penalty)
                 return 0
+            elseif (f === Core.arrayref || f === Core.const_arrayref || f === Core.arrayset) && length(ex.args) >= 3
+                atyp = argextype(ex.args[3], src, sptypes, slottypes)
+                return isknowntype(atyp) ? 4 : error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty
+            elseif f === typeassert && isconstType(widenconst(argextype(ex.args[3], src, sptypes, slottypes)))
+                return 1
             elseif f === Core.isa
                 # If we're in a union context, we penalize type computations
                 # on union types. In such cases, it is usually better to perform
@@ -362,13 +487,10 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp
                 if union_penalties && isa(argextype(ex.args[2],  src, sptypes, slottypes), Union)
                     return params.inline_nonleaf_penalty
                 end
-            elseif (f === Core.arrayref || f === Core.const_arrayref) && length(ex.args) >= 3
-                atyp = argextype(ex.args[3], src, sptypes, slottypes)
-                return isknowntype(atyp) ? 4 : error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty
             end
             fidx = find_tfunc(f)
             if fidx === nothing
-                # unknown/unhandled builtin or anonymous function
+                # unknown/unhandled builtin
                 # Use the generic cost of a direct function call
                 return 20
             end
@@ -379,7 +501,7 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp
             return 0
         end
         return error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty
-    elseif head === :foreigncall || head === :invoke
+    elseif head === :foreigncall || head === :invoke || head === :invoke_modify
         # Calls whose "return type" is Union{} do not actually return:
         # they are errors. Since these are not part of the typical
         # run-time of the function, we omit them from
@@ -442,12 +564,14 @@ function inline_worthy(ir::IRCode,
     return true
 end
 
-function statement_costs!(cost::Vector{Int}, body::Vector{Any}, src::CodeInfo, sptypes::Vector{Any}, unionpenalties::Bool, params::OptimizationParams)
+function statement_costs!(cost::Vector{Int}, body::Vector{Any}, src::Union{CodeInfo, IRCode}, sptypes::Vector{Any}, unionpenalties::Bool, params::OptimizationParams)
     throw_blocks = params.unoptimize_throw_blocks ? find_throw_blocks(body) : nothing
     maxcost = 0
     for line = 1:length(body)
         stmt = body[line]
-        thiscost = statement_or_branch_cost(stmt, line, src, sptypes, src.slottypes, unionpenalties, params, throw_blocks)
+        thiscost = statement_or_branch_cost(stmt, line, src, sptypes,
+                                            src isa CodeInfo ? src.slottypes : src.argtypes,
+                                            unionpenalties, params, throw_blocks)
         cost[line] = thiscost
         if thiscost > maxcost
             maxcost = thiscost
@@ -468,14 +592,23 @@ function renumber_ir_elements!(body::Vector{Any}, changemap::Vector{Int})
     return renumber_ir_elements!(body, changemap, changemap)
 end
 
-function renumber_ir_elements!(body::Vector{Any}, ssachangemap::Vector{Int}, labelchangemap::Vector{Int})
-    for i = 2:length(labelchangemap)
-        labelchangemap[i] += labelchangemap[i - 1]
+function cumsum_ssamap!(ssamap::Vector{Int})
+    rel_change = 0 # running sum of the entries visited so far
+    for i = 1:length(ssamap)
+        rel_change += ssamap[i]
+        if ssamap[i] == -1
+            # A -1 entry means the statement was deleted: store typemin(Int) as a marker
+            ssamap[i] = typemin(Int)
+        else
+            ssamap[i] = rel_change # replace the entry with the cumulative change
+        end
+    end
+end
+
+function renumber_ir_elements!(body::Vector{Any}, ssachangemap::Vector{Int}, labelchangemap::Vector{Int})
+    cumsum_ssamap!(labelchangemap)
     if ssachangemap !== labelchangemap
-        for i = 2:length(ssachangemap)
-            ssachangemap[i] += ssachangemap[i - 1]
-        end
+        cumsum_ssamap!(ssachangemap)
     end
     if labelchangemap[end] == 0 && ssachangemap[end] == 0
         return
@@ -491,11 +624,32 @@ function renumber_ir_elements!(body::Vector{Any}, ssachangemap::Vector{Int}, lab
             end
             body[i] = GotoIfNot(cond, el.dest + labelchangemap[el.dest])
         elseif isa(el, ReturnNode)
-            if isdefined(el, :val) && isa(el.val, SSAValue)
-                body[i] = ReturnNode(SSAValue(el.val.id + ssachangemap[el.val.id]))
+            if isdefined(el, :val)
+                val = el.val
+                if isa(val, SSAValue)
+                    body[i] = ReturnNode(SSAValue(val.id + ssachangemap[val.id]))
+                end
             end
         elseif isa(el, SSAValue)
             body[i] = SSAValue(el.id + ssachangemap[el.id])
+        elseif isa(el, PhiNode)
+            i = 1
+            edges = el.edges
+            values = el.values
+            while i <= length(edges)
+                was_deleted = ssachangemap[edges[i]] == typemin(Int)
+                if was_deleted
+                    deleteat!(edges, i)
+                    deleteat!(values, i)
+                else
+                    edges[i] += ssachangemap[edges[i]]
+                    val = values[i]
+                    if isa(val, SSAValue)
+                        values[i] = SSAValue(val.id + ssachangemap[val.id])
+                    end
+                    i += 1
+                end
+            end
         elseif isa(el, Expr)
             if el.head === :(=) && el.args[2] isa Expr
                 el = el.args[2]::Expr
diff --git a/base/compiler/ssair/domtree.jl b/base/compiler/ssair/domtree.jl
index 1ab2876b769da5..fd49a7e118eb76 100644
--- a/base/compiler/ssair/domtree.jl
+++ b/base/compiler/ssair/domtree.jl
@@ -244,7 +244,7 @@ function update_level!(nodes::Vector{DomTreeNode}, node::BBNumber, level::Int)
 end
 
 """
-The main Semi-NCA algrithm. Matches Figure 2.8 in [LG05]. Note that the
+The main Semi-NCA algorithm. Matches Figure 2.8 in [LG05]. Note that the
 pseudocode in [LG05] is not entirely accurate. The best way to understand
 what's happening is to read [LT79], then the description of SLT in [LG05]
 (warning: inconsistent notation), then the description of Semi-NCA.
@@ -593,7 +593,7 @@ function naive_idoms(blocks::Vector{BasicBlock})
             if isempty(blocks[n].preds)
                 continue
             end
-            firstp, rest = Iterators.peel(Iterators.filter(p->p != 0, blocks[n].preds))
+            firstp, rest = Iterators.peel(Iterators.filter(p->p != 0, blocks[n].preds))::NTuple{2,Any}
             new_doms = copy(dominators[firstp])
             for p in rest
                 intersect!(new_doms, dominators[p])
diff --git a/base/compiler/ssair/driver.jl b/base/compiler/ssair/driver.jl
index ff8721205b8c3a..9a6071766271eb 100644
--- a/base/compiler/ssair/driver.jl
+++ b/base/compiler/ssair/driver.jl
@@ -20,126 +20,3 @@ include("compiler/ssair/inlining.jl")
 include("compiler/ssair/verify.jl")
 include("compiler/ssair/legacy.jl")
 #@isdefined(Base) && include("compiler/ssair/show.jl")
-
-function normalize(@nospecialize(stmt), meta::Vector{Any})
-    if isa(stmt, Expr)
-        if stmt.head === :meta
-            args = stmt.args
-            if length(args) > 0
-                push!(meta, stmt)
-            end
-            return nothing
-        end
-    end
-    return stmt
-end
-
-function convert_to_ircode(ci::CodeInfo, code::Vector{Any}, coverage::Bool, nargs::Int, sv::OptimizationState)
-    # Go through and add an unreachable node after every
-    # Union{} call. Then reindex labels.
-    idx = 1
-    oldidx = 1
-    changemap = fill(0, length(code))
-    labelmap = coverage ? fill(0, length(code)) : changemap
-    prevloc = zero(eltype(ci.codelocs))
-    stmtinfo = sv.stmt_info
-    ssavaluetypes = ci.ssavaluetypes::Vector{Any}
-    while idx <= length(code)
-        codeloc = ci.codelocs[idx]
-        if coverage && codeloc != prevloc && codeloc != 0
-            # insert a side-effect instruction before the current instruction in the same basic block
-            insert!(code, idx, Expr(:code_coverage_effect))
-            insert!(ci.codelocs, idx, codeloc)
-            insert!(ssavaluetypes, idx, Nothing)
-            insert!(stmtinfo, idx, nothing)
-            changemap[oldidx] += 1
-            if oldidx < length(labelmap)
-                labelmap[oldidx + 1] += 1
-            end
-            idx += 1
-            prevloc = codeloc
-        end
-        if code[idx] isa Expr && ssavaluetypes[idx] === Union{}
-            if !(idx < length(code) && isa(code[idx + 1], ReturnNode) && !isdefined((code[idx + 1]::ReturnNode), :val))
-                # insert unreachable in the same basic block after the current instruction (splitting it)
-                insert!(code, idx + 1, ReturnNode())
-                insert!(ci.codelocs, idx + 1, ci.codelocs[idx])
-                insert!(ssavaluetypes, idx + 1, Union{})
-                insert!(stmtinfo, idx + 1, nothing)
-                if oldidx < length(changemap)
-                    changemap[oldidx + 1] += 1
-                    coverage && (labelmap[oldidx + 1] += 1)
-                end
-                idx += 1
-            end
-        end
-        idx += 1
-        oldidx += 1
-    end
-    renumber_ir_elements!(code, changemap, labelmap)
-
-    inbounds_depth = 0 # Number of stacked inbounds
-    meta = Any[]
-    flags = fill(0x00, length(code))
-    for i = 1:length(code)
-        stmt = code[i]
-        if isexpr(stmt, :inbounds)
-            arg1 = stmt.args[1]
-            if arg1 === true # push
-                inbounds_depth += 1
-            elseif arg1 === false # clear
-                inbounds_depth = 0
-            elseif inbounds_depth > 0 # pop
-                inbounds_depth -= 1
-            end
-            stmt = nothing
-        else
-            stmt = normalize(stmt, meta)
-        end
-        code[i] = stmt
-        if !(stmt === nothing)
-            if inbounds_depth > 0
-                flags[i] |= IR_FLAG_INBOUNDS
-            end
-        end
-    end
-    strip_trailing_junk!(ci, code, stmtinfo, flags)
-    cfg = compute_basic_blocks(code)
-    types = Any[]
-    stmts = InstructionStream(code, types, stmtinfo, ci.codelocs, flags)
-    ir = IRCode(stmts, cfg, collect(LineInfoNode, ci.linetable), sv.slottypes, meta, sv.sptypes)
-    return ir
-end
-
-function slot2reg(ir::IRCode, ci::CodeInfo, nargs::Int, sv::OptimizationState)
-    # need `ci` for the slot metadata, IR for the code
-    @timeit "domtree 1" domtree = construct_domtree(ir.cfg.blocks)
-    defuse_insts = scan_slot_def_use(nargs, ci, ir.stmts.inst)
-    @timeit "construct_ssa" ir = construct_ssa!(ci, ir, domtree, defuse_insts, nargs, sv.slottypes) # consumes `ir`
-    return ir
-end
-
-function run_passes(ci::CodeInfo, nargs::Int, sv::OptimizationState)
-    preserve_coverage = coverage_enabled(sv.mod)
-    ir = convert_to_ircode(ci, copy_exprargs(ci.code), preserve_coverage, nargs, sv)
-    ir = slot2reg(ir, ci, nargs, sv)
-    #@Base.show ("after_construct", ir)
-    # TODO: Domsorting can produce an updated domtree - no need to recompute here
-    @timeit "compact 1" ir = compact!(ir)
-    @timeit "Inlining" ir = ssa_inlining_pass!(ir, ir.linetable, sv.inlining, ci.propagate_inbounds)
-    #@timeit "verify 2" verify_ir(ir)
-    ir = compact!(ir)
-    #@Base.show ("before_sroa", ir)
-    @timeit "SROA" ir = getfield_elim_pass!(ir)
-    #@Base.show ir.new_nodes
-    #@Base.show ("after_sroa", ir)
-    ir = adce_pass!(ir)
-    #@Base.show ("after_adce", ir)
-    @timeit "type lift" ir = type_lift_pass!(ir)
-    @timeit "compact 3" ir = compact!(ir)
-    #@Base.show ir
-    if JLOptions().debug_level == 2
-        @timeit "verify 3" (verify_ir(ir); verify_linetable(ir.linetable))
-    end
-    return ir
-end
diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl
index f95ff55b75e847..33e370e4db7e4f 100644
--- a/base/compiler/ssair/inlining.jl
+++ b/base/compiler/ssair/inlining.jl
@@ -103,7 +103,7 @@ function CFGInliningState(ir::IRCode)
 end
 
 # Tells the inliner that we're now inlining into block `block`, meaning
-# all previous blocks have been proceesed and can be added to the new cfg
+# all previous blocks have been processed and can be added to the new cfg
 function inline_into_block!(state::CFGInliningState, block::Int)
     if state.first_bb != block
         new_range = state.first_bb+1:block
@@ -313,14 +313,15 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
         push!(linetable, LineInfoNode(entry.module, entry.method, entry.file, entry.line,
             (entry.inlined_at > 0 ? entry.inlined_at + linetable_offset : inlined_at)))
     end
-    nargs_def = item.mi.def.nargs
-    isva = nargs_def > 0 && item.mi.def.isva
+    (; def, sparam_vals) = item.mi
+    nargs_def = def.nargs::Int32
+    isva = nargs_def > 0 && def.isva
+    sig = def.sig
     if isva
         vararg = mk_tuplecall!(compact, argexprs[nargs_def:end], compact.result[idx][:line])
         argexprs = Any[argexprs[1:(nargs_def - 1)]..., vararg]
     end
-    mi = item.mi
-    is_opaque = isa(mi.def, Method) && mi.def.is_for_opaque_closure
+    is_opaque = isa(def, Method) && def.is_for_opaque_closure
     if is_opaque
         # Replace the first argument by a load of the capture environment
         argexprs[1] = insert_node_here!(compact,
@@ -347,15 +348,15 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
             # face of rename_arguments! mutating in place - should figure out
             # something better eventually.
             inline_compact[idx′] = nothing
-            stmt′ = ssa_substitute!(idx′, stmt′, argexprs, item.mi.def.sig, item.mi.sparam_vals, linetable_offset, boundscheck_idx, compact)
+            stmt′ = ssa_substitute!(idx′, stmt′, argexprs, sig, sparam_vals, linetable_offset, boundscheck_idx, compact)
             if isa(stmt′, ReturnNode)
-                isa(stmt′.val, SSAValue) && (compact.used_ssas[stmt′.val.id] += 1)
-                return_value = SSAValue(idx′)
-                inline_compact[idx′] = stmt′.val
                 val = stmt′.val
+                isa(val, SSAValue) && (compact.used_ssas[val.id] += 1)
+                return_value = SSAValue(idx′)
+                inline_compact[idx′] = val
                 inline_compact.result[idx′][:type] = (isa(val, Argument) || isa(val, Expr)) ?
-                    compact_exprtype(compact, stmt′.val) :
-                    compact_exprtype(inline_compact, stmt′.val)
+                    compact_exprtype(compact, val) :
+                    compact_exprtype(inline_compact, val)
                 break
             end
             inline_compact[idx′] = stmt′
@@ -374,7 +375,7 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
         inline_compact = IncrementalCompact(compact, spec.ir, compact.result_idx)
         for ((_, idx′), stmt′) in inline_compact
             inline_compact[idx′] = nothing
-            stmt′ = ssa_substitute!(idx′, stmt′, argexprs, item.mi.def.sig, item.mi.sparam_vals, linetable_offset, boundscheck_idx, compact)
+            stmt′ = ssa_substitute!(idx′, stmt′, argexprs, sig, sparam_vals, linetable_offset, boundscheck_idx, compact)
             if isa(stmt′, ReturnNode)
                 if isdefined(stmt′, :val)
                     val = stmt′.val
@@ -399,7 +400,7 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
             elseif isa(stmt′, GotoNode)
                 stmt′ = GotoNode(stmt′.label + bb_offset)
             elseif isa(stmt′, Expr) && stmt′.head === :enter
-                stmt′ = Expr(:enter, stmt′.args[1] + bb_offset)
+                stmt′ = Expr(:enter, stmt′.args[1]::Int + bb_offset)
             elseif isa(stmt′, GotoIfNot)
                 stmt′ = GotoIfNot(stmt′.cond, stmt′.dest + bb_offset)
             elseif isa(stmt′, PhiNode)
@@ -412,8 +413,9 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
         compact.active_result_bb = inline_compact.active_result_bb
         for i = 1:length(pn.values)
             isassigned(pn.values, i) || continue
-            if isa(pn.values[i], SSAValue)
-                compact.used_ssas[pn.values[i].id] += 1
+            v = pn.values[i]
+            if isa(v, SSAValue)
+                compact.used_ssas[v.id] += 1
             end
         end
         if length(pn.edges) == 1
@@ -442,9 +444,10 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int,
     for ((metharg, case), next_cond_bb) in zip(item.cases, item.bbs)
         @assert !isa(metharg, UnionAll)
         cond = true
-        @assert length(atype.parameters) == length(metharg.parameters)
-        for i in 1:length(atype.parameters)
-            a, m = atype.parameters[i], metharg.parameters[i]
+        aparams, mparams = atype.parameters::SimpleVector, metharg.parameters::SimpleVector
+        @assert length(aparams) == length(mparams)
+        for i in 1:length(aparams)
+            a, m = aparams[i], mparams[i]
             # If this is always true, we don't need to check for it
             a <: m && continue
             # Generate isa check
@@ -463,8 +466,8 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int,
         argexprs′ = argexprs
         if !isa(case, ConstantCase)
             argexprs′ = copy(argexprs)
-            for i = 1:length(metharg.parameters)
-                a, m = atype.parameters[i], metharg.parameters[i]
+            for i = 1:length(mparams)
+                a, m = aparams[i], mparams[i]
                 (isa(argexprs[i], SSAValue) || isa(argexprs[i], Argument)) || continue
                 if !(a <: m)
                     argexprs′[i] = insert_node_here!(compact,
@@ -549,6 +552,7 @@ function batch_inline!(todo::Vector{Pair{Int, Any}}, ir::IRCode, linetable::Vect
         (inline_idx, item) = popfirst!(todo)
         for ((old_idx, idx), stmt) in compact
             if old_idx == inline_idx
+                stmt = stmt::Expr
                 argexprs = copy(stmt.args)
                 refinish = false
                 if compact.result_idx == first(compact.result_bbs[compact.active_result_bb].stmts)
@@ -582,7 +586,7 @@ function batch_inline!(todo::Vector{Pair{Int, Any}}, ir::IRCode, linetable::Vect
             elseif isa(stmt, GotoNode)
                 compact[idx] = GotoNode(state.bb_rename[stmt.label])
             elseif isa(stmt, Expr) && stmt.head === :enter
-                compact[idx] = Expr(:enter, state.bb_rename[stmt.args[1]])
+                compact[idx] = Expr(:enter, state.bb_rename[stmt.args[1]::Int])
             elseif isa(stmt, GotoIfNot)
                 compact[idx] = GotoIfNot(stmt.cond, state.bb_rename[stmt.dest])
             elseif isa(stmt, PhiNode)
@@ -600,6 +604,7 @@ function rewrite_apply_exprargs!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::
         argexprs::Vector{Any}, atypes::Vector{Any}, arginfos::Vector{Any},
         arg_start::Int, istate::InliningState)
 
+    flag = ir.stmts[idx][:flag]
     new_argexprs = Any[argexprs[arg_start]]
     new_atypes = Any[atypes[arg_start]]
     # loop over original arguments and flatten any known iterators
@@ -655,8 +660,8 @@ function rewrite_apply_exprargs!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::
                 info = call.info
                 handled = false
                 if isa(info, ConstCallInfo)
-                    if maybe_handle_const_call!(ir, state1.id, new_stmt, info, new_sig,
-                        call.rt, istate, false, todo)
+                    if !is_stmt_noinline(flag) && maybe_handle_const_call!(
+                        ir, state1.id, new_stmt, info, new_sig, call.rt, istate, flag, false, todo)
                         handled = true
                     else
                         info = info.call
@@ -667,7 +672,7 @@ function rewrite_apply_exprargs!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::
                         MethodMatchInfo[info] : info.matches
                     # See if we can inline this call to `iterate`
                     analyze_single_call!(ir, todo, state1.id, new_stmt,
-                        new_sig, call.rt, info, istate)
+                        new_sig, call.rt, info, istate, flag)
                 end
                 if i != length(thisarginfo.each)
                     valT = getfield_tfunc(call.rt, Const(1))
@@ -704,36 +709,35 @@ function singleton_type(@nospecialize(ft))
 end
 
 function compileable_specialization(et::Union{EdgeTracker, Nothing}, match::MethodMatch)
-    mi = specialize_method(match, false, true)
+    mi = specialize_method(match; compilesig=true)
     mi !== nothing && et !== nothing && push!(et, mi::MethodInstance)
     return mi
 end
 
-function compileable_specialization(et::Union{EdgeTracker, Nothing}, result::InferenceResult)
-    mi = specialize_method(result.linfo.def, result.linfo.specTypes,
-        result.linfo.sparam_vals, false, true)
+function compileable_specialization(et::Union{EdgeTracker, Nothing}, (; linfo)::InferenceResult)
+    mi = specialize_method(linfo.def::Method, linfo.specTypes, linfo.sparam_vals; compilesig=true)
     mi !== nothing && et !== nothing && push!(et, mi::MethodInstance)
     return mi
 end
 
-function resolve_todo(todo::InliningTodo, state::InliningState)
-    spec = todo.spec::DelayedInliningSpec
+function resolve_todo(todo::InliningTodo, state::InliningState, flag::UInt8)
+    mi = todo.mi
+    (; match, atypes) = todo.spec::DelayedInliningSpec
 
     #XXX: update_valid_age!(min_valid[1], max_valid[1], sv)
     isconst, src = false, nothing
-    if isa(spec.match, InferenceResult)
-        let inferred_src = spec.match.src
-            if isa(inferred_src, Const)
-                if !is_inlineable_constant(inferred_src.val)
-                    return compileable_specialization(state.et, spec.match)
-                end
-                isconst, src = true, quoted(inferred_src.val)
-            else
-                isconst, src = false, inferred_src
+    if isa(match, InferenceResult)
+        inferred_src = match.src
+        if isa(inferred_src, Const)
+            if !is_inlineable_constant(inferred_src.val)
+                return compileable_specialization(state.et, match)
             end
+            isconst, src = true, quoted(inferred_src.val)
+        else
+            isconst, src = false, inferred_src
         end
     else
-        linfo = get(state.mi_cache, todo.mi, nothing)
+        linfo = get(state.mi_cache, mi, nothing)
         if linfo isa CodeInstance
             if invoke_api(linfo) == 2
                 # in this case function can be inlined to a constant
@@ -746,39 +750,30 @@ function resolve_todo(todo::InliningTodo, state::InliningState)
         end
     end
 
-    if isconst && state.et !== nothing
-        push!(state.et, todo.mi)
+    et = state.et
+
+    if isconst && et !== nothing
+        push!(et, mi)
         return ConstantCase(src)
     end
 
-    if src !== nothing
-        src = state.policy(src)
-    end
+    src = inlining_policy(state.interp, src, flag, mi, atypes)
 
     if src === nothing
-        return compileable_specialization(state.et, spec.match)
+        return compileable_specialization(et, match)
     end
 
     if isa(src, IRCode)
         src = copy(src)
     end
 
-    et = state.et
-    et !== nothing && push!(et, todo.mi)
-    return InliningTodo(todo.mi, src)
+    et !== nothing && push!(et, mi)
+    return InliningTodo(mi, src)
 end
 
-function resolve_todo(todo::UnionSplit, state::InliningState)
+function resolve_todo(todo::UnionSplit, state::InliningState, flag::UInt8)
     UnionSplit(todo.fully_covered, todo.atype,
-        Pair{Any,Any}[sig=>resolve_todo(item, state) for (sig, item) in todo.cases])
-end
-
-function resolve_todo!(todo::Vector{Pair{Int, Any}}, state::InliningState)
-    for i = 1:length(todo)
-        idx, item = todo[i]
-        todo[i] = idx=>resolve_todo(item, state)
-    end
-    todo
+        Pair{Any,Any}[sig=>resolve_todo(item, state, flag) for (sig, item) in todo.cases])
 end
 
 function validate_sparams(sparams::SimpleVector)
@@ -789,7 +784,7 @@ function validate_sparams(sparams::SimpleVector)
 end
 
 function analyze_method!(match::MethodMatch, atypes::Vector{Any},
-                         state::InliningState, @nospecialize(stmttyp))
+                         state::InliningState, @nospecialize(stmttyp), flag::UInt8)
     method = match.method
     methsig = method.sig
 
@@ -805,25 +800,25 @@ function analyze_method!(match::MethodMatch, atypes::Vector{Any},
     end
 
     # Bail out if any static parameters are left as TypeVar
-    ok = true
     validate_sparams(match.sparams) || return nothing
 
+    et = state.et
 
-    if !state.params.inlining
-        return compileable_specialization(state.et, match)
+    if !state.params.inlining || is_stmt_noinline(flag)
+        return compileable_specialization(et, match)
     end
 
     # See if there exists a specialization for this method signature
-    mi = specialize_method(match, true) # Union{Nothing, MethodInstance}
+    mi = specialize_method(match; preexisting=true) # Union{Nothing, MethodInstance}
     if !isa(mi, MethodInstance)
-        return compileable_specialization(state.et, match)
+        return compileable_specialization(et, match)
     end
 
     todo = InliningTodo(mi, match, atypes, stmttyp)
     # If we don't have caches here, delay resolving this MethodInstance
     # until the batch inlining step (or an external post-processing pass)
     state.mi_cache === nothing && return todo
-    return resolve_todo(todo, state)
+    return resolve_todo(todo, state, flag)
 end
 
 function InliningTodo(mi::MethodInstance, ir::IRCode)
@@ -929,7 +924,7 @@ function is_valid_type_for_apply_rewrite(@nospecialize(typ), params::Optimizatio
 end
 
 function inline_splatnew!(ir::IRCode, idx::Int)
-    stmt = ir.stmts[idx][:inst]
+    stmt = ir.stmts[idx][:inst]::Expr
     ty = ir.stmts[idx][:type]
     nf = nfields_tfunc(ty)
     if nf isa Const
@@ -940,7 +935,7 @@ function inline_splatnew!(ir::IRCode, idx::Int)
         # TODO: hoisting this tnf.val === nf.val check into codegen
         # would enable us to almost always do this transform
         if tnf isa Const && tnf.val === nf.val
-            n = tnf.val
+            n = tnf.val::Int
             new_argexprs = Any[eargs[1]]
             for j = 1:n
                 atype = getfield_tfunc(tt, Const(j))
@@ -964,7 +959,6 @@ function call_sig(ir::IRCode, stmt::Expr)
     f === Core.Intrinsics.cglobal && return nothing
     atypes = Vector{Any}(undef, length(stmt.args))
     atypes[1] = ft
-    ok = true
     for i = 2:length(stmt.args)
         a = argextype(stmt.args[i], ir, ir.sptypes)
         (a === Bottom || isvarargtype(a)) && return nothing
@@ -1048,12 +1042,12 @@ is_builtin(s::Signature) =
     isa(s.f, Builtin) ||
     s.ft ⊑ Builtin
 
-function inline_invoke!(ir::IRCode, idx::Int, sig::Signature, info::InvokeCallInfo,
-        state::InliningState, todo::Vector{Pair{Int, Any}})
+function inline_invoke!(ir::IRCode, idx::Int, sig::Signature, (; match, result)::InvokeCallInfo,
+        state::InliningState, todo::Vector{Pair{Int, Any}}, flag::UInt8)
     stmt = ir.stmts[idx][:inst]
     calltype = ir.stmts[idx][:type]
 
-    if !info.match.fully_covers
+    if !match.fully_covers
         # TODO: We could union split out the signature check and continue on
         return nothing
     end
@@ -1063,7 +1057,17 @@ function inline_invoke!(ir::IRCode, idx::Int, sig::Signature, info::InvokeCallIn
     atypes = atypes[4:end]
     pushfirst!(atypes, atype0)
 
-    result = analyze_method!(info.match, atypes, state, calltype)
+    if isa(result, InferenceResult) && !is_stmt_noinline(flag)
+        (; mi) = item = InliningTodo(result, atypes, calltype)
+        validate_sparams(mi.sparam_vals) || return nothing
+        if argtypes_to_type(atypes) <: mi.def.sig
+            state.mi_cache !== nothing && (item = resolve_todo(item, state, flag))
+            handle_single_case!(ir, stmt, idx, item, true, todo)
+            return nothing
+        end
+    end
+
+    result = analyze_method!(match, atypes, state, calltype, flag)
     handle_single_case!(ir, stmt, idx, result, true, todo)
     return nothing
 end
@@ -1137,6 +1141,22 @@ function process_simple!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::Int, sta
         ir.stmts[idx][:inst] = res
         return nothing
     end
+    if (sig.f === modifyfield! || sig.ft ⊑ typeof(modifyfield!)) && 5 <= length(stmt.args) <= 6
+        let info = ir.stmts[idx][:info]
+            info isa MethodResultPure && (info = info.info)
+            info isa ConstCallInfo && (info = info.call)
+            info isa MethodMatchInfo || return nothing
+            length(info.results) == 1 || return nothing
+            match = info.results[1]::MethodMatch
+            match.fully_covers || return nothing
+            case = compileable_specialization(state.et, match)
+            case === nothing && return nothing
+            stmt.head = :invoke_modify
+            pushfirst!(stmt.args, case)
+            ir.stmts[idx][:inst] = stmt
+        end
+        return nothing
+    end
 
     check_effect_free!(ir, stmt, calltype, idx)
 
@@ -1158,7 +1178,7 @@ end
 
 function analyze_single_call!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::Int, @nospecialize(stmt),
         sig::Signature, @nospecialize(calltype), infos::Vector{MethodMatchInfo},
-        state::InliningState)
+        state::InliningState, flag::UInt8)
     cases = Pair{Any, Any}[]
     signature_union = Union{}
     only_method = nothing  # keep track of whether there is one matching method
@@ -1168,7 +1188,7 @@ function analyze_single_call!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::Int
     for i in 1:length(infos)
         info = infos[i]
         meth = info.results
-        if meth === missing || meth.ambig
+        if meth.ambig
             # Too many applicable methods
             # Or there is a (partial?) ambiguity
             too_many = true
@@ -1186,19 +1206,20 @@ function analyze_single_call!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::Int
             only_method = false
         end
         for match in meth
-            signature_union = Union{signature_union, match.spec_types}
-            if !isdispatchtuple(match.spec_types)
+            spec_types = match.spec_types
+            signature_union = Union{signature_union, spec_types}
+            if !isdispatchtuple(spec_types)
                 fully_covered = false
                 continue
             end
-            case = analyze_method!(match, sig.atypes, state, calltype)
+            case = analyze_method!(match, sig.atypes, state, calltype, flag)
             if case === nothing
                 fully_covered = false
                 continue
-            elseif _any(p->p[1] === match.spec_types, cases)
+            elseif _any(p->p[1] === spec_types, cases)
                 continue
             end
-            push!(cases, Pair{Any,Any}(match.spec_types, case))
+            push!(cases, Pair{Any,Any}(spec_types, case))
         end
     end
 
@@ -1218,7 +1239,7 @@ function analyze_single_call!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::Int
             match = meth[1]
         end
         fully_covered = true
-        case = analyze_method!(match, sig.atypes, state, calltype)
+        case = analyze_method!(match, sig.atypes, state, calltype, flag)
         case === nothing && return
         push!(cases, Pair{Any,Any}(match.spec_types, case))
     end
@@ -1240,7 +1261,7 @@ end
 
 function maybe_handle_const_call!(ir::IRCode, idx::Int, stmt::Expr,
         info::ConstCallInfo, sig::Signature, @nospecialize(calltype),
-        state::InliningState,
+        state::InliningState, flag::UInt8,
         isinvoke::Bool, todo::Vector{Pair{Int, Any}})
     # when multiple matches are found, bail out and later inliner will union-split this signature
     # TODO effectively use multiple constant analysis results here
@@ -1248,11 +1269,11 @@ function maybe_handle_const_call!(ir::IRCode, idx::Int, stmt::Expr,
     result = info.results[1]
     isa(result, InferenceResult) || return false
 
-    item = InliningTodo(result, sig.atypes, calltype)
-    validate_sparams(item.mi.sparam_vals) || return true
-    mthd_sig = item.mi.def.sig
-    mistypes = item.mi.specTypes
-    state.mi_cache !== nothing && (item = resolve_todo(item, state))
+    (; mi) = item = InliningTodo(result, sig.atypes, calltype)
+    validate_sparams(mi.sparam_vals) || return true
+    mthd_sig = mi.def.sig
+    mistypes = mi.specTypes
+    state.mi_cache !== nothing && (item = resolve_todo(item, state, flag))
     if sig.atype <: mthd_sig
         handle_single_case!(ir, stmt, idx, item, isinvoke, todo)
         return true
@@ -1290,6 +1311,8 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState)
             info = info.info
         end
 
+        flag = ir.stmts[idx][:flag]
+
         # Inference determined this couldn't be analyzed. Don't question it.
         if info === false
             continue
@@ -1299,7 +1322,8 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState)
         # it'll have performed a specialized analysis for just this case. Use its
         # result.
         if isa(info, ConstCallInfo)
-            if maybe_handle_const_call!(ir, idx, stmt, info, sig, calltype, state, sig.f === Core.invoke, todo)
+            if !is_stmt_noinline(flag) && maybe_handle_const_call!(
+                ir, idx, stmt, info, sig, calltype, state, flag, sig.f === Core.invoke, todo)
                 continue
             else
                 info = info.call
@@ -1307,7 +1331,7 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState)
         end
 
         if isa(info, OpaqueClosureCallInfo)
-            result = analyze_method!(info.match, sig.atypes, state, calltype)
+            result = analyze_method!(info.match, sig.atypes, state, calltype, flag)
             handle_single_case!(ir, stmt, idx, result, false, todo)
             continue
         end
@@ -1315,7 +1339,7 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState)
         # Handle invoke
         if sig.f === Core.invoke
             if isa(info, InvokeCallInfo)
-                inline_invoke!(ir, idx, sig, info, state, todo)
+                inline_invoke!(ir, idx, sig, info, state, todo, flag)
             end
             continue
         end
@@ -1329,7 +1353,7 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState)
             continue
         end
 
-        analyze_single_call!(ir, todo, idx, stmt, sig, calltype, infos, state)
+        analyze_single_call!(ir, todo, idx, stmt, sig, calltype, infos, state, flag)
     end
     todo
 end
@@ -1394,7 +1418,8 @@ end
 function late_inline_special_case!(ir::IRCode, sig::Signature, idx::Int, stmt::Expr, params::OptimizationParams)
     f, ft, atypes = sig.f, sig.ft, sig.atypes
     typ = ir.stmts[idx][:type]
-    if params.inlining && length(atypes) == 3 && istopfunction(f, :!==)
+    isinlining = params.inlining
+    if isinlining && length(atypes) == 3 && istopfunction(f, :!==)
         # special-case inliner for !== that precedes _methods_by_ftype union splitting
         # and that works, even though inference generally avoids inferring the `!==` Method
         if isa(typ, Const)
@@ -1406,17 +1431,17 @@ function late_inline_special_case!(ir::IRCode, sig::Signature, idx::Int, stmt::E
         not_call = Expr(:call, GlobalRef(Core.Intrinsics, :not_int), cmp_call_ssa)
         ir[SSAValue(idx)] = not_call
         return true
-    elseif params.inlining && length(atypes) == 3 && istopfunction(f, :(>:))
+    elseif isinlining && length(atypes) == 3 && istopfunction(f, :(>:))
         # special-case inliner for issupertype
         # that works, even though inference generally avoids inferring the `>:` Method
-        if isa(typ, Const)
+        if isa(typ, Const) && _builtin_nothrow(<:, Any[atypes[3], atypes[2]], typ)
             ir[SSAValue(idx)] = quoted(typ.val)
             return true
         end
         subtype_call = Expr(:call, GlobalRef(Core, :(<:)), stmt.args[3], stmt.args[2])
         ir[SSAValue(idx)] = subtype_call
         return true
-    elseif params.inlining && f === TypeVar && 2 <= length(atypes) <= 4 && (atypes[2] ⊑ Symbol)
+    elseif isinlining && f === TypeVar && 2 <= length(atypes) <= 4 && (atypes[2] ⊑ Symbol)
         ir[SSAValue(idx)] = Expr(:call, GlobalRef(Core, :_typevar), stmt.args[2],
             length(stmt.args) < 4 ? Bottom : stmt.args[3],
             length(stmt.args) == 2 ? Any : stmt.args[end])
diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl
index 9983d9b94a44a3..a2eaf5c69cbdd5 100644
--- a/base/compiler/ssair/ir.jl
+++ b/base/compiler/ssair/ir.jl
@@ -1,6 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-@inline isexpr(@nospecialize(stmt), head::Symbol) = isa(stmt, Expr) && stmt.head === head
 Core.PhiNode() = Core.PhiNode(Int32[], Any[])
 
 isterminator(@nospecialize(stmt)) = isa(stmt, GotoNode) || isa(stmt, GotoIfNot) || isa(stmt, ReturnNode)
@@ -404,7 +403,8 @@ function getindex(x::UseRef)
 end
 
 function is_relevant_expr(e::Expr)
-    return e.head in (:call, :invoke, :new, :splatnew, :(=), :(&),
+    return e.head in (:call, :invoke, :invoke_modify,
+                      :new, :splatnew, :(=), :(&),
                       :gc_preserve_begin, :gc_preserve_end,
                       :foreigncall, :isdefined, :copyast,
                       :undefcheck, :throw_undef_if_not,
@@ -723,18 +723,27 @@ function insert_node!(compact::IncrementalCompact, before, inst::NewInstruction,
         end
     elseif isa(before, OldSSAValue)
         pos = before.id
-        if pos > length(compact.ir.stmts)
-            #@assert attach_after
-            info = compact.pending_nodes.info[pos - length(compact.ir.stmts) - length(compact.ir.new_nodes)]
-            pos, attach_after = info.pos, info.attach_after
+        if pos < compact.idx
+            renamed = compact.ssa_rename[pos]
+            count_added_node!(compact, inst.stmt)
+            line = something(inst.line, compact.result[renamed.id][:line])
+            node = add!(compact.new_new_nodes, renamed.id, attach_after)
+            node[:inst], node[:type], node[:line], node[:flag] = inst.stmt, inst.type, line, inst.flag
+            return NewSSAValue(node.idx)
+        else
+            if pos > length(compact.ir.stmts)
+                #@assert attach_after
+                info = compact.pending_nodes.info[pos - length(compact.ir.stmts) - length(compact.ir.new_nodes)]
+                pos, attach_after = info.pos, info.attach_after
+            end
+            line = something(inst.line, compact.ir.stmts[pos][:line])
+            node = add_pending!(compact, pos, attach_after)
+            node[:inst], node[:type], node[:line], node[:flag] = inst.stmt, inst.type, line, inst.flag
+            os = OldSSAValue(length(compact.ir.stmts) + length(compact.ir.new_nodes) + length(compact.pending_nodes))
+            push!(compact.ssa_rename, os)
+            push!(compact.used_ssas, 0)
+            return os
         end
-        line = something(inst.line, compact.ir.stmts[pos][:line])
-        node = add_pending!(compact, pos, attach_after)
-        node[:inst], node[:type], node[:line], node[:flag] = inst.stmt, inst.type, line, inst.flag
-        os = OldSSAValue(length(compact.ir.stmts) + length(compact.ir.new_nodes) + length(compact.pending_nodes))
-        push!(compact.ssa_rename, os)
-        push!(compact.used_ssas, 0)
-        return os
     elseif isa(before, NewSSAValue)
         before_entry = compact.new_new_nodes.info[before.id]
         line = something(inst.line, compact.new_new_nodes.stmts[before.id][:line])
@@ -1094,8 +1103,7 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr
         # passes look at all code, dead or not. This check should be
         # unnecessary when DCE can remove those dead loops entirely, so this is
         # just to be safe.
-        before_def = isassigned(values, 1) && isa(values[1], OldSSAValue) &&
-            idx < values[1].id
+        before_def = isassigned(values, 1) && (v = values[1]; isa(v, OldSSAValue)) && idx < v.id
         if length(edges) == 1 && isassigned(values, 1) && !before_def &&
                 length(compact.cfg_transforms_enabled ?
                     compact.result_bbs[compact.bb_rename_succ[active_bb]].preds :
diff --git a/base/compiler/ssair/legacy.jl b/base/compiler/ssair/legacy.jl
index 49d9aef973e294..a832daa8ed4184 100644
--- a/base/compiler/ssair/legacy.jl
+++ b/base/compiler/ssair/legacy.jl
@@ -40,14 +40,14 @@ end
 function replace_code_newstyle!(ci::CodeInfo, ir::IRCode, nargs::Int)
     @assert isempty(ir.new_nodes)
     # All but the first `nargs` slots will now be unused
-    resize!(ci.slotflags, nargs + 1)
+    resize!(ci.slotflags, nargs)
     stmts = ir.stmts
     ci.code, ci.ssavaluetypes, ci.codelocs, ci.ssaflags, ci.linetable =
         stmts.inst, stmts.type, stmts.line, stmts.flag, ir.linetable
     for metanode in ir.meta
         push!(ci.code, metanode)
         push!(ci.codelocs, 1)
-        push!(ci.ssavaluetypes, Any)
+        push!(ci.ssavaluetypes::Vector{Any}, Any)
         push!(ci.ssaflags, 0x00)
     end
     # Translate BB Edges to statement edges
diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl
index f27c71c2bcd6c6..2d097b3d46b6f4 100644
--- a/base/compiler/ssair/passes.jl
+++ b/base/compiler/ssair/passes.jl
@@ -19,8 +19,9 @@ struct SSADefUse
 end
 SSADefUse() = SSADefUse(Int[], Int[], Int[])
 
-function try_compute_fieldidx_expr(@nospecialize(typ), @nospecialize(use_expr))
-    field = use_expr.args[3]
+try_compute_fieldidx_expr(typ::DataType, expr::Expr) = try_compute_fieldidx_args(typ, expr.args)
+function try_compute_fieldidx_args(typ::DataType, args::Vector{Any})
+    field = args[3]
     isa(field, QuoteNode) && (field = field.value)
     isa(field, Union{Int, Symbol}) || return nothing
     return try_compute_fieldidx(typ, field)
@@ -61,11 +62,13 @@ function find_curblock(domtree::DomTree, allblocks::Vector{Int}, curblock::Int)
 end
 
 function val_for_def_expr(ir::IRCode, def::Int, fidx::Int)
-    if isexpr(ir[SSAValue(def)], :new)
-        return ir[SSAValue(def)].args[1+fidx]
+    ex = ir[SSAValue(def)]
+    if isexpr(ex, :new)
+        return ex.args[1+fidx]
     else
+        @assert isa(ex, Expr)
         # The use is whatever the setfield was
-        return ir[SSAValue(def)].args[4]
+        return ex.args[4]
     end
 end
 
@@ -170,11 +173,10 @@ Starting at `val` walk use-def chains to get all the leaves feeding into
 this val (pruning those leaves rules out by path conditions).
 """
 function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospecialize(typeconstraint), visited_phinodes::Vector{Any}=Any[])
-    if !isa(defssa, AnySSAValue) || !isa(compact[defssa], PhiNode)
-        return Any[defssa]
-    end
-    # Step 2: Figure out what the struct is defined as
+    isa(defssa, AnySSAValue) || return Any[defssa]
     def = compact[defssa]
+    isa(def, PhiNode) || return Any[defssa]
+    # Step 2: Figure out what the struct is defined as
     ## Track definitions through PiNode/PhiNode
     found_def = false
     ## Track which PhiNodes, SSAValue intermediaries
@@ -328,8 +330,8 @@ function lift_leaves(compact::IncrementalCompact, @nospecialize(stmt),
                 if isa(typ, UnionAll)
                     typ = unwrap_unionall(typ)
                 end
-                (isa(typ, DataType) && (!typ.abstract)) || return nothing
-                @assert !typ.mutable
+                (isa(typ, DataType) && !isabstracttype(typ)) || return nothing
+                @assert !ismutabletype(typ)
                 if length(def.args) < 1 + field
                     if field > fieldcount(typ)
                         return nothing
@@ -461,7 +463,7 @@ end
 
 struct LiftedPhi
     ssa::AnySSAValue
-    node::Any
+    node::PhiNode
     need_argupdate::Bool
 end
 
@@ -482,7 +484,7 @@ function perform_lifting!(compact::IncrementalCompact,
     for item in visited_phinodes
         if (item, cache_key) in keys(lifting_cache)
             ssa = lifting_cache[Pair{AnySSAValue, Any}(item, cache_key)]
-            push!(lifted_phis, LiftedPhi(ssa, compact[ssa], false))
+            push!(lifted_phis, LiftedPhi(ssa, compact[ssa]::PhiNode, false))
             continue
         end
         n = PhiNode()
@@ -493,7 +495,7 @@ function perform_lifting!(compact::IncrementalCompact,
 
     # Fix up arguments
     for (old_node_ssa, lf) in zip(visited_phinodes, lifted_phis)
-        old_node = compact[old_node_ssa]
+        old_node = compact[old_node_ssa]::PhiNode
         new_node = lf.node
         lf.need_argupdate || continue
         for i = 1:length(old_node.edges)
@@ -560,18 +562,28 @@ function getfield_elim_pass!(ir::IRCode)
         #ndone += 1
         result_t = compact_exprtype(compact, SSAValue(idx))
         is_getfield = is_setfield = false
+        field_ordering = :unspecified
         is_ccall = false
         # Step 1: Check whether the statement we're looking at is a getfield/setfield!
         if is_known_call(stmt, setfield!, compact)
             is_setfield = true
             4 <= length(stmt.args) <= 5 || continue
+            if length(stmt.args) == 5
+                field_ordering = compact_exprtype(compact, stmt.args[5])
+            end
         elseif is_known_call(stmt, getfield, compact)
             is_getfield = true
-            3 <= length(stmt.args) <= 4 || continue
+            3 <= length(stmt.args) <= 5 || continue
+            if length(stmt.args) == 5
+                field_ordering = compact_exprtype(compact, stmt.args[5])
+            elseif length(stmt.args) == 4
+                field_ordering = compact_exprtype(compact, stmt.args[4])
+                widenconst(field_ordering) === Bool && (field_ordering = :unspecified)
+            end
         elseif is_known_call(stmt, isa, compact)
             # TODO
             continue
-        elseif is_known_call(stmt, typeassert, compact)
+        elseif is_known_call(stmt, typeassert, compact) && length(stmt.args) == 3
             # Canonicalize
             #   X = typeassert(Y, T)::S
             # into
@@ -592,7 +604,7 @@ function getfield_elim_pass!(ir::IRCode)
                     compact.result[idx][:line]), true)
             compact.ssa_rename[compact.idx-1] = pi
             continue
-        elseif is_known_call(stmt, (===), compact)
+        elseif is_known_call(stmt, (===), compact) && length(stmt.args) == 3
             c1 = compact_exprtype(compact, stmt.args[2])
             c2 = compact_exprtype(compact, stmt.args[3])
             if !(isa(c1, Const) || isa(c2, Const))
@@ -625,7 +637,7 @@ function getfield_elim_pass!(ir::IRCode)
                         if isa(typ, UnionAll)
                             typ = unwrap_unionall(typ)
                         end
-                        if typ isa DataType && !typ.mutable
+                        if typ isa DataType && !ismutabletype(typ)
                             process_immutable_preserve(new_preserves, compact, def)
                             old_preserves[pidx] = nothing
                             continue
@@ -660,9 +672,14 @@ function getfield_elim_pass!(ir::IRCode)
         end
         isa(struct_typ, DataType) || continue
 
+        struct_typ.name.atomicfields == C_NULL || continue # TODO: handle more
+        if !(field_ordering === :unspecified || (field_ordering isa Const && field_ordering.val === :not_atomic))
+            continue
+        end
+
         def, typeconstraint = stmt.args[2], struct_typ
 
-        if struct_typ.mutable
+        if ismutabletype(struct_typ)
             isa(def, SSAValue) || continue
             let intermediaries = IdSet()
                 callback = function(@nospecialize(pi), ssa::AnySSAValue)
@@ -775,7 +792,8 @@ function getfield_elim_pass!(ir::IRCode)
         end
         # Could still end up here if we tried to setfield! and immutable, which would
         # error at runtime, but is not illegal to have in the IR.
-        typ.mutable || continue
+        ismutabletype(typ) || continue
+        typ = typ::DataType
         # Partition defuses by field
         fielddefuse = SSADefUse[SSADefUse() for _ = 1:fieldcount(typ)]
         ok = true
@@ -838,7 +856,7 @@ function getfield_elim_pass!(ir::IRCode)
                 end
                 for b in phiblocks
                     for p in ir.cfg.blocks[b].preds
-                        n = ir[phinodes[b]]
+                        n = ir[phinodes[b]]::PhiNode
                         push!(n.edges, p)
                         push!(n.values, compute_value_for_block(ir, domtree,
                             allblocks, du, phinodes, fidx, p))
@@ -855,7 +873,7 @@ function getfield_elim_pass!(ir::IRCode)
         push!(intermediaries, idx)
         # Insert the new preserves
         for (use, new_preserves) in preserve_uses
-            useexpr = ir[SSAValue(use)]
+            useexpr = ir[SSAValue(use)]::Expr
             nccallargs = length(useexpr.args[3]::SimpleVector)
             old_preserves = let intermediaries = intermediaries
                 filter(ssa->!isa(ssa, SSAValue) || !(ssa.id in intermediaries), useexpr.args[(6+nccallargs):end])
@@ -1006,6 +1024,7 @@ function type_lift_pass!(ir::IRCode)
                             insert_node!(ir, item, NewInstruction(PhiNode(edges, values), Bool))
                         end
                     else
+                        def = def::PhiCNode
                         values = Vector{Any}(undef, length(def.values))
                         new_phi = if length(values) == 0
                             false
@@ -1025,7 +1044,7 @@ function type_lift_pass!(ir::IRCode)
                         elseif !isa(def.values[i], SSAValue)
                             val = true
                         else
-                            up_id = id = def.values[i].id
+                            up_id = id = (def.values[i]::SSAValue).id
                             @label restart
                             if !isa(ir.stmts[id][:type], MaybeUndef)
                                 val = true
@@ -1037,7 +1056,7 @@ function type_lift_pass!(ir::IRCode)
                                     elseif !isa(node.val, SSAValue)
                                         val = true
                                     else
-                                        id = node.val.id
+                                        id = (node.val::SSAValue).id
                                         @goto restart
                                     end
                                 else
@@ -1049,7 +1068,7 @@ function type_lift_pass!(ir::IRCode)
                                         if haskey(processed, id)
                                             val = processed[id]
                                         else
-                                            push!(worklist, (id, up_id, new_phi, i))
+                                            push!(worklist, (id, up_id, new_phi::SSAValue, i))
                                             continue
                                         end
                                     else
diff --git a/base/compiler/ssair/show.jl b/base/compiler/ssair/show.jl
index 851cc46def6ea1..4174267ec5a5ee 100644
--- a/base/compiler/ssair/show.jl
+++ b/base/compiler/ssair/show.jl
@@ -192,7 +192,7 @@ example (taken from `@code_typed sin(1.0)`):
 ```
 
 The three annotations are indicated with `*`. The first one is the line number of the
-active function (printed once whenver the outer most line number changes). The second
+active function (printed once whenever the outermost line number changes). The second
 is the inlining indicator. The number of lines indicate the level of nesting, with a
 half-size line (╷) indicating the start of a scope and a full size line (│) indicating
 a continuing scope. The last annotation is the most complicated one. It is a heuristic
@@ -201,7 +201,7 @@ scope that hasn't been printed before. Let's work a number of examples to see th
 and tradeoffs involved.
 
 ```
-f() = leaf_function() # Delibarately not defined to end up in the IR verbatim
+f() = leaf_function() # Deliberately not defined to end up in the IR verbatim
 g() = f()
 h() = g()
 top_function() = h()
@@ -455,7 +455,7 @@ function DILineInfoPrinter(linetable::Vector, showtypes::Bool=false)
                     if frame.line != typemax(frame.line) && frame.line != 0
                         print(io, ":", frame.line)
                     end
-                    print(io, " within `", method_name(frame), "'")
+                    print(io, " within `", method_name(frame), "`")
                     if collapse
                         method = method_name(frame)
                         while nctx < nframes
@@ -487,31 +487,81 @@ function DILineInfoPrinter(linetable::Vector, showtypes::Bool=false)
     return emit_lineinfo_update
 end
 
+# line_info_preprinter(io::IO, indent::String, idx::Int) may print relevant info
+#   at the beginning of the line, and should at least print `indent`. It returns a
+#   string that will be printed after the final basic-block annotation.
+# line_info_postprinter(io::IO, typ, used::Bool) prints the type-annotation at the end
+#   of the statement
+# should_print_stmt(idx::Int) -> Bool: whether the statement at index `idx` should be
+#   printed as part of the IR or not
+# bb_color: color used for printing the basic block brackets on the left
+struct IRShowConfig
+    line_info_preprinter
+    line_info_postprinter
+    should_print_stmt
+    bb_color::Symbol
+    function IRShowConfig(line_info_preprinter, line_info_postprinter=default_expr_type_printer;
+                          should_print_stmt=Returns(true), bb_color::Symbol=:light_black)
+        return new(line_info_preprinter, line_info_postprinter, should_print_stmt, bb_color)
+    end
+end
 
-function show_ir(io::IO, code::IRCode, expr_type_printer=default_expr_type_printer; verbose_linetable=false)
-    cols = (displaysize(io)::Tuple{Int,Int})[2]
-    used = BitSet()
+struct _UNDEF
+    global const UNDEF = _UNDEF.instance
+end
+
+function _stmt(code::IRCode, idx::Int)
     stmts = code.stmts
-    isempty(stmts) && return # unlikely, but avoid errors from reducing over empty sets
-    cfg = code.cfg
-    max_bb_idx_size = length(string(length(cfg.blocks)))
-    new_nodes = code.new_nodes.stmts
-    new_nodes_info = code.new_nodes.info
-    bb_idx = 1
-    for stmt in stmts
-        scan_ssa_use!(push!, used, stmt[:inst])
-    end
-    if any(i -> !isassigned(new_nodes.inst, i), 1:length(new_nodes))
-        printstyled(io, "ERROR: New node array has unset entry\n", color=:red)
-        new_nodes_perm = filter(i -> isassigned(new_nodes.inst, i), 1:length(new_nodes))
-    else
-        new_nodes_perm = collect(1:length(new_nodes))
-    end
-    for nn in new_nodes_perm
-        scan_ssa_use!(push!, used, new_nodes[nn][:inst])
+    return isassigned(stmts.inst, idx) ? stmts[idx][:inst] : UNDEF
+end
+function _stmt(code::CodeInfo, idx::Int)
+    code = code.code
+    return isassigned(code, idx) ? code[idx] : UNDEF
+end
+
+function _type(code::IRCode, idx::Int)
+    stmts = code.stmts
+    return isassigned(stmts.type, idx) ? stmts[idx][:type] : UNDEF
+end
+function _type(code::CodeInfo, idx::Int)
+    types = code.ssavaluetypes
+    types isa Vector{Any} || return nothing
+    return isassigned(types, idx) ? types[idx] : UNDEF
+end
+
+function statement_indices_to_labels(stmt, cfg::CFG)
+    # convert statement index to labels, as expected by print_stmt
+    if stmt isa Expr
+        if stmt.head === :enter && length(stmt.args) == 1 && stmt.args[1] isa Int
+            stmt = Expr(:enter, block_for_inst(cfg, stmt.args[1]::Int))
+        end
+    elseif isa(stmt, GotoIfNot)
+        stmt = GotoIfNot(stmt.cond, block_for_inst(cfg, stmt.dest))
+    elseif stmt isa GotoNode
+        stmt = GotoNode(block_for_inst(cfg, stmt.label))
+    elseif stmt isa PhiNode
+        e = stmt.edges
+        stmt = PhiNode(Int32[block_for_inst(cfg, Int(e[i])) for i in 1:length(e)], stmt.values)
     end
-    sort!(new_nodes_perm, by = x -> (x = new_nodes_info[x]; (x.pos, x.attach_after)))
-    perm_idx = 1
+    return stmt
+end
+
+# Show a single statement, code.stmts[idx]/code.code[idx], in the context of the whole IRCode/CodeInfo.
+# Returns the updated value of bb_idx.
+# pop_new_node!(idx::Int) -> (node_idx, new_node_inst, new_node_type) may return a new
+#   node at the current index `idx`, which is printed before the statement at index
+#   `idx`. This function is repeatedly called until it returns `nothing`
+function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo}, idx::Int, config::IRShowConfig,
+                      used::BitSet, cfg::CFG, bb_idx::Int; pop_new_node! = Returns(nothing))
+    return show_ir_stmt(io, code, idx, config.line_info_preprinter, config.line_info_postprinter,
+                        used, cfg, bb_idx; pop_new_node!, config.bb_color)
+end
+
+function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo}, idx::Int, line_info_preprinter, line_info_postprinter,
+                      used::BitSet, cfg::CFG, bb_idx::Int; pop_new_node! = Returns(nothing), bb_color=:light_black)
+    stmt = _stmt(code, idx)
+    type = _type(code, idx)
+    max_bb_idx_size = length(string(length(cfg.blocks)))
 
     if isempty(used)
         maxlength_idx = 0
@@ -519,252 +569,214 @@ function show_ir(io::IO, code::IRCode, expr_type_printer=default_expr_type_print
         maxused = maximum(used)
         maxlength_idx = length(string(maxused))
     end
-    if !verbose_linetable
-        (loc_annotations, loc_methods, loc_lineno) = compute_ir_line_annotations(code)
-        max_loc_width = maximum(length(str) for str in loc_annotations)
-        max_lineno_width = maximum(length(str) for str in loc_lineno)
-        max_method_width = maximum(length(str) for str in loc_methods)
+
+    if stmt === UNDEF
+        # This is invalid, but do something useful rather
+        # than erroring, to make debugging easier
+        printstyled(io, "#UNDEF\n", color=:red)
+        return bb_idx
     end
-    max_depth = maximum(compute_inlining_depth(code.linetable, stmts[i][:line]) for i in 1:length(stmts.line))
-    last_stack = []
-    for idx in 1:length(stmts)
-        if !isassigned(stmts.inst, idx)
-            # This is invalid, but do something useful rather
-            # than erroring, to make debugging easier
-            printstyled(io, "#UNDEF\n", color=:red)
-            continue
-        end
-        stmt = stmts[idx]
+
+    i = 1
+    while true
+        next = pop_new_node!(idx)
         # Compute BB guard rail
         if bb_idx > length(cfg.blocks)
-            # Even if invariants are violated, try our best to still print
-            bbrange = (length(cfg.blocks) == 0 ? 1 : last(cfg.blocks[end].stmts) + 1):typemax(Int)
-            bb_idx_str = "!"
-            bb_type = "─"
+            # If invariants are violated, print a special leader
+            linestart = " "^(max_bb_idx_size + 2) # not inside a basic block bracket
+            inlining_indent = line_info_preprinter(io, linestart, i == 1 ? idx : 0)
+            printstyled(io, "!!! ", "─"^max_bb_idx_size, color=bb_color)
         else
             bbrange = cfg.blocks[bb_idx].stmts
             bbrange = bbrange.start:bbrange.stop
-            bb_idx_str = string(bb_idx)
-            bb_type = length(cfg.blocks[bb_idx].preds) <= 1 ? "─" : "┄"
-        end
-        bb_pad = max_bb_idx_size - length(bb_idx_str)
-        bb_start_str = string(bb_idx_str, " ", bb_type, "─"^bb_pad, " ")
-        bb_guard_rail_cont = string("│  ", " "^max_bb_idx_size)
-        if idx == first(bbrange)
-            bb_guard_rail = bb_start_str
-        else
-            bb_guard_rail = bb_guard_rail_cont
-        end
-        floop = true
-        # Print linetable information
-        if verbose_linetable
-            stack = compute_loc_stack(code.linetable, stmt[:line])
-            # We need to print any stack frames that did not exist in the last stack
-            ndepth = max(1, length(stack))
-            rail = string(" "^(max_depth+1-ndepth), "│"^ndepth)
-            start_column = cols - max_depth - 10
-            for (i, x) in enumerate(stack)
-                if i > length(last_stack) || last_stack[i] != x
-                    entry = code.linetable[x]
-                    printstyled(io, "\e[$(start_column)G$(rail)\e[1G", color = :light_black)
-                    print(io, bb_guard_rail)
-                    ssa_guard = " "^(maxlength_idx + 4 + (i - 1))
-                    entry_label = "$(ssa_guard)$(method_name(entry)) at $(entry.file):$(entry.line) "
-                    hline = string("─"^(start_column-length(entry_label)-length(bb_guard_rail)+max_depth-i), "┐")
-                    printstyled(io, string(entry_label, hline), "\n"; color=:light_black)
-                    bb_guard_rail = bb_guard_rail_cont
-                    floop = false
-                end
-            end
-            printstyled(io, "\e[$(start_column)G$(rail)\e[1G", color = :light_black)
-            last_stack = stack
-        else
-            if idx <= length(loc_annotations)
-                # N.B.: The line array length not matching is invalid,
-                # but let's be robust here
-                annotation = loc_annotations[idx]
-                loc_method = loc_methods[idx]
-                lineno = loc_lineno[idx]
+            # Print line info update
+            linestart = idx == first(bbrange) ? "  " : sprint(io -> printstyled(io, "│ ", color=bb_color), context=io)
+            linestart *= " "^max_bb_idx_size
+            # idx == 0 means only indentation is printed, so we don't print linfos
+            # multiple times if there are new nodes
+            inlining_indent = line_info_preprinter(io, linestart, i == 1 ? idx : 0)
+
+            if i == 1 && idx == first(bbrange)
+                bb_idx_str = string(bb_idx)
+                bb_pad = max_bb_idx_size - length(bb_idx_str)
+                bb_type = length(cfg.blocks[bb_idx].preds) <= 1 ? "─" : "┄"
+                printstyled(io, bb_idx_str, " ", bb_type, "─"^bb_pad, color=bb_color)
+            elseif next === nothing && idx == last(bbrange) # print separator
+                printstyled(io, "└", "─"^(1 + max_bb_idx_size), color=bb_color)
             else
-                annotation = "!"
-                loc_method = ""
-                lineno = ""
-            end
-            # Print location information right aligned. If the line below is too long, it'll overwrite this,
-            # but that's what we want.
-            if get(io, :color, false)
-                method_start_column = cols - max_method_width - max_loc_width - 2
-                filler = " "^(max_loc_width-length(annotation))
-                printstyled(io, "\e[$(method_start_column)G$(annotation)$(filler)$(loc_method)\e[1G", color = :light_black)
+                printstyled(io, "│ ", " "^max_bb_idx_size, color=bb_color)
             end
-            printstyled(io, lineno, " "^(max_lineno_width - length(lineno) + 1); color = :light_black)
-        end
-        idx != last(bbrange) && print(io, bb_guard_rail)
-        print_sep = false
-        if idx == last(bbrange)
-            print_sep = true
         end
-        # print new nodes first in the right position
-        while perm_idx <= length(new_nodes_perm)
-            node_idx = new_nodes_perm[perm_idx]
-            if new_nodes_info[node_idx].pos != idx
-                break
-            end
-            perm_idx += 1
-            if !floop && !verbose_linetable
-                print(io, " "^(max_lineno_width + 1))
-            end
-            if print_sep
-                if idx == first(bbrange) && floop
-                    print(io, bb_start_str)
-                else
-                    print(io, "│  ", " "^max_bb_idx_size)
-                end
-            end
-            print_sep = true
-            floop = false
-            new_node = new_nodes[node_idx]
-            node_idx += length(stmts)
-            show_type = should_print_ssa_type(new_node[:inst])
-            with_output_color(:green, io) do io′
-                print_stmt(io′, node_idx, new_node[:inst], used, maxlength_idx, false, show_type)
-            end
-            if !isassigned(stmts.type, idx) # try to be robust against errors
-                printstyled(io, "::#UNDEF", color=:red)
-            elseif show_type
-                expr_type_printer(io, new_node[:type], node_idx in used)
-            end
-            println(io)
-        end
-        if !floop && !verbose_linetable
-            print(io, " "^(max_lineno_width + 1))
-        end
-        if print_sep
-            if idx == first(bbrange) && floop
-                print(io, bb_start_str)
-            elseif idx == last(bbrange)
-                print(io, "└", "─"^(1 + max_bb_idx_size), " ")
-            else
-                print(io, "│  ", " "^max_bb_idx_size)
+        print(io, inlining_indent, " ")
+
+        if next === nothing
+            if bb_idx <= length(cfg.blocks) && idx == last(bbrange)
+                bb_idx += 1
             end
+            break
         end
-        if idx == last(bbrange)
-            bb_idx += 1
+
+        # print new nodes first in the right position
+        node_idx, new_node_inst, new_node_type = next
+
+        @assert new_node_inst !== UNDEF # we filtered these out earlier
+        show_type = should_print_ssa_type(new_node_inst)
+        with_output_color(:green, io) do io′
+            print_stmt(io′, node_idx, new_node_inst, used, maxlength_idx, false, show_type)
         end
-        show_type = should_print_ssa_type(stmt[:inst])
-        print_stmt(io, idx, stmt[:inst], used, maxlength_idx, true, show_type)
-        if !isassigned(stmts.type, idx) # try to be robust against errors
+
+        if new_node_type === UNDEF # try to be robust against errors
             printstyled(io, "::#UNDEF", color=:red)
         elseif show_type
-            expr_type_printer(io, stmt[:type], idx in used)
+            line_info_postprinter(IOContext(io, :idx => node_idx), new_node_type, node_idx in used)
         end
         println(io)
+        i += 1
     end
-end
-
-# Show a single statement, code.code[idx], in the context of the whole CodeInfo.
-# Returns the updated value of bb_idx.
-# line_info_preprinter(io::IO, indent::String, idx::Int) may print relevant info
-#   at the beginning of the line, and should at least print `indent`. It returns a
-#   string that will be printed after the final basic-block annotation.
-# line_info_postprinter(io::IO, typ, used::Bool) prints the type-annotation at the end
-#   of the statement
-function show_ir_stmt(io::IO, code::CodeInfo, idx::Int, line_info_preprinter, line_info_postprinter, used::BitSet, cfg::CFG, bb_idx::Int)
-    stmts = code.code
-    types = code.ssavaluetypes
-    max_bb_idx_size = length(string(length(cfg.blocks)))
-
-    if isempty(used)
-        maxlength_idx = 0
-    else
-        maxused = maximum(used)
-        maxlength_idx = length(string(maxused))
-    end
-
-    if !isassigned(stmts, idx)
-        # This is invalid, but do something useful rather
-        # than erroring, to make debugging easier
-        printstyled(io, "#UNDEF\n", color=:red)
-        return bb_idx
-    end
-    stmt = stmts[idx]
-    # Compute BB guard rail
-    if bb_idx > length(cfg.blocks)
-        # If invariants are violated, print a special leader
-        linestart = " "^(max_bb_idx_size + 2) # not inside a basic block bracket
-        inlining_indent = line_info_preprinter(io, linestart, idx)
-        printstyled(io, "!!! ", "─"^max_bb_idx_size, color=:light_black)
-    else
-        bbrange = cfg.blocks[bb_idx].stmts
-        bbrange = bbrange.start:bbrange.stop
-        # Print line info update
-        linestart = idx == first(bbrange) ? "  " : sprint(io -> printstyled(io, "│ ", color=:light_black), context=io)
-        linestart *= " "^max_bb_idx_size
-        inlining_indent = line_info_preprinter(io, linestart, idx)
-        if idx == first(bbrange)
-            bb_idx_str = string(bb_idx)
-            bb_pad = max_bb_idx_size - length(bb_idx_str)
-            bb_type = length(cfg.blocks[bb_idx].preds) <= 1 ? "─" : "┄"
-            printstyled(io, bb_idx_str, " ", bb_type, "─"^bb_pad, color=:light_black)
-        elseif idx == last(bbrange) # print separator
-            printstyled(io, "└", "─"^(1 + max_bb_idx_size), color=:light_black)
-        else
-            printstyled(io, "│ ", " "^max_bb_idx_size, color=:light_black)
-        end
-        if idx == last(bbrange)
-            bb_idx += 1
-        end
+    if code isa CodeInfo
+        stmt = statement_indices_to_labels(stmt, cfg)
     end
-    print(io, inlining_indent, " ")
-    # convert statement index to labels, as expected by print_stmt
-    if stmt isa Expr
-        if stmt.head === :enter && length(stmt.args) == 1 && stmt.args[1] isa Int
-            stmt = Expr(:enter, block_for_inst(cfg, stmt.args[1]::Int))
-        end
-    elseif isa(stmt, GotoIfNot)
-        stmt = GotoIfNot(stmt.cond, block_for_inst(cfg, stmt.dest))
-    elseif stmt isa GotoNode
-        stmt = GotoNode(block_for_inst(cfg, stmt.label))
-    elseif stmt isa PhiNode
-        e = stmt.edges
-        stmt = PhiNode(Int32[block_for_inst(cfg, Int(e[i])) for i in 1:length(e)], stmt.values)
-    end
-    show_type = types isa Vector{Any} && should_print_ssa_type(stmt)
+    show_type = type !== nothing && should_print_ssa_type(stmt)
     print_stmt(io, idx, stmt, used, maxlength_idx, true, show_type)
-    if types isa Vector{Any} # ignore types for pre-inference code
-        if !isassigned(types, idx)
+    if type !== nothing # ignore types for pre-inference code
+        if type === UNDEF
             # This is an error, but can happen if passes don't update their type information
             printstyled(io, "::#UNDEF", color=:red)
         elseif show_type
-            typ = types[idx]
-            line_info_postprinter(io, typ, idx in used)
+            line_info_postprinter(IOContext(io, :idx => idx), type, idx in used)
         end
     end
     println(io)
     return bb_idx
 end
 
+function ircode_new_nodes_iter(code::IRCode)
+    stmts = code.stmts
+    new_nodes = code.new_nodes.stmts
+    new_nodes_info = code.new_nodes.info
+    new_nodes_perm = filter(i -> isassigned(new_nodes.inst, i), 1:length(new_nodes))
+    sort!(new_nodes_perm, by = x -> (x = new_nodes_info[x]; (x.pos, x.attach_after)))
+    perm_idx = Ref(1)
+
+    function (idx::Int)
+        perm_idx[] <= length(new_nodes_perm) || return nothing
+        node_idx = new_nodes_perm[perm_idx[]]
+        if new_nodes_info[node_idx].pos != idx
+            return nothing
+        end
+        perm_idx[] += 1
+        new_node = new_nodes[node_idx]
+        new_node_inst = isassigned(new_nodes.inst, node_idx) ? new_node[:inst] : UNDEF
+        new_node_type = isassigned(new_nodes.type, node_idx) ? new_node[:type] : UNDEF
+        node_idx += length(stmts)
+        return node_idx, new_node_inst, new_node_type
+    end
+end
+
+# print only line numbers on the left, some of the method names and nesting depth on the right
+function inline_linfo_printer(code::IRCode)
+    loc_annotations, loc_methods, loc_lineno = compute_ir_line_annotations(code)
+    max_loc_width = maximum(length, loc_annotations)
+    max_lineno_width = maximum(length, loc_lineno)
+    max_method_width = maximum(length, loc_methods)
+
+    function (io::IO, indent::String, idx::Int)
+        cols = (displaysize(io)::Tuple{Int,Int})[2]
+
+        if idx == 0
+            annotation = ""
+            loc_method = ""
+            lineno = ""
+        elseif idx <= length(loc_annotations)
+            # N.B.: The line array length not matching is invalid,
+            # but let's be robust here
+            annotation = loc_annotations[idx]
+            loc_method = loc_methods[idx]
+            lineno = loc_lineno[idx]
+        else
+            annotation = "!"
+            loc_method = ""
+            lineno = ""
+        end
+        # Print location information right aligned. If the line below is too long, it'll overwrite this,
+        # but that's what we want.
+        if get(io, :color, false)
+            method_start_column = cols - max_method_width - max_loc_width - 2
+            filler = " "^(max_loc_width-length(annotation))
+            printstyled(io, "\e[$(method_start_column)G$(annotation)$(filler)$(loc_method)\e[1G", color = :light_black)
+        end
+        printstyled(io, lineno, " "^(max_lineno_width - length(lineno) + 1); color = :light_black)
+        return ""
+    end
+end
+
+_strip_color(s::String) = replace(s, r"\e\[\d+m" => "")
+
+function statementidx_lineinfo_printer(f, code::IRCode)
+    printer = f(code.linetable)
+    function (io::IO, indent::String, idx::Int)
+        printer(io, indent, idx > 0 ? code.stmts[idx][:line] : typemin(Int32))
+    end
+end
 function statementidx_lineinfo_printer(f, code::CodeInfo)
     printer = f(code.linetable)
-    return (io::IO, indent::String, idx::Int) -> printer(io, indent, idx > 0 ? code.codelocs[idx] : typemin(Int32))
+    function (io::IO, indent::String, idx::Int)
+        printer(io, indent, idx > 0 ? code.codelocs[idx] : typemin(Int32))
+    end
 end
-statementidx_lineinfo_printer(code::CodeInfo) = statementidx_lineinfo_printer(DILineInfoPrinter, code)
+statementidx_lineinfo_printer(code) = statementidx_lineinfo_printer(DILineInfoPrinter, code)
 
-function show_ir(io::IO, code::CodeInfo, line_info_preprinter=statementidx_lineinfo_printer(code), line_info_postprinter=default_expr_type_printer)
+function stmts_used(io::IO, code::IRCode, warn_unset_entry=true)
+    stmts = code.stmts
+    used = BitSet()
+    for stmt in stmts
+        scan_ssa_use!(push!, used, stmt[:inst])
+    end
+    new_nodes = code.new_nodes.stmts
+    for nn in 1:length(new_nodes)
+        if isassigned(new_nodes.inst, nn)
+            scan_ssa_use!(push!, used, new_nodes[nn][:inst])
+        elseif warn_unset_entry
+            printstyled(io, "ERROR: New node array has unset entry\n", color=:red)
+            warn_unset_entry = false
+        end
+    end
+    return used
+end
+
+function stmts_used(::IO, code::CodeInfo)
     stmts = code.code
     used = BitSet()
-    cfg = compute_basic_blocks(stmts)
     for stmt in stmts
         scan_ssa_use!(push!, used, stmt)
     end
+    return used
+end
+
+function default_config(code::IRCode; verbose_linetable=false)
+    return IRShowConfig(verbose_linetable ? statementidx_lineinfo_printer(code)
+                                          : inline_linfo_printer(code);
+                        bb_color=:normal)
+end
+default_config(code::CodeInfo) = IRShowConfig(statementidx_lineinfo_printer(code))
+
+function show_ir(io::IO, code::Union{IRCode, CodeInfo}, config::IRShowConfig=default_config(code);
+                 pop_new_node! = code isa IRCode ? ircode_new_nodes_iter(code) : Returns(nothing))
+    stmts = code isa IRCode ? code.stmts : code.code
+    used = stmts_used(io, code)
+    cfg = code isa IRCode ? code.cfg : compute_basic_blocks(stmts)
     bb_idx = 1
 
     for idx in 1:length(stmts)
-        bb_idx = show_ir_stmt(io, code, idx, line_info_preprinter, line_info_postprinter, used, cfg, bb_idx)
+        if config.should_print_stmt(code, idx, used)
+            bb_idx = show_ir_stmt(io, code, idx, config, used, cfg, bb_idx; pop_new_node!)
+        elseif bb_idx <= length(cfg.blocks) && idx == cfg.blocks[bb_idx].stmts.stop
+            bb_idx += 1
+        end
     end
 
     max_bb_idx_size = length(string(length(cfg.blocks)))
-    line_info_preprinter(io, " "^(max_bb_idx_size + 2), 0)
+    config.line_info_preprinter(io, " "^(max_bb_idx_size + 2), 0)
     nothing
 end
 
diff --git a/base/compiler/ssair/slot2ssa.jl b/base/compiler/ssair/slot2ssa.jl
index 3b6953fc53d194..2e3d1da1c168fa 100644
--- a/base/compiler/ssair/slot2ssa.jl
+++ b/base/compiler/ssair/slot2ssa.jl
@@ -47,7 +47,7 @@ function scan_slot_def_use(nargs::Int, ci::CodeInfo, code::Vector{Any})
     nslots = length(ci.slotflags)
     result = SlotInfo[SlotInfo() for i = 1:nslots]
     # Set defs for arguments
-    for var in result[1:(1+nargs)]
+    for var in result[1:nargs]
         push!(var.defs, 0)
     end
     for idx in 1:length(code)
@@ -183,16 +183,17 @@ function rename_uses!(ir::IRCode, ci::CodeInfo, idx::Int, @nospecialize(stmt), r
     return fixemup!(stmt->true, stmt->renames[slot_id(stmt)], ir, ci, idx, stmt)
 end
 
-function strip_trailing_junk!(ci::CodeInfo, code::Vector{Any}, info::Vector{Any}, flags::Vector{UInt8})
+function strip_trailing_junk!(ci::CodeInfo, code::Vector{Any}, info::Vector{Any})
     # Remove `nothing`s at the end, we don't handle them well
     # (we expect the last instruction to be a terminator)
+    ssavaluetypes = ci.ssavaluetypes::Vector{Any}
     for i = length(code):-1:1
         if code[i] !== nothing
             resize!(code, i)
-            resize!(ci.ssavaluetypes, i)
+            resize!(ssavaluetypes, i)
             resize!(ci.codelocs, i)
             resize!(info, i)
-            resize!(flags, i)
+            resize!(ci.ssaflags, i)
             break
         end
     end
@@ -201,10 +202,10 @@ function strip_trailing_junk!(ci::CodeInfo, code::Vector{Any}, info::Vector{Any}
     term = code[end]
     if !isa(term, GotoIfNot) && !isa(term, GotoNode) && !isa(term, ReturnNode)
         push!(code, ReturnNode())
-        push!(ci.ssavaluetypes, Union{})
+        push!(ssavaluetypes, Union{})
         push!(ci.codelocs, 0)
         push!(info, nothing)
-        push!(flags, 0x00)
+        push!(ci.ssaflags, 0x00)
     end
     nothing
 end
@@ -585,7 +586,7 @@ function recompute_type(node::Union{PhiNode, PhiCNode}, ci::CodeInfo, ir::IRCode
     return new_typ
 end
 
-function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, nargs::Int,
+function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse,
                         slottypes::Vector{Any})
     code = ir.stmts.inst
     cfg = ir.cfg
@@ -594,7 +595,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
     for idx in 1:length(code)
         stmt = code[idx]
         if isexpr(stmt, :enter)
-            push!(catch_entry_blocks, (block_for_inst(cfg, idx), block_for_inst(cfg, stmt.args[1])))
+            push!(catch_entry_blocks, (block_for_inst(cfg, idx), block_for_inst(cfg, stmt.args[1]::Int)))
         end
     end
 
@@ -809,9 +810,10 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
         end
     end
     # Convert into IRCode form
+    ssavaluetypes = ci.ssavaluetypes::Vector{Any}
     nstmts = length(ir.stmts)
     new_code = Vector{Any}(undef, nstmts)
-    ssavalmap = fill(SSAValue(-1), length(ci.ssavaluetypes) + 1)
+    ssavalmap = fill(SSAValue(-1), length(ssavaluetypes) + 1)
     result_types = Any[Any for _ in 1:nstmts]
     # Detect statement positions for assignments and construct array
     for (bb, idx) in bbidxiter(ir)
@@ -823,19 +825,19 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
             new_dest = block_for_inst(cfg, stmt.dest)
             if new_dest == bb+1
                 # Drop this node - it's a noop
-                new_code[idx] = stmt.cond
+                new_code[idx] = Expr(:call, GlobalRef(Core, :typeassert), stmt.cond, GlobalRef(Core, :Bool))
             else
                 new_code[idx] = GotoIfNot(stmt.cond, new_dest)
             end
         elseif isexpr(stmt, :enter)
-            new_code[idx] = Expr(:enter, block_for_inst(cfg, stmt.args[1]))
+            new_code[idx] = Expr(:enter, block_for_inst(cfg, stmt.args[1]::Int))
             ssavalmap[idx] = SSAValue(idx) # Slot to store token for pop_exception
         elseif isexpr(stmt, :leave) || isexpr(stmt, :(=)) || isa(stmt, ReturnNode) ||
             isexpr(stmt, :meta) || isa(stmt, NewvarNode)
             new_code[idx] = stmt
         else
             ssavalmap[idx] = SSAValue(idx)
-            result_types[idx] = ci.ssavaluetypes[idx]
+            result_types[idx] = ssavaluetypes[idx]
             if isa(stmt, PhiNode)
                 edges = Int32[edge == 0 ? 0 : block_for_inst(cfg, Int(edge)) for edge in stmt.edges]
                 new_code[idx] = PhiNode(edges, stmt.values)
@@ -871,7 +873,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
         changed = false
         for new_idx in type_refine_phi
             node = new_nodes.stmts[new_idx]
-            new_typ = recompute_type(node[:inst], ci, ir, ir.sptypes, slottypes)
+            new_typ = recompute_type(node[:inst]::Union{PhiNode,PhiCNode}, ci, ir, ir.sptypes, slottypes)
             if !(node[:type] ⊑ new_typ) || !(new_typ ⊑ node[:type])
                 node[:type] = new_typ
                 changed = true
diff --git a/base/compiler/ssair/verify.jl b/base/compiler/ssair/verify.jl
index 6c2953c12c5054..653923ace6e8ea 100644
--- a/base/compiler/ssair/verify.jl
+++ b/base/compiler/ssair/verify.jl
@@ -111,7 +111,7 @@ function verify_ir(ir::IRCode, print::Bool=true)
             end
         elseif isexpr(terminator, :enter)
             @label enter_check
-            if length(block.succs) != 2 || (block.succs != [terminator.args[1], idx+1] && block.succs != [idx+1, terminator.args[1]])
+            if length(block.succs) != 2 || (block.succs != Int[terminator.args[1], idx+1] && block.succs != Int[idx+1, terminator.args[1]])
                 @verify_error "Block $idx successors ($(block.succs)), does not match :enter terminator"
                 error("")
             end
diff --git a/base/compiler/stmtinfo.jl b/base/compiler/stmtinfo.jl
index ad7e8886b6cce5..0c54e9359fa1a2 100644
--- a/base/compiler/stmtinfo.jl
+++ b/base/compiler/stmtinfo.jl
@@ -9,7 +9,7 @@ to re-consult the method table. This info is illegal on any statement that is
 not a call to a generic function.
 """
 struct MethodMatchInfo
-    results::Union{Missing, MethodLookupResult}
+    results::MethodLookupResult
 end
 
 """
@@ -108,6 +108,7 @@ method being processed.
 """
 struct InvokeCallInfo
     match::MethodMatch
+    result::Union{Nothing,InferenceResult}
 end
 
 struct OpaqueClosureCallInfo
diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl
index 921fbdfe32d034..ba83516ef2d8fb 100644
--- a/base/compiler/tfuncs.jl
+++ b/base/compiler/tfuncs.jl
@@ -28,15 +28,15 @@ const DATATYPE_NAME_FIELDINDEX = fieldindex(DataType, :name)
 const DATATYPE_PARAMETERS_FIELDINDEX = fieldindex(DataType, :parameters)
 const DATATYPE_TYPES_FIELDINDEX = fieldindex(DataType, :types)
 const DATATYPE_SUPER_FIELDINDEX = fieldindex(DataType, :super)
-const DATATYPE_MUTABLE_FIELDINDEX = fieldindex(DataType, :mutable)
 const DATATYPE_INSTANCE_FIELDINDEX = fieldindex(DataType, :instance)
-const DATATYPE_ABSTRACT_FIELDINDEX = fieldindex(DataType, :abstract)
-const DATATYPE_NAMES_FIELDINDEX = fieldindex(DataType, :names)
+const DATATYPE_HASH_FIELDINDEX = fieldindex(DataType, :hash)
 
 const TYPENAME_NAME_FIELDINDEX = fieldindex(Core.TypeName, :name)
 const TYPENAME_MODULE_FIELDINDEX = fieldindex(Core.TypeName, :module)
 const TYPENAME_NAMES_FIELDINDEX = fieldindex(Core.TypeName, :names)
 const TYPENAME_WRAPPER_FIELDINDEX = fieldindex(Core.TypeName, :wrapper)
+const TYPENAME_HASH_FIELDINDEX = fieldindex(Core.TypeName, :hash)
+const TYPENAME_FLAGS_FIELDINDEX = fieldindex(Core.TypeName, :flags)
 
 ##########
 # tfuncs #
@@ -88,7 +88,7 @@ function instanceof_tfunc(@nospecialize(t))
             # a real instance must be within the declared bounds of the type,
             # so we can intersect with the original wrapper.
             tr = typeintersect(tr, t′′.name.wrapper)
-            isconcrete = !t′′.abstract
+            isconcrete = !isabstracttype(t′′)
             if tr === Union{}
                 # runtime unreachable (our inference Type{T} where S is
                 # uninhabited with any runtime T that exists)
@@ -193,7 +193,6 @@ add_tfunc(ne_float, 2, 2, cmp_tfunc, 2)
 add_tfunc(lt_float, 2, 2, cmp_tfunc, 2)
 add_tfunc(le_float, 2, 2, cmp_tfunc, 2)
 add_tfunc(fpiseq, 2, 2, cmp_tfunc, 1)
-add_tfunc(fpislt, 2, 2, cmp_tfunc, 1)
 add_tfunc(eq_float_fast, 2, 2, cmp_tfunc, 1)
 add_tfunc(ne_float_fast, 2, 2, cmp_tfunc, 1)
 add_tfunc(lt_float_fast, 2, 2, cmp_tfunc, 1)
@@ -262,6 +261,7 @@ function isdefined_nothrow(argtypes::Array{Any, 1})
         (argtypes[2] ⊑ Symbol || argtypes[2] ⊑ Int) :
          argtypes[2] ⊑ Symbol
 end
+isdefined_tfunc(arg1, sym, order) = (@nospecialize; isdefined_tfunc(arg1, sym))
 function isdefined_tfunc(@nospecialize(arg1), @nospecialize(sym))
     if isa(arg1, Const)
         a1 = typeof(arg1.val)
@@ -272,7 +272,7 @@ function isdefined_tfunc(@nospecialize(arg1), @nospecialize(sym))
         return Bool
     end
     a1 = unwrap_unionall(a1)
-    if isa(a1, DataType) && !a1.abstract
+    if isa(a1, DataType) && !isabstracttype(a1)
         if a1 === Module
             Symbol <: widenconst(sym) || return Bottom
             if isa(sym, Const) && isa(sym.val, Symbol) && isa(arg1, Const) && isdefined(arg1.val, sym.val)
@@ -287,7 +287,7 @@ function isdefined_tfunc(@nospecialize(arg1), @nospecialize(sym))
             else
                 return Bottom
             end
-            if 1 <= idx <= a1.ninitialized
+            if 1 <= idx <= datatype_min_ninitialized(a1)
                 return Const(true)
             elseif a1.name === _NAMEDTUPLE_NAME
                 if isconcretetype(a1)
@@ -315,7 +315,7 @@ function isdefined_tfunc(@nospecialize(arg1), @nospecialize(sym))
     end
     return Bool
 end
-add_tfunc(isdefined, 2, 2, isdefined_tfunc, 1)
+add_tfunc(isdefined, 2, 3, isdefined_tfunc, 1)
 
 function sizeof_nothrow(@nospecialize(x))
     if isa(x, Const)
@@ -405,7 +405,7 @@ function nfields_tfunc(@nospecialize(x))
     isa(x, Conditional) && return Const(0)
     x = unwrap_unionall(widenconst(x))
     isconstType(x) && return Const(nfields(x.parameters[1]))
-    if isa(x, DataType) && !x.abstract
+    if isa(x, DataType) && !isabstracttype(x)
         if !(x.name === Tuple.name && isvatuple(x)) &&
            !(x.name === _NAMEDTUPLE_NAME && !isconcretetype(x))
             return Const(isdefined(x, :types) ? length(x.types) : length(x.name.names))
@@ -466,25 +466,51 @@ add_tfunc(Core._typevar, 3, 3, typevar_tfunc, 100)
 add_tfunc(applicable, 1, INT_INF, (@nospecialize(f), args...)->Bool, 100)
 add_tfunc(Core.Intrinsics.arraylen, 1, 1, @nospecialize(x)->Int, 4)
 add_tfunc(arraysize, 2, 2, (@nospecialize(a), @nospecialize(d))->Int, 4)
+
 function pointer_eltype(@nospecialize(ptr))
     a = widenconst(ptr)
-    if a <: Ptr
-        if isa(a,DataType) && isa(a.parameters[1],Type)
-            return a.parameters[1]
-        elseif isa(a,UnionAll) && !has_free_typevars(a)
-            unw = unwrap_unionall(a)
-            if isa(unw,DataType)
-                return rewrap_unionall(unw.parameters[1], a)
-            end
+    if !has_free_typevars(a)
+        unw = unwrap_unionall(a)
+        if isa(unw, DataType) && unw.name === Ptr.body.name
+            T = unw.parameters[1]
+            T isa Type && return rewrap_unionall(T, a)
         end
     end
     return Any
 end
-add_tfunc(pointerref, 3, 3,
-          function (@nospecialize(a), @nospecialize(i), @nospecialize(align))
-            return pointer_eltype(a)
-          end, 4)
-add_tfunc(pointerset, 4, 4, (@nospecialize(a), @nospecialize(v), @nospecialize(i), @nospecialize(align)) -> a, 5)
+function atomic_pointermodify_tfunc(ptr, op, v, order)
+    @nospecialize
+    a = widenconst(ptr)
+    if !has_free_typevars(a)
+        unw = unwrap_unionall(a)
+        if isa(unw, DataType) && unw.name === Ptr.body.name
+            T = unw.parameters[1]
+            # note: we could sometimes refine this to a PartialStruct if we analyzed `op(T, T)::T`
+            T isa Type && return rewrap_unionall(Pair{T, T}, a)
+        end
+    end
+    return Pair
+end
+function atomic_pointerreplace_tfunc(ptr, x, v, success_order, failure_order)
+    @nospecialize
+    a = widenconst(ptr)
+    if !has_free_typevars(a)
+        unw = unwrap_unionall(a)
+        if isa(unw, DataType) && unw.name === Ptr.body.name
+            T = unw.parameters[1]
+            T isa Type && return rewrap_unionall(ccall(:jl_apply_cmpswap_type, Any, (Any,), T), a)
+        end
+    end
+    return ccall(:jl_apply_cmpswap_type, Any, (Any,), T) where T
+end
+add_tfunc(pointerref, 3, 3, (a, i, align) -> (@nospecialize; pointer_eltype(a)), 4)
+add_tfunc(pointerset, 4, 4, (a, v, i, align) -> (@nospecialize; a), 5)
+add_tfunc(atomic_fence, 1, 1, (order) -> (@nospecialize; Nothing), 4)
+add_tfunc(atomic_pointerref, 2, 2, (a, order) -> (@nospecialize; pointer_eltype(a)), 4)
+add_tfunc(atomic_pointerset, 3, 3, (a, v, order) -> (@nospecialize; a), 5)
+add_tfunc(atomic_pointerswap, 3, 3, (a, v, order) -> (@nospecialize; pointer_eltype(a)), 5)
+add_tfunc(atomic_pointermodify, 4, 4, atomic_pointermodify_tfunc, 5)
+add_tfunc(atomic_pointerreplace, 5, 5, atomic_pointerreplace_tfunc, 5)
 
 # more accurate typeof_tfunc for vararg tuples abstract only in length
 function typeof_concrete_vararg(t::DataType)
@@ -530,7 +556,7 @@ function typeof_tfunc(@nospecialize(t))
         return typeof_tfunc(t.ub)
     elseif isa(t, UnionAll)
         u = unwrap_unionall(t)
-        if isa(u, DataType) && !u.abstract
+        if isa(u, DataType) && !isabstracttype(u)
             if u.name === Tuple.name
                 uu = typeof_concrete_vararg(u)
                 if uu !== nothing
@@ -544,7 +570,7 @@ function typeof_tfunc(@nospecialize(t))
     end
     return DataType # typeof(anything)::DataType
 end
-add_tfunc(typeof, 1, 1, typeof_tfunc, 0)
+add_tfunc(typeof, 1, 1, typeof_tfunc, 1)
 
 function typeassert_tfunc(@nospecialize(v), @nospecialize(t))
     t = instanceof_tfunc(t)[1]
@@ -613,10 +639,8 @@ is_dt_const_field(fld::Int) = (
      fld == DATATYPE_PARAMETERS_FIELDINDEX ||
      fld == DATATYPE_TYPES_FIELDINDEX ||
      fld == DATATYPE_SUPER_FIELDINDEX ||
-     fld == DATATYPE_MUTABLE_FIELDINDEX ||
      fld == DATATYPE_INSTANCE_FIELDINDEX ||
-     fld == DATATYPE_NAMES_FIELDINDEX ||
-     fld == DATATYPE_ABSTRACT_FIELDINDEX
+     fld == DATATYPE_HASH_FIELDINDEX
     )
 function const_datatype_getfield_tfunc(@nospecialize(sv), fld::Int)
     if fld == DATATYPE_INSTANCE_FIELDINDEX
@@ -650,7 +674,7 @@ function fieldcount_noerror(@nospecialize t)
         end
         abstr = true
     else
-        abstr = t.abstract || (t.name === Tuple.name && isvatuple(t))
+        abstr = isabstracttype(t) || (t.name === Tuple.name && isvatuple(t))
     end
     if abstr
         return nothing
@@ -675,14 +699,25 @@ function try_compute_fieldidx(typ::DataType, @nospecialize(field))
 end
 
 function getfield_nothrow(argtypes::Vector{Any})
-    2 <= length(argtypes) <= 3 || return false
-    length(argtypes) == 2 && return getfield_nothrow(argtypes[1], argtypes[2], Const(true))
-    return getfield_nothrow(argtypes[1], argtypes[2], argtypes[3])
+    if length(argtypes) == 2
+        boundscheck = Bool
+    elseif length(argtypes) == 3
+        boundscheck = argtypes[3]
+        if boundscheck === Const(:not_atomic) # TODO: this is assuming not atomic
+            boundscheck = Bool
+        end
+    elseif length(argtypes) == 4
+        boundscheck = argtypes[4]
+    else
+        return false
+    end
+    widenconst(boundscheck) !== Bool && return false
+    bounds_check_disabled = isa(boundscheck, Const) && boundscheck.val === false
+    return getfield_nothrow(argtypes[1], argtypes[2], !bounds_check_disabled)
 end
-function getfield_nothrow(@nospecialize(s00), @nospecialize(name), @nospecialize(inbounds))
-    bounds_check_disabled = isa(inbounds, Const) && inbounds.val === false
-    # If we don't have invounds and don't know the field, don't even bother
-    if !bounds_check_disabled
+function getfield_nothrow(@nospecialize(s00), @nospecialize(name), boundscheck::Bool)
+    # If bounds checking is enabled and we don't know the field, don't even bother
+    if boundscheck
         isa(name, Const) || return false
     end
 
@@ -700,7 +735,7 @@ function getfield_nothrow(@nospecialize(s00), @nospecialize(name), @nospecialize
             end
             return isdefined(sv, name.val)
         end
-        if bounds_check_disabled && !isa(sv, Module)
+        if !boundscheck && !isa(sv, Module)
             # If bounds checking is disabled and all fields are assigned,
             # we may assume that we don't throw
             for i = 1:fieldcount(typeof(sv))
@@ -714,21 +749,22 @@ function getfield_nothrow(@nospecialize(s00), @nospecialize(name), @nospecialize
     s0 = widenconst(s00)
     s = unwrap_unionall(s0)
     if isa(s, Union)
-        return getfield_nothrow(rewrap(s.a, s00), name, inbounds) &&
-            getfield_nothrow(rewrap(s.b, s00), name, inbounds)
+        return getfield_nothrow(rewrap(s.a, s00), name, boundscheck) &&
+               getfield_nothrow(rewrap(s.b, s00), name, boundscheck)
     elseif isa(s, DataType)
         # Can't say anything about abstract types
-        s.abstract && return false
+        isabstracttype(s) && return false
+        s.name.atomicfields == C_NULL || return false # TODO: currently we're only testing for ordering == :not_atomic
         # If all fields are always initialized, and bounds check is disabled, we can assume
         # we don't throw
-        if bounds_check_disabled && !isvatuple(s) && s.name !== NamedTuple.body.body.name && fieldcount(s) == s.ninitialized
+        if !boundscheck && s.name.n_uninitialized == 0
             return true
         end
         # Else we need to know what the field is
         isa(name, Const) || return false
         field = try_compute_fieldidx(s, name.val)
         field === nothing && return false
-        field <= s.ninitialized && return true
+        field <= datatype_min_ninitialized(s) && return true
         # `try_compute_fieldidx` already check for field index bound.
         !isvatuple(s) && isbitstype(fieldtype(s0, field)) && return true
     end
@@ -736,8 +772,8 @@ function getfield_nothrow(@nospecialize(s00), @nospecialize(name), @nospecialize
     return false
 end
 
-getfield_tfunc(@nospecialize(s00), @nospecialize(name), @nospecialize(inbounds)) =
-    getfield_tfunc(s00, name)
+getfield_tfunc(s00, name, boundscheck_or_order) = (@nospecialize; getfield_tfunc(s00, name))
+getfield_tfunc(s00, name, order, boundscheck) = (@nospecialize; getfield_tfunc(s00, name))
 function getfield_tfunc(@nospecialize(s00), @nospecialize(name))
     s = unwrap_unionall(s00)
     if isa(s, Union)
@@ -776,6 +812,8 @@ function getfield_tfunc(@nospecialize(s00), @nospecialize(name))
                 if (fld == TYPENAME_NAME_FIELDINDEX ||
                     fld == TYPENAME_MODULE_FIELDINDEX ||
                     fld == TYPENAME_WRAPPER_FIELDINDEX ||
+                    fld == TYPENAME_HASH_FIELDINDEX ||
+                    fld == TYPENAME_FLAGS_FIELDINDEX ||
                     (fld == TYPENAME_NAMES_FIELDINDEX && isdefined(sv, fld)))
                     return Const(getfield(sv, fld))
                 end
@@ -788,19 +826,20 @@ function getfield_tfunc(@nospecialize(s00), @nospecialize(name))
             end
         end
         s = typeof(sv)
-    elseif isa(s, PartialStruct)
+    elseif isa(s00, PartialStruct)
+        s = widenconst(s00)
+        sty = unwrap_unionall(s)::DataType
         if isa(name, Const)
             nv = name.val
             if isa(nv, Symbol)
-                nv = fieldindex(widenconst(s), nv, false)
+                nv = fieldindex(sty, nv, false)
             end
-            if isa(nv, Int) && 1 <= nv <= length(s.fields)
-                return unwrapva(s.fields[nv])
+            if isa(nv, Int) && 1 <= nv <= length(s00.fields)
+                return unwrapva(s00.fields[nv])
             end
         end
-        s = widenconst(s)
     end
-    if isType(s) || !isa(s, DataType) || s.abstract
+    if isType(s) || !isa(s, DataType) || isabstracttype(s)
         return Any
     end
     s = s::DataType
@@ -815,7 +854,7 @@ function getfield_tfunc(@nospecialize(s00), @nospecialize(name))
     end
     # If no value has this type, then this statement should be unreachable.
     # Bail quickly now.
-    s.has_concrete_subtype || return Union{}
+    has_concrete_subtype(s) || return Union{}
     if s.name === _NAMEDTUPLE_NAME && !isconcretetype(s)
         if isa(name, Const) && isa(name.val, Symbol)
             if isa(s.parameters[1], Tuple)
@@ -889,10 +928,69 @@ function getfield_tfunc(@nospecialize(s00), @nospecialize(name))
     end
     return rewrap_unionall(R, s00)
 end
-add_tfunc(getfield, 2, 3, getfield_tfunc, 1)
-add_tfunc(setfield!, 3, 3, (@nospecialize(o), @nospecialize(f), @nospecialize(v)) -> v, 3)
-fieldtype_tfunc(@nospecialize(s0), @nospecialize(name), @nospecialize(inbounds)) =
-    fieldtype_tfunc(s0, name)
+
+setfield!_tfunc(o, f, v, order) = (@nospecialize; v)
+setfield!_tfunc(o, f, v) = (@nospecialize; v)
+
+swapfield!_tfunc(o, f, v, order) = (@nospecialize; getfield_tfunc(o, f))
+swapfield!_tfunc(o, f, v) = (@nospecialize; getfield_tfunc(o, f))
+modifyfield!_tfunc(o, f, op, v, order) = (@nospecialize; modifyfield!_tfunc(o, f, op, v))
+function modifyfield!_tfunc(o, f, op, v)
+    @nospecialize
+    T = _fieldtype_tfunc(o, isconcretetype(o), f)
+    T === Bottom && return Bottom
+    PT = Const(Pair)
+    return instanceof_tfunc(apply_type_tfunc(PT, T, T))[1]
+end
+function abstract_modifyfield!(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::InferenceState)
+    nargs = length(argtypes)
+    if !isempty(argtypes) && isvarargtype(argtypes[nargs])
+        nargs - 1 <= 6 || return CallMeta(Bottom, false)
+        nargs > 3 || return CallMeta(Any, false)
+    else
+        5 <= nargs <= 6 || return CallMeta(Bottom, false)
+    end
+    o = unwrapva(argtypes[2])
+    f = unwrapva(argtypes[3])
+    RT = modifyfield!_tfunc(o, f, Any, Any)
+    info = false
+    if nargs >= 5 && RT !== Bottom
+        # we may be able to refine this to a PartialStruct by analyzing `op(o.f, v)::T`
+        # as well as compute the info for the method matches
+        op = unwrapva(argtypes[4])
+        v = unwrapva(argtypes[5])
+        TF = getfield_tfunc(o, f)
+        push!(sv.ssavalue_uses[sv.currpc], sv.currpc) # temporarily disable `call_result_unused` check for this call
+        callinfo = abstract_call(interp, nothing, Any[op, TF, v], sv, #=max_methods=# 1)
+        pop!(sv.ssavalue_uses[sv.currpc], sv.currpc)
+        TF2 = tmeet(callinfo.rt, widenconst(TF))
+        if TF2 === Bottom
+            RT = Bottom
+        elseif isconcretetype(RT) && has_nontrivial_const_info(TF2) # isconcrete condition required to form a PartialStruct
+            RT = PartialStruct(RT, Any[TF, TF2])
+        end
+        info = callinfo.info
+    end
+    return CallMeta(RT, info)
+end
+replacefield!_tfunc(o, f, x, v, success_order, failure_order) = (@nospecialize; replacefield!_tfunc(o, f, x, v))
+replacefield!_tfunc(o, f, x, v, success_order) = (@nospecialize; replacefield!_tfunc(o, f, x, v))
+function replacefield!_tfunc(o, f, x, v)
+    @nospecialize
+    T = _fieldtype_tfunc(o, isconcretetype(o), f)
+    T === Bottom && return Bottom
+    PT = Const(ccall(:jl_apply_cmpswap_type, Any, (Any,), T) where T)
+    return instanceof_tfunc(apply_type_tfunc(PT, T))[1]
+end
+
+# we could use tuple_tfunc instead of widenconst, but `o` is mutable, so that is unlikely to be beneficial
+
+add_tfunc(getfield, 2, 4, getfield_tfunc, 1)
+add_tfunc(setfield!, 3, 4, setfield!_tfunc, 3)
+
+add_tfunc(swapfield!, 3, 4, swapfield!_tfunc, 3)
+add_tfunc(modifyfield!, 4, 5, modifyfield!_tfunc, 3)
+add_tfunc(replacefield!, 4, 6, replacefield!_tfunc, 3)
 
 function fieldtype_nothrow(@nospecialize(s0), @nospecialize(name))
     s0 === Bottom && return true # unreachable
@@ -926,7 +1024,7 @@ function _fieldtype_nothrow(@nospecialize(s), exact::Bool, name::Const)
         return exact ? (a || b) : (a && b)
     end
     u isa DataType || return false
-    u.abstract && return false
+    isabstracttype(u) && return false
     if u.name === _NAMEDTUPLE_NAME && !isconcretetype(u)
         # TODO: better approximate inference
         return false
@@ -951,14 +1049,15 @@ function _fieldtype_nothrow(@nospecialize(s), exact::Bool, name::Const)
     return true
 end
 
+fieldtype_tfunc(s0, name, boundscheck) = (@nospecialize; fieldtype_tfunc(s0, name))
 function fieldtype_tfunc(@nospecialize(s0), @nospecialize(name))
     if s0 === Bottom
         return Bottom
     end
     if s0 === Any || s0 === Type || DataType ⊑ s0 || UnionAll ⊑ s0
         # For a generic DataType, one of the fields could still be a TypeVar
-        # which is not a Type
-        return Union{Type, TypeVar}
+        # which is not a Type. Tuple{...} can also contain Symbols etc.
+        return Any
     end
     # fieldtype only accepts Types
     if isa(s0, Const) && !(isa(s0.val, DataType) || isa(s0.val, UnionAll) || isa(s0.val, Union))
@@ -983,17 +1082,28 @@ function _fieldtype_tfunc(@nospecialize(s), exact::Bool, @nospecialize(name))
     exact = exact && !has_free_typevars(s)
     u = unwrap_unionall(s)
     if isa(u, Union)
-        return tmerge(_fieldtype_tfunc(rewrap(u.a, s), exact, name),
-                      _fieldtype_tfunc(rewrap(u.b, s), exact, name))
+        ta0 = _fieldtype_tfunc(rewrap(u.a, s), exact, name)
+        tb0 = _fieldtype_tfunc(rewrap(u.b, s), exact, name)
+        ta0 ⊑ tb0 && return tb0
+        tb0 ⊑ ta0 && return ta0
+        ta, exacta, _, istypea = instanceof_tfunc(ta0)
+        tb, exactb, _, istypeb = instanceof_tfunc(tb0)
+        if exact && exacta && exactb
+            return Const(Union{ta, tb})
+        end
+        if istypea && istypeb
+            return Type{<:Union{ta, tb}}
+        end
+        return Any
     end
-    u isa DataType || return Union{Type, TypeVar}
-    if u.abstract
+    u isa DataType || return Any
+    if isabstracttype(u)
         # Abstract types have no fields
         exact && return Bottom
         # Type{...} without free typevars has no subtypes, so it is actually
         # exact, even if `exact` is false.
         isType(u) && !has_free_typevars(u.parameters[1]) && return Bottom
-        return Union{Type, TypeVar}
+        return Any
     end
     if u.name === _NAMEDTUPLE_NAME && !isconcretetype(u)
         # TODO: better approximate inference
@@ -1020,8 +1130,15 @@ function _fieldtype_tfunc(@nospecialize(s), exact::Bool, @nospecialize(name))
                 else
                     ft1 = Type{ft1}
                 end
+            elseif ft1 isa Type || ft1 isa TypeVar
+                if ft1 === Any && u.name === Tuple.name
+                    # Tuple{:x} is possible in this case
+                    ft1 = Any
+                else
+                    ft1 = Type{ft} where ft<:ft1
+                end
             else
-                ft1 = Type{ft} where ft<:ft1
+                ft1 = Const(ft1)
             end
             t = tmerge(t, ft1)
             t === Any && break
@@ -1044,6 +1161,9 @@ function _fieldtype_tfunc(@nospecialize(s), exact::Bool, @nospecialize(name))
     else
         ft = ftypes[fld]
     end
+    if !isa(ft, Type) && !isa(ft, TypeVar)
+        return Const(ft)
+    end
 
     exactft = exact || (!has_free_typevars(ft) && u.name !== Tuple.name)
     ft = rewrap_unionall(ft, s)
@@ -1053,6 +1173,10 @@ function _fieldtype_tfunc(@nospecialize(s), exact::Bool, @nospecialize(name))
         end
         return Type{ft}
     end
+    if u.name === Tuple.name && ft === Any
+        # Tuple{:x} is possible
+        return Any
+    end
     return Type{<:ft}
 end
 add_tfunc(fieldtype, 2, 3, fieldtype_tfunc, 0)
@@ -1084,7 +1208,7 @@ function apply_type_nothrow(argtypes::Array{Any, 1}, @nospecialize(rt))
     else
         return false
     end
-    # We know the apply_type is well formed. Oherwise our rt would have been
+    # We know the apply_type is well formed. Otherwise our rt would have been
     # Bottom (or Type).
     (headtype === Union) && return true
     isa(rt, Const) && return true
@@ -1403,7 +1527,10 @@ function array_builtin_common_nothrow(argtypes::Array{Any,1}, first_idx_idx::Int
     array_type_undefable(atype) && return false
     # If we have @inbounds (first argument is false), we're allowed to assume
     # we don't throw bounds errors.
-    (isa(argtypes[1], Const) && !argtypes[1].val) && return true
+    boundcheck = argtypes[1]
+    if isa(boundcheck, Const)
+        !(boundcheck.val::Bool) && return true
+    end
     # Else we can't really say anything here
     # TODO: In the future we may be able to track the shapes of arrays though
     # inference.
@@ -1504,7 +1631,7 @@ function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtyp
         if length(argtypes) - 1 == tf[2]
             argtypes = argtypes[1:end-1]
         else
-            vatype = argtypes[end]
+            vatype = argtypes[end]::Core.TypeofVararg
             argtypes = argtypes[1:end-1]
             while length(argtypes) < tf[1]
                 push!(argtypes, unwrapva(vatype))
@@ -1522,6 +1649,10 @@ end
 
 # Query whether the given intrinsic is nothrow
 
+_iszero(x) = x === Intrinsics.xor_int(x, x)
+_isneg1(x) = _iszero(Intrinsics.not_int(x))
+_istypemin(x) = !_iszero(x) && Intrinsics.neg_int(x) === x
+
 function intrinsic_nothrow(f::IntrinsicFunction, argtypes::Array{Any, 1})
     # First check that we have the correct number of arguments
     iidx = Int(reinterpret(Int32, f::IntrinsicFunction)) + 1
@@ -1548,11 +1679,10 @@ function intrinsic_nothrow(f::IntrinsicFunction, argtypes::Array{Any, 1})
             return false
         end
         den_val = argtypes[2].val
-        den_val !== zero(typeof(den_val)) || return false
+        _iszero(den_val) && return false
         f !== Intrinsics.checked_sdiv_int && return true
         # Nothrow as long as we additionally don't do typemin(T)/-1
-        return den_val !== -1 || (isa(argtypes[1], Const) &&
-            argtypes[1].val !== typemin(typeof(den_val)))
+        return !_isneg1(den_val) || (isa(argtypes[1], Const) && !_istypemin(argtypes[1].val))
     end
     if f === Intrinsics.pointerref
         # Nothrow as long as the types are ok. N.B.: dereferencability is not
@@ -1607,7 +1737,7 @@ function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, s
             aft = argtypes[2]
             if isa(aft, Const) || (isType(aft) && !has_free_typevars(aft)) ||
                    (isconcretetype(aft) && !(aft <: Builtin))
-                af_argtype = isa(tt, Const) ? tt.val : tt.parameters[1]
+                af_argtype = isa(tt, Const) ? tt.val : (tt::DataType).parameters[1]
                 if isa(af_argtype, DataType) && af_argtype <: Tuple
                     argtypes_vec = Any[aft, af_argtype.parameters...]
                     if contains_is(argtypes_vec, Union{})
diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl
index a20ef946723a7d..f301b8a1756d94 100644
--- a/base/compiler/typeinfer.jl
+++ b/base/compiler/typeinfer.jl
@@ -1,10 +1,10 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# build (and start inferring) the inference frame for the linfo
-function typeinf(interp::AbstractInterpreter, result::InferenceResult, cached::Bool)
-    frame = InferenceState(result, cached, interp)
+# build (and start inferring) the inference frame for the top-level MethodInstance
+function typeinf(interp::AbstractInterpreter, result::InferenceResult, cache::Symbol)
+    frame = InferenceState(result, cache, interp)
     frame === nothing && return false
-    cached && lock_mi_inference(interp, result.linfo)
+    cache === :global && lock_mi_inference(interp, result.linfo)
     return typeinf(interp, frame)
 end
 
@@ -214,7 +214,7 @@ function finish!(interp::AbstractInterpreter, caller::InferenceResult)
     # If we didn't transform the src for caching, we may have to transform
     # it anyway for users like typeinf_ext. Do that here.
     opt = caller.src
-    if may_optimize(interp) && opt isa OptimizationState
+    if opt isa OptimizationState # implies `may_optimize(interp) === true`
         if opt.ir !== nothing
             caller.src = ir_to_codeinf!(opt)
         end
@@ -243,13 +243,13 @@ function _typeinf(interp::AbstractInterpreter, frame::InferenceState)
     # collect results for the new expanded frame
     results = Tuple{InferenceResult, Vector{Any}, Bool}[
             ( frames[i].result,
-              frames[i].stmt_edges[1],
+              frames[i].stmt_edges[1]::Vector{Any},
               frames[i].cached )
         for i in 1:length(frames) ]
     empty!(frames)
     for (caller, _, _) in results
         opt = caller.src
-        if may_optimize(interp) && opt isa OptimizationState
+        if opt isa OptimizationState # implies `may_optimize(interp) === true`
             result_type = caller.result
             @assert !(result_type isa LimitedAccuracy)
             optimize(interp, opt, OptimizationParams(interp), result_type)
@@ -341,9 +341,9 @@ function maybe_compress_codeinfo(interp::AbstractInterpreter, linfo::MethodInsta
     if cache_the_tree
         if may_compress(interp)
             nslots = length(ci.slotflags)
-            resize!(ci.slottypes, nslots)
+            resize!(ci.slottypes::Vector{Any}, nslots)
             resize!(ci.slotnames, nslots)
-            return ccall(:jl_compress_ir, Any, (Any, Any), def, ci)
+            return ccall(:jl_compress_ir, Vector{UInt8}, (Any, Any), def, ci)
         else
             return ci
         end
@@ -354,16 +354,10 @@ end
 
 function transform_result_for_cache(interp::AbstractInterpreter, linfo::MethodInstance,
                                     valid_worlds::WorldRange, @nospecialize(inferred_result))
-    local const_flags::Int32
     # If we decided not to optimize, drop the OptimizationState now.
     # External interpreters can override as necessary to cache additional information
     if inferred_result isa OptimizationState
-        opt = inferred_result
-        if isa(opt.src, CodeInfo)
-            inferred_result = ir_to_codeinf!(opt)
-        else
-            inferred_result = opt.src
-        end
+        inferred_result = ir_to_codeinf!(inferred_result)
     end
     if inferred_result isa CodeInfo
         inferred_result.min_world = first(valid_worlds)
@@ -386,17 +380,18 @@ function cache_result!(interp::AbstractInterpreter, result::InferenceResult)
     end
     # check if the existing linfo metadata is also sufficient to describe the current inference result
     # to decide if it is worth caching this
-    already_inferred = already_inferred_quick_test(interp, result.linfo)
-    if !already_inferred && haskey(WorldView(code_cache(interp), valid_worlds), result.linfo)
+    linfo = result.linfo
+    already_inferred = already_inferred_quick_test(interp, linfo)
+    if !already_inferred && haskey(WorldView(code_cache(interp), valid_worlds), linfo)
         already_inferred = true
     end
 
     # TODO: also don't store inferred code if we've previously decided to interpret this function
     if !already_inferred
-        inferred_result = transform_result_for_cache(interp, result.linfo, valid_worlds, result.src)
-        code_cache(interp)[result.linfo] = CodeInstance(result, inferred_result, valid_worlds)
+        inferred_result = transform_result_for_cache(interp, linfo, valid_worlds, result.src)
+        code_cache(interp)[linfo] = CodeInstance(result, inferred_result, valid_worlds)
     end
-    unlock_mi_inference(interp, result.linfo)
+    unlock_mi_inference(interp, linfo)
     nothing
 end
 
@@ -428,8 +423,7 @@ function finish(me::InferenceState, interp::AbstractInterpreter)
     # prepare to run optimization passes on fulltree
     s_edges = me.stmt_edges[1]
     if s_edges === nothing
-        s_edges = []
-        me.stmt_edges[1] = s_edges
+        s_edges = me.stmt_edges[1] = []
     end
     for edges in me.stmt_edges
         edges === nothing && continue
@@ -438,7 +432,7 @@ function finish(me::InferenceState, interp::AbstractInterpreter)
         empty!(edges)
     end
     if me.src.edges !== nothing
-        append!(s_edges, me.src.edges)
+        append!(s_edges, me.src.edges::Vector)
         me.src.edges = nothing
     end
     # inspect whether our inference had a limited result accuracy,
@@ -447,7 +441,7 @@ function finish(me::InferenceState, interp::AbstractInterpreter)
     limited_ret = me.bestguess isa LimitedAccuracy
     limited_src = false
     if !limited_ret
-        gt = me.src.ssavaluetypes
+        gt = me.src.ssavaluetypes::Vector{Any}
         for j = 1:length(gt)
             gt[j] = gtj = cycle_fix_limited(gt[j], me)
             if gtj isa LimitedAccuracy && me.parent !== nothing
@@ -473,7 +467,7 @@ function finish(me::InferenceState, interp::AbstractInterpreter)
         # either because we are the outermost code, or we might use this later
         doopt = (me.cached || me.parent !== nothing)
         type_annotate!(me, doopt)
-        if doopt
+        if doopt && may_optimize(interp)
             me.result.src = OptimizationState(me, OptimizationParams(interp), interp)
         else
             me.result.src = me.src::CodeInfo # stash a convenience copy of the code (e.g. for reflection)
@@ -511,8 +505,9 @@ end
 
 # widen all Const elements in type annotations
 function widen_all_consts!(src::CodeInfo)
-    for i = 1:length(src.ssavaluetypes)
-        src.ssavaluetypes[i] = widenconst(src.ssavaluetypes[i])
+    ssavaluetypes = src.ssavaluetypes::Vector{Any}
+    for i = 1:length(ssavaluetypes)
+        ssavaluetypes[i] = widenconst(ssavaluetypes[i])
     end
 
     for i = 1:length(src.code)
@@ -540,7 +535,7 @@ function annotate_slot_load!(e::Expr, vtypes::VarTable, sv::InferenceState, unde
         subex = e.args[i]
         if isa(subex, Expr)
             annotate_slot_load!(subex, vtypes, sv, undefs)
-        elseif isa(subex, Slot)
+        elseif isa(subex, SlotNumber)
             e.args[i] = visit_slot_load!(subex, vtypes, sv, undefs)
         end
     end
@@ -549,13 +544,13 @@ end
 function annotate_slot_load(@nospecialize(e), vtypes::VarTable, sv::InferenceState, undefs::Array{Bool,1})
     if isa(e, Expr)
         annotate_slot_load!(e, vtypes, sv, undefs)
-    elseif isa(e, Slot)
+    elseif isa(e, SlotNumber)
         return visit_slot_load!(e, vtypes, sv, undefs)
     end
     return e
 end
 
-function visit_slot_load!(sl::Slot, vtypes::VarTable, sv::InferenceState, undefs::Array{Bool,1})
+function visit_slot_load!(sl::SlotNumber, vtypes::VarTable, sv::InferenceState, undefs::Array{Bool,1})
     id = slot_id(sl)
     s = vtypes[id]
     vt = widenconditional(ignorelimited(s.typ))
@@ -577,6 +572,7 @@ function record_slot_assign!(sv::InferenceState)
     states = sv.stmt_types
     body = sv.src.code::Vector{Any}
     slottypes = sv.slottypes::Vector{Any}
+    ssavaluetypes = sv.src.ssavaluetypes::Vector{Any}
     for i = 1:length(body)
         expr = body[i]
         st_i = states[i]
@@ -584,8 +580,8 @@ function record_slot_assign!(sv::InferenceState)
         if isa(st_i, VarTable) && isa(expr, Expr) && expr.head === :(=)
             lhs = expr.args[1]
             rhs = expr.args[2]
-            if isa(lhs, Slot)
-                vt = widenconst(sv.src.ssavaluetypes[i])
+            if isa(lhs, SlotNumber)
+                vt = widenconst(ssavaluetypes[i])
                 if vt !== Bottom
                     id = slot_id(lhs)
                     otherTy = slottypes[id]
@@ -608,12 +604,11 @@ function type_annotate!(sv::InferenceState, run_optimizer::Bool)
     # (otherwise, we'll perhaps run the optimization passes later, outside of inference)
 
     # remove all unused ssa values
-    gt = sv.src.ssavaluetypes
-    for j = 1:length(gt)
-        if gt[j] === NOT_FOUND
-            gt[j] = Union{}
-        end
-        gt[j] = widenconditional(gt[j])
+    src = sv.src
+    ssavaluetypes = src.ssavaluetypes::Vector{Any}
+    for j = 1:length(ssavaluetypes)
+        t = ssavaluetypes[j]
+        ssavaluetypes[j] = t === NOT_FOUND ? Union{} : widenconditional(t)
     end
 
     # compute the required type for each slot
@@ -626,7 +621,6 @@ function type_annotate!(sv::InferenceState, run_optimizer::Bool)
     # annotate variables load types
     # remove dead code optimization
     # and compute which variables may be used undef
-    src = sv.src
     states = sv.stmt_types
     nargs = sv.nargs
     nslots = length(states[1]::VarTable)
@@ -639,7 +633,7 @@ function type_annotate!(sv::InferenceState, run_optimizer::Bool)
         expr = body[i]
         if isa(expr, GotoIfNot)
             if !isa(states[expr.dest], VarTable)
-                body[i] = expr.cond
+                body[i] = Expr(:call, GlobalRef(Core, :typeassert), expr.cond, GlobalRef(Core, :Bool))
             end
         end
     end
@@ -660,7 +654,7 @@ function type_annotate!(sv::InferenceState, run_optimizer::Bool)
                 body[i] = ReturnNode(annotate_slot_load(expr.val, st_i, sv, undefs))
             elseif isa(expr, GotoIfNot)
                 body[i] = GotoIfNot(annotate_slot_load(expr.cond, st_i, sv, undefs), expr.dest)
-            elseif isa(expr, Slot)
+            elseif isa(expr, SlotNumber)
                 body[i] = visit_slot_load!(expr, st_i, sv, undefs)
             end
         else
@@ -669,13 +663,11 @@ function type_annotate!(sv::InferenceState, run_optimizer::Bool)
             elseif run_optimizer
                 deleteat!(body, i)
                 deleteat!(states, i)
-                deleteat!(src.ssavaluetypes, i)
+                deleteat!(ssavaluetypes, i)
                 deleteat!(src.codelocs, i)
                 deleteat!(sv.stmt_info, i)
                 nexpr -= 1
-                if oldidx < length(changemap)
-                    changemap[oldidx + 1] = -1
-                end
+                changemap[oldidx] = -1
                 continue
             else
                 body[i] = Const(expr) # annotate that this statement actually is dead
@@ -782,22 +774,30 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize
     mi = specialize_method(method, atypes, sparams)::MethodInstance
     code = get(code_cache(interp), mi, nothing)
     if code isa CodeInstance # return existing rettype if the code is already inferred
-        update_valid_age!(caller, WorldRange(min_world(code), max_world(code)))
-        rettype = code.rettype
-        if isdefined(code, :rettype_const)
-            rettype_const = code.rettype_const
-            if isa(rettype_const, Vector{Any}) && !(Vector{Any} <: rettype)
-                return PartialStruct(rettype, rettype_const), mi
-            elseif rettype <: Core.OpaqueClosure && isa(rettype_const, PartialOpaque)
-                return rettype_const, mi
-            elseif isa(rettype_const, InterConditional)
-                return rettype_const, mi
+        if code.inferred === nothing && is_stmt_inline(get_curr_ssaflag(caller))
+            # we already inferred this edge previously and decided to discard the inferred code,
+            # but the inliner will request to use it, so we re-infer it here and keep it around in the local cache
+            cache = :local
+        else
+            update_valid_age!(caller, WorldRange(min_world(code), max_world(code)))
+            rettype = code.rettype
+            if isdefined(code, :rettype_const)
+                rettype_const = code.rettype_const
+                if isa(rettype_const, Vector{Any}) && !(Vector{Any} <: rettype)
+                    return PartialStruct(rettype, rettype_const), mi
+                elseif rettype <: Core.OpaqueClosure && isa(rettype_const, PartialOpaque)
+                    return rettype_const, mi
+                elseif isa(rettype_const, InterConditional)
+                    return rettype_const, mi
+                else
+                    return Const(rettype_const), mi
+                end
             else
-                return Const(rettype_const), mi
+                return rettype, mi
             end
-        else
-            return rettype, mi
         end
+    else
+        cache = :global # cache edge targets by default
     end
     if ccall(:jl_get_module_infer, Cint, (Any,), method.module) == 0
         return Any, nothing
@@ -813,7 +813,7 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize
         # completely new
         lock_mi_inference(interp, mi)
         result = InferenceResult(mi)
-        frame = InferenceState(result, #=cached=#true, interp) # always use the cache for edge targets
+        frame = InferenceState(result, cache, interp) # always use the cache for edge targets
         if frame === nothing
             # can't get the source for this, so we know nothing
             unlock_mi_inference(interp, mi)
@@ -842,14 +842,9 @@ function typeinf_code(interp::AbstractInterpreter, method::Method, @nospecialize
     mi = specialize_method(method, atypes, sparams)::MethodInstance
     ccall(:jl_typeinf_begin, Cvoid, ())
     result = InferenceResult(mi)
-    frame = InferenceState(result, false, interp)
+    frame = InferenceState(result, run_optimizer ? :global : :no, interp)
     frame === nothing && return (nothing, Any)
-    if typeinf(interp, frame) && run_optimizer
-        opt_params = OptimizationParams(interp)
-        result.src = OptimizationState(frame, opt_params, interp)
-        optimize(interp, result.src, opt_params, ignorelimited(result.result))
-        frame.src = finish!(interp, result)
-    end
+    typeinf(interp, frame)
     ccall(:jl_typeinf_end, Cvoid, ())
     frame.inferred || return (nothing, Any)
     return (frame.src, widenconst(ignorelimited(result.result)))
@@ -906,7 +901,7 @@ function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance)
         return retrieve_code_info(mi)
     end
     lock_mi_inference(interp, mi)
-    frame = InferenceState(InferenceResult(mi), #=cached=#true, interp)
+    frame = InferenceState(InferenceResult(mi), #=cache=#:global, interp)
     frame === nothing && return nothing
     typeinf(interp, frame)
     ccall(:jl_typeinf_end, Cvoid, ())
@@ -929,11 +924,11 @@ function typeinf_type(interp::AbstractInterpreter, method::Method, @nospecialize
             return code.rettype
         end
     end
-    frame = InferenceResult(mi)
-    typeinf(interp, frame, true)
+    result = InferenceResult(mi)
+    typeinf(interp, result, :global)
     ccall(:jl_typeinf_end, Cvoid, ())
-    frame.result isa InferenceState && return nothing
-    return widenconst(ignorelimited(frame.result))
+    result.result isa InferenceState && return nothing
+    return widenconst(ignorelimited(result.result))
 end
 
 # This is a bridge for the C code calling `jl_typeinf_func()`
@@ -949,7 +944,7 @@ function typeinf_ext_toplevel(interp::AbstractInterpreter, linfo::MethodInstance
             ccall(:jl_typeinf_begin, Cvoid, ())
             if !src.inferred
                 result = InferenceResult(linfo)
-                frame = InferenceState(result, src, #=cached=#true, interp)
+                frame = InferenceState(result, src, #=cache=#:global, interp)
                 typeinf(interp, frame)
                 @assert frame.inferred # TODO: deal with this better
                 src = frame.src
diff --git a/base/compiler/typelattice.jl b/base/compiler/typelattice.jl
index e2668e15c9e2cf..2f026d41efb355 100644
--- a/base/compiler/typelattice.jl
+++ b/base/compiler/typelattice.jl
@@ -19,7 +19,7 @@
 import Core: Const, PartialStruct
 
 # The type of this value might be Bool.
-# However, to enable a limited amount of back-propagagation,
+# However, to enable a limited amount of back-propagation,
 # we also keep some information about how this Bool value was created.
 # In particular, if you branch on this value, then may assume that in
 # the true branch, the type of `var` will be limited by `vtype` and in
@@ -33,7 +33,7 @@ import Core: Const, PartialStruct
 # end
 # ```
 struct Conditional
-    var::Slot
+    var::SlotNumber
     vtype
     elsetype
     function Conditional(
@@ -84,20 +84,30 @@ end
 const VarTable = Array{Any,1}
 
 struct StateUpdate
-    var::Union{Slot,SSAValue}
+    var::SlotNumber
     vtype::VarState
     state::VarTable
     conditional::Bool
 end
 
 # Represent that the type estimate has been approximated, due to "causes"
-# (only used in abstractinterpret, doesn't appear in optimize)
+# (only used in abstract interpretation, doesn't appear in optimization)
 # N.B. in the lattice, this is epsilon smaller than `typ` (except Union{})
 struct LimitedAccuracy
     typ
     causes::IdSet{InferenceState}
-    LimitedAccuracy(@nospecialize(typ), causes::IdSet{InferenceState}) =
-        new(typ, causes)
+    function LimitedAccuracy(@nospecialize(typ), causes::IdSet{InferenceState})
+        @assert !isa(typ, LimitedAccuracy) "malformed LimitedAccuracy"
+        return new(typ, causes)
+    end
+end
+
+@inline function collect_limitations!(@nospecialize(typ), sv::InferenceState)
+    if isa(typ, LimitedAccuracy)
+        union!(sv.pclimitations, typ.causes)
+        return typ.typ
+    end
+    return typ
 end
 
 struct NotFound end
@@ -299,7 +309,7 @@ function smerge(sa::Union{NotFound,VarState}, sb::Union{NotFound,VarState})
 end
 
 @inline tchanged(@nospecialize(n), @nospecialize(o)) = o === NOT_FOUND || (n !== NOT_FOUND && !(n ⊑ o))
-@inline schanged(@nospecialize(n), @nospecialize(o)) = (n !== o) && (o === NOT_FOUND || (n !== NOT_FOUND && !issubstate(n, o)))
+@inline schanged(@nospecialize(n), @nospecialize(o)) = (n !== o) && (o === NOT_FOUND || (n !== NOT_FOUND && !issubstate(n::VarState, o::VarState)))
 
 widenconditional(@nospecialize typ) = typ
 function widenconditional(typ::AnyConditional)
@@ -313,28 +323,26 @@ function widenconditional(typ::AnyConditional)
 end
 widenconditional(t::LimitedAccuracy) = error("unhandled LimitedAccuracy")
 
+widenwrappedconditional(@nospecialize(typ))   = widenconditional(typ)
+widenwrappedconditional(typ::LimitedAccuracy) = LimitedAccuracy(widenconditional(typ.typ), typ.causes)
+
 ignorelimited(@nospecialize typ) = typ
 ignorelimited(typ::LimitedAccuracy) = typ.typ
 
 function stupdate!(state::Nothing, changes::StateUpdate)
     newst = copy(changes.state)
-    if isa(changes.var, Slot)
-        changeid = slot_id(changes.var::Slot)
-        newst[changeid] = changes.vtype
-        # remove any Conditional for this Slot from the vtable
-        # (unless this change is came from the conditional)
-        if !changes.conditional
-            for i = 1:length(newst)
-                newtype = newst[i]
-                if isa(newtype, VarState)
-                    newtypetyp = ignorelimited(newtype.typ)
-                    if isa(newtypetyp, Conditional) && slot_id(newtypetyp.var) == changeid
-                        newtypetyp = widenconditional(newtypetyp)
-                        if newtype.typ isa LimitedAccuracy
-                            newtypetyp = LimitedAccuracy(newtypetyp, newtype.typ.causes)
-                        end
-                        newst[i] = VarState(newtypetyp, newtype.undef)
-                    end
+    changeid = slot_id(changes.var)
+    newst[changeid] = changes.vtype
+    # remove any Conditional for this slot from the vtable
+    # (unless this change came from the conditional)
+    if !changes.conditional
+        for i = 1:length(newst)
+            newtype = newst[i]
+            if isa(newtype, VarState)
+                newtypetyp = ignorelimited(newtype.typ)
+                if isa(newtypetyp, Conditional) && slot_id(newtypetyp.var) == changeid
+                    newtypetyp = widenwrappedconditional(newtype.typ)
+                    newst[i] = VarState(newtypetyp, newtype.undef)
                 end
             end
         end
@@ -343,11 +351,8 @@ function stupdate!(state::Nothing, changes::StateUpdate)
 end
 
 function stupdate!(state::VarTable, changes::StateUpdate)
-    if !isa(changes.var, Slot)
-        return stupdate!(state, changes.state)
-    end
     newstate = nothing
-    changeid = slot_id(changes.var::Slot)
+    changeid = slot_id(changes.var)
     for i = 1:length(state)
         if i == changeid
             newtype = changes.vtype
@@ -355,15 +360,12 @@ function stupdate!(state::VarTable, changes::StateUpdate)
             newtype = changes.state[i]
         end
         oldtype = state[i]
-        # remove any Conditional for this Slot from the vtable
+        # remove any Conditional for this slot from the vtable
         # (unless this change is came from the conditional)
         if !changes.conditional && isa(newtype, VarState)
             newtypetyp = ignorelimited(newtype.typ)
             if isa(newtypetyp, Conditional) && slot_id(newtypetyp.var) == changeid
-                newtypetyp = widenconditional(newtypetyp)
-                if newtype.typ isa LimitedAccuracy
-                    newtypetyp = LimitedAccuracy(newtypetyp, newtype.typ.causes)
-                end
+                newtypetyp = widenwrappedconditional(newtype.typ)
                 newtype = VarState(newtypetyp, newtype.undef)
             end
         end
@@ -393,11 +395,8 @@ stupdate!(state::Nothing, changes::VarTable) = copy(changes)
 stupdate!(state::Nothing, changes::Nothing) = nothing
 
 function stupdate1!(state::VarTable, change::StateUpdate)
-    if !isa(change.var, Slot)
-        return false
-    end
-    changeid = slot_id(change.var::Slot)
-    # remove any Conditional for this Slot from the catch block vtable
+    changeid = slot_id(change.var)
+    # remove any Conditional for this slot from the catch block vtable
     # (unless this change is came from the conditional)
     if !change.conditional
         for i = 1:length(state)
@@ -407,7 +406,7 @@ function stupdate1!(state::VarTable, change::StateUpdate)
                 if isa(oldtypetyp, Conditional) && slot_id(oldtypetyp.var) == changeid
                     oldtypetyp = widenconditional(oldtypetyp)
                     if oldtype.typ isa LimitedAccuracy
-                        oldtypetyp = LimitedAccuracy(oldtypetyp, oldtype.typ.causes)
+                        oldtypetyp = LimitedAccuracy(oldtypetyp, (oldtype.typ::LimitedAccuracy).causes)
                     end
                     state[i] = VarState(oldtypetyp, oldtype.undef)
                 end
diff --git a/base/compiler/typelimits.jl b/base/compiler/typelimits.jl
index cb584a6788f59b..23045c65cc6bbb 100644
--- a/base/compiler/typelimits.jl
+++ b/base/compiler/typelimits.jl
@@ -128,7 +128,15 @@ function _limit_type_size(@nospecialize(t), @nospecialize(c), sources::SimpleVec
         end
         return Vararg{VaT}
     elseif isa(t, DataType)
-        if isa(c, DataType)
+        if isa(c, Core.TypeofVararg)
+            # Tuple{Vararg{T}} --> Tuple{T} is OK
+            return _limit_type_size(t, unwrapva(c), sources, depth, 0)
+        elseif isType(t) # allow taking typeof as Type{...}, but ensure it doesn't start nesting
+            tt = unwrap_unionall(t.parameters[1])
+            (!isa(tt, DataType) || isType(tt)) && (depth += 1)
+            is_derived_type_from_any(tt, sources, depth) && return t
+            return Type
+        elseif isa(c, DataType)
             tP = t.parameters
             cP = c.parameters
             if t.name === c.name && !isempty(cP)
@@ -158,15 +166,6 @@ function _limit_type_size(@nospecialize(t), @nospecialize(c), sources::SimpleVec
                     return Tuple{Q...}
                 end
             end
-        elseif isa(c, Core.TypeofVararg)
-            # Tuple{Vararg{T}} --> Tuple{T} is OK
-            return _limit_type_size(t, c.T, sources, depth, 0)
-        end
-        if isType(t) # allow taking typeof as Type{...}, but ensure it doesn't start nesting
-            tt = unwrap_unionall(t.parameters[1])
-            if isa(tt, DataType) && !isType(tt)
-                is_derived_type_from_any(tt, sources, depth) && return t
-            end
         end
         if allowed_tuplelen < 1 && t.name === Tuple.name
             return Any
@@ -226,9 +225,19 @@ function type_more_complex(@nospecialize(t), @nospecialize(c), sources::SimpleVe
         return t !== 1 && !(0 <= t < c) # alternatively, could use !(abs(t) <= abs(c) || abs(t) < n) for some n
     end
     # base case for data types
-    if isa(t, DataType)
+    if isa(t, Core.TypeofVararg)
+        if isa(c, Core.TypeofVararg)
+            return type_more_complex(unwrapva(t), unwrapva(c), sources, depth + 1, tupledepth, 0)
+        end
+    elseif isa(t, DataType)
         tP = t.parameters
-        if isa(c, DataType) && t.name === c.name
+        if isa(c, Core.TypeofVararg)
+            return type_more_complex(t, unwrapva(c), sources, depth, tupledepth, 0)
+        elseif isType(t) # allow taking typeof any source type anywhere as Type{...}, as long as it isn't nesting Type{Type{...}}
+            tt = unwrap_unionall(t.parameters[1])
+            (!isa(tt, DataType) || isType(tt)) && (depth += 1)
+            return !is_derived_type_from_any(tt, sources, depth)
+        elseif isa(c, DataType) && t.name === c.name
             cP = c.parameters
             length(cP) < length(tP) && return true
             length(cP) > length(tP) && !isvarargtype(tP[end]) && depth == 1 && return false
@@ -236,7 +245,7 @@ function type_more_complex(@nospecialize(t), @nospecialize(c), sources::SimpleVe
             # allow creating variation within a nested tuple, but only so deep
             if t.name === Tuple.name && tupledepth > 0
                 tupledepth -= 1
-            elseif !isvarargtype(t)
+            else
                 tupledepth = 0
             end
             isgenerator = (t.name.name === :Generator && t.name.module === _topmod(t.name.module))
@@ -247,7 +256,7 @@ function type_more_complex(@nospecialize(t), @nospecialize(c), sources::SimpleVe
                     let tPi = unwrap_unionall(tPi),
                         cPi = unwrap_unionall(cPi)
                         if isa(tPi, DataType) && isa(cPi, DataType) &&
-                                !tPi.abstract && !cPi.abstract &&
+                            !isabstracttype(tPi) && !isabstracttype(cPi) &&
                                 sym_isless(cPi.name.name, tPi.name.name)
                             # allow collect on (anonymous) Generators to nest, provided that their functions are appropriately ordered
                             # TODO: is there a better way?
@@ -258,15 +267,6 @@ function type_more_complex(@nospecialize(t), @nospecialize(c), sources::SimpleVe
                 type_more_complex(tPi, cPi, sources, depth + 1, tupledepth, 0) && return true
             end
             return false
-        elseif isvarargtype(c)
-            return type_more_complex(t, unwrapva(c), sources, depth, tupledepth, 0)
-        end
-        if isType(t) # allow taking typeof any source type anywhere as Type{...}, as long as it isn't nesting Type{Type{...}}
-            tt = unwrap_unionall(t.parameters[1])
-            if isa(tt, DataType) && !isType(tt)
-                is_derived_type_from_any(tt, sources, depth) || return true
-                return false
-            end
         end
     end
     return true
diff --git a/base/compiler/types.jl b/base/compiler/types.jl
index 773047d2b00e5f..5f8f6563124584 100644
--- a/base/compiler/types.jl
+++ b/base/compiler/types.jl
@@ -6,16 +6,16 @@
 An abstract base class that allows multiple dispatch to determine the method of
 executing Julia code.  The native Julia LLVM pipeline is enabled by using the
 `NativeInterpreter` concrete instantiation of this abstract class, others can be
-swapped in as long as they follow the AbstractInterpreter API.
+swapped in as long as they follow the `AbstractInterpreter` API.
 
-All AbstractInterpreters are expected to provide at least the following methods:
-
-- InferenceParams(interp) - return an `InferenceParams` instance
-- OptimizationParams(interp) - return an `OptimizationParams` instance
-- get_world_counter(interp) - return the world age for this interpreter
-- get_inference_cache(interp) - return the runtime inference cache
+If `interp` is an `AbstractInterpreter`, it is expected to provide at least the following methods:
+- `InferenceParams(interp)` - return an `InferenceParams` instance
+- `OptimizationParams(interp)` - return an `OptimizationParams` instance
+- `get_world_counter(interp)` - return the world age for this interpreter
+- `get_inference_cache(interp)` - return the runtime inference cache
+- `code_cache(interp)` - return the global inference cache
 """
-abstract type AbstractInterpreter; end
+abstract type AbstractInterpreter end
 
 """
     InferenceResult
@@ -35,7 +35,6 @@ mutable struct InferenceResult
     end
 end
 
-
 """
     OptimizationParams
 
@@ -185,43 +184,52 @@ InferenceParams(ni::NativeInterpreter) = ni.inf_params
 OptimizationParams(ni::NativeInterpreter) = ni.opt_params
 get_world_counter(ni::NativeInterpreter) = ni.world
 get_inference_cache(ni::NativeInterpreter) = ni.cache
-
-code_cache(ni::NativeInterpreter) = WorldView(GLOBAL_CI_CACHE, ni.world)
+code_cache(ni::NativeInterpreter) = WorldView(GLOBAL_CI_CACHE, get_world_counter(ni))
 
 """
     lock_mi_inference(ni::NativeInterpreter, mi::MethodInstance)
 
-Hint that `mi` is in inference to help accelerate bootstrapping. This helps limit the amount of wasted work we might do when inference is working on initially inferring itself by letting us detect when inference is already in progress and not running a second copy on it. This creates a data-race, but the entry point into this code from C (jl_type_infer) already includes detection and restriction on recursion, so it is hopefully mostly a benign problem (since it should really only happen during the first phase of bootstrapping that we encounter this flag).
+Hint that `mi` is in inference to help accelerate bootstrapping.
+This helps us limit the amount of wasted work we might do when inference is working on initially inferring itself
+by letting us detect when inference is already in progress and not running a second copy on it.
+This creates a data-race, but the entry point into this code from C (`jl_type_infer`) already includes detection and restriction on recursion,
+so it is hopefully mostly a benign problem (since it should really only happen during the first phase of bootstrapping that we encounter this flag).
 """
-lock_mi_inference(ni::NativeInterpreter, mi::MethodInstance) = (mi.inInference = true; nothing)
+lock_mi_inference(::NativeInterpreter, mi::MethodInstance) = (mi.inInference = true; nothing)
+lock_mi_inference(::AbstractInterpreter, ::MethodInstance) = return
 
 """
-    See lock_mi_inference
+See `lock_mi_inference`.
 """
-unlock_mi_inference(ni::NativeInterpreter, mi::MethodInstance) = (mi.inInference = false; nothing)
+unlock_mi_inference(::NativeInterpreter, mi::MethodInstance) = (mi.inInference = false; nothing)
+unlock_mi_inference(::AbstractInterpreter, ::MethodInstance) = return
 
 """
-Emit an analysis remark during inference for the current line (`sv.pc`). These annotations are ignored
-by the native interpreter, but can be used by external tooling to annotate
-inference results.
+Emit an analysis remark during inference for the current line (`sv.pc`).
+These annotations are ignored by the native interpreter, but can be used by external tooling
+to annotate inference results.
 """
-add_remark!(ni::NativeInterpreter, sv, s) = nothing
+add_remark!(::AbstractInterpreter, sv#=::InferenceState=#, s) = return
 
-may_optimize(ni::NativeInterpreter) = true
-may_compress(ni::NativeInterpreter) = true
-may_discard_trees(ni::NativeInterpreter) = true
-verbose_stmt_info(ni::NativeInterpreter) = false
+may_optimize(::AbstractInterpreter) = true
+may_compress(::AbstractInterpreter) = true
+may_discard_trees(::AbstractInterpreter) = true
+verbose_stmt_info(::AbstractInterpreter) = false
 
-method_table(ai::AbstractInterpreter) = InternalMethodTable(get_world_counter(ai))
-inlining_policy(ai::AbstractInterpreter) = default_inlining_policy
+method_table(interp::AbstractInterpreter) = InternalMethodTable(get_world_counter(interp))
 
-# define inference bail out logic
-# `NativeInterpreter` bails out from inference when
-# - a lattice element grows up to `Any` (inter-procedural call, abstract apply)
-# - a lattice element gets down to `Bottom` (statement inference, local frame inference)
-# - inferring non-concrete toplevel call sites
-bail_out_call(interp::AbstractInterpreter, @nospecialize(t), sv)      = t === Any
-bail_out_apply(interp::AbstractInterpreter, @nospecialize(t), sv)     = t === Any
-function bail_out_toplevel_call(interp::AbstractInterpreter, @nospecialize(sig), sv)
-    return isa(sv.linfo.def, Module) && !isdispatchtuple(sig)
-end
+"""
+By default `AbstractInterpreter` implements the following inference bail out logic:
+- `bail_out_toplevel_call(::AbstractInterpreter, callsig, ::InferenceState)`: bail out from inter-procedural inference when inferring top-level and non-concrete call site `callsig`
+- `bail_out_call(::AbstractInterpreter, rt, ::InferenceState)`: bail out from inter-procedural inference when return type `rt` grows up to `Any`
+- `bail_out_apply(::AbstractInterpreter, rt, ::InferenceState)`: bail out from `_apply_iterate` inference when return type `rt` grows up to `Any`
+
+It also bails out from local statement/frame inference when any lattice element gets down to `Bottom`,
+but `AbstractInterpreter` doesn't provide a specific interface for configuring it.
+"""
+bail_out_toplevel_call(::AbstractInterpreter, @nospecialize(callsig), sv#=::InferenceState=#) =
+    return isa(sv.linfo.def, Module) && !isdispatchtuple(callsig)
+bail_out_call(::AbstractInterpreter, @nospecialize(rt), sv#=::InferenceState=#) =
+    return rt === Any
+bail_out_apply(::AbstractInterpreter, @nospecialize(rt), sv#=::InferenceState=#) =
+    return rt === Any
diff --git a/base/compiler/typeutils.jl b/base/compiler/typeutils.jl
index fc9282394b91ae..c869759f97d9a6 100644
--- a/base/compiler/typeutils.jl
+++ b/base/compiler/typeutils.jl
@@ -42,6 +42,8 @@ end
 
 has_const_info(@nospecialize x) = (!isa(x, Type) && !isvarargtype(x)) || isType(x)
 
+has_concrete_subtype(d::DataType) = d.flags & 0x20 == 0x20  # parses as (d.flags & 0x20) == 0x20: true iff that flag bit is set. NOTE(review): confirm 0x20 against the DataType flags layout
+
 # Subtyping currently intentionally answers certain queries incorrectly for kind types. For
 # some of these queries, this check can be used to somewhat protect against making incorrect
 # decisions based on incorrect subtyping. Note that this check, itself, is broken for
@@ -56,6 +58,37 @@ function isknownlength(t::DataType)
     return isdefined(va, :N) && va.N isa Int
 end
 
+# Compute the minimum number of initialized fields for a particular datatype
+# (therefore also a lower bound on the number of fields); returns 0 when no bound can be derived
+function datatype_min_ninitialized(t::DataType)
+    isabstracttype(t) && return 0  # abstract types: no bound beyond 0
+    if t.name === NamedTuple_typename
+        names, types = t.parameters[1], t.parameters[2]
+        if names isa Tuple
+            return length(names)  # names are known: one field per name
+        end
+        t = argument_datatype(types)  # names unknown: continue with the field-types parameter instead
+        t isa DataType || return 0
+        t.name === Tuple.name || return 0  # only a Tuple datatype can be counted by the branch below
+    end
+    if t.name === Tuple.name
+        n = length(t.parameters)
+        n == 0 && return 0
+        va = t.parameters[n]
+        if isvarargtype(va)
+            n -= 1  # a trailing Vararg is not itself counted as an element
+            if isdefined(va, :N)
+                va = va.N
+                if va isa Int
+                    n += va  # Vararg with a known Int length contributes exactly that many
+                end
+            end
+        end
+        return n
+    end
+    return length(t.name.names) - t.name.n_uninitialized  # other types: field-name count minus the type name's n_uninitialized
+end
+
 # test if non-Type, non-TypeVar `x` can be used to parameterize a type
 function valid_tparam(@nospecialize(x))
     if isa(x, Tuple)
diff --git a/base/compiler/utilities.jl b/base/compiler/utilities.jl
index 4e9d2b74cf40cf..8dfe1f65f0d539 100644
--- a/base/compiler/utilities.jl
+++ b/base/compiler/utilities.jl
@@ -4,7 +4,7 @@
 # generic #
 ###########
 
-if !isdefined(@__MODULE__, Symbol("@timeit"))
+if !@isdefined(var"@timeit")
     # This is designed to allow inserting timers when loading a second copy
     # of inference for performing performance experiments.
     macro timeit(args...)
@@ -59,7 +59,7 @@ end
 
 # Meta expression head, these generally can't be deleted even when they are
 # in a dead branch but can be ignored when analyzing uses/liveness.
-is_meta_expr_head(head::Symbol) = (head === :inbounds || head === :boundscheck || head === :meta || head === :loopinfo)
+is_meta_expr_head(head::Symbol) = head === :boundscheck || head === :meta || head === :loopinfo
 
 sym_isless(a::Symbol, b::Symbol) = ccall(:strcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}), a, b) < 0
 
@@ -134,6 +134,7 @@ function retrieve_code_info(linfo::MethodInstance)
         c.parent = linfo
         return c
     end
+    return nothing
 end
 
 # Get at the nonfunction_mt, which happens to be the mt of SimpleVector
@@ -155,7 +156,15 @@ function subst_trivial_bounds(@nospecialize(atypes))
     end
     v = atypes.var
     if isconcretetype(v.ub) || v.lb === v.ub
-        return subst_trivial_bounds(atypes{v.ub})
+        subst = try
+            atypes{v.ub}
+        catch
+            # Note in rare cases a var bound might not be valid to substitute.
+            nothing
+        end
+        if subst !== nothing
+            return subst_trivial_bounds(subst)
+        end
     end
     return UnionAll(v, subst_trivial_bounds(atypes.body))
 end
@@ -175,7 +184,7 @@ function normalize_typevars(method::Method, @nospecialize(atypes), sparams::Simp
 end
 
 # get a handle to the unique specialization object representing a particular instantiation of a call
-function specialize_method(method::Method, @nospecialize(atypes), sparams::SimpleVector, preexisting::Bool=false, compilesig::Bool=false)
+function specialize_method(method::Method, @nospecialize(atypes), sparams::SimpleVector; preexisting::Bool=false, compilesig::Bool=false)
     if isa(atypes, UnionAll)
         atypes, sparams = normalize_typevars(method, atypes, sparams)
     end
@@ -187,19 +196,19 @@ function specialize_method(method::Method, @nospecialize(atypes), sparams::Simpl
     if preexisting
         # check cached specializations
         # for an existing result stored there
-        return ccall(:jl_specializations_lookup, Any, (Any, Any), method, atypes)
+        return ccall(:jl_specializations_lookup, Any, (Any, Any), method, atypes)::Union{Nothing,MethodInstance}
     end
     return ccall(:jl_specializations_get_linfo, Ref{MethodInstance}, (Any, Any, Any), method, atypes, sparams)
 end
 
-function specialize_method(match::MethodMatch, preexisting::Bool=false, compilesig::Bool=false)
-    return specialize_method(match.method, match.spec_types, match.sparams, preexisting, compilesig)
+function specialize_method(match::MethodMatch; kwargs...)
+    return specialize_method(match.method, match.spec_types, match.sparams; kwargs...)
 end
 
 # This function is used for computing alternate limit heuristics
 function method_for_inference_heuristics(method::Method, @nospecialize(sig), sparams::SimpleVector)
     if isdefined(method, :generator) && method.generator.expand_early && may_invoke_generator(method, sig, sparams)
-        method_instance = specialize_method(method, sig, sparams, false)
+        method_instance = specialize_method(method, sig, sparams)
         if isa(method_instance, MethodInstance)
             cinfo = get_staged(method_instance)
             if isa(cinfo, CodeInfo)
@@ -220,7 +229,7 @@ const empty_slottypes = Any[]
 function argextype(@nospecialize(x), src, sptypes::Vector{Any}, slottypes::Vector{Any} = empty_slottypes)
     if isa(x, Expr)
         if x.head === :static_parameter
-            return sptypes[x.args[1]]
+            return sptypes[x.args[1]::Int]
         elseif x.head === :boundscheck
             return Bool
         elseif x.head === :copyast
@@ -267,6 +276,8 @@ function find_ssavalue_uses(body::Vector{Any}, nvals::Int)
             push!(uses[e.id], line)
         elseif isa(e, Expr)
             find_ssavalue_uses(e, uses, line)
+        elseif isa(e, PhiNode)
+            find_ssavalue_uses(e, uses, line)
         end
     end
     return uses
@@ -287,6 +298,14 @@ function find_ssavalue_uses(e::Expr, uses::Vector{BitSet}, line::Int)
     end
 end
 
+function find_ssavalue_uses(e::PhiNode, uses::Vector{BitSet}, line::Int)  # record `line` in `uses[id]` for every SSAValue input of phi node `e`
+    for i = 1:length(e.values)
+        isassigned(e.values, i) || continue  # phi inputs may be #undef; reading such a slot would throw UndefRefError
+        val = e.values[i]
+        isa(val, SSAValue) && push!(uses[val.id], line)  # non-SSAValue inputs are not tracked
+    end
+end
+
 function is_throw_call(e::Expr)
     if e.head === :call
         f = e.args[1]
diff --git a/base/compiler/validation.jl b/base/compiler/validation.jl
index 751b594130ad29..b7e63900a3fdf3 100644
--- a/base/compiler/validation.jl
+++ b/base/compiler/validation.jl
@@ -1,9 +1,10 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 # Expr head => argument count bounds
-const VALID_EXPR_HEADS = IdDict{Symbol,UnitRange}(
+const VALID_EXPR_HEADS = IdDict{Symbol,UnitRange{Int}}(
     :call => 1:typemax(Int),
     :invoke => 2:typemax(Int),
+    :invoke_modify => 3:typemax(Int),
     :static_parameter => 1:1,
     :(&) => 1:1,
     :(=) => 2:2,
@@ -16,6 +17,8 @@ const VALID_EXPR_HEADS = IdDict{Symbol,UnitRange}(
     :leave => 1:1,
     :pop_exception => 1:1,
     :inbounds => 1:1,
+    :inline => 1:1,
+    :noinline => 1:1,
     :boundscheck => 0:0,
     :copyast => 1:1,
     :meta => 0:typemax(Int),
@@ -51,7 +54,7 @@ const SIGNATURE_NARGS_MISMATCH = "method signature does not match number of meth
 const SLOTNAMES_NARGS_MISMATCH = "CodeInfo for method contains fewer slotnames than the number of method arguments"
 
 struct InvalidCodeError <: Exception
-    kind::AbstractString
+    kind::String
     meta::Any
 end
 InvalidCodeError(kind::AbstractString) = InvalidCodeError(kind, nothing)
@@ -76,7 +79,7 @@ end
 
 function _validate_val!(@nospecialize(x), errors, ssavals::BitSet)
     if isa(x, Expr)
-        if x.head === :call || x.head === :invoke
+        if x.head === :call || x.head === :invoke || x.head === :invoke_modify
             f = x.args[1]
             if f isa GlobalRef && (f.name === :cglobal) && x.head === :call
                 # TODO: these are not yet linearized
@@ -136,12 +139,13 @@ function validate_code!(errors::Vector{>:InvalidCodeError}, c::CodeInfo, is_top_
                 end
                 validate_val!(lhs)
                 validate_val!(rhs)
-            elseif head === :call || head === :invoke || head === :gc_preserve_end || head === :meta ||
+            elseif head === :call || head === :invoke || x.head === :invoke_modify ||
+                head === :gc_preserve_end || head === :meta ||
                 head === :inbounds || head === :foreigncall || head === :cfunction ||
                 head === :const || head === :enter || head === :leave || head === :pop_exception ||
                 head === :method || head === :global || head === :static_parameter ||
                 head === :new || head === :splatnew || head === :thunk || head === :loopinfo ||
-                head === :throw_undef_if_not || head === :code_coverage_effect
+                head === :throw_undef_if_not || head === :code_coverage_effect || head === :inline || head === :noinline
                 validate_val!(x)
             else
                 # TODO: nothing is actually in statement position anymore
@@ -180,10 +184,11 @@ function validate_code!(errors::Vector{>:InvalidCodeError}, c::CodeInfo, is_top_
     !is_top_level && nslotnames == 0 && push!(errors, InvalidCodeError(EMPTY_SLOTNAMES))
     nslotnames < nslotflags && push!(errors, InvalidCodeError(SLOTFLAGS_MISMATCH, (nslotnames, nslotflags)))
     if c.inferred
-        nssavaluetypes = length(c.ssavaluetypes)
+        nssavaluetypes = length(c.ssavaluetypes::Vector{Any})
         nssavaluetypes < nssavals && push!(errors, InvalidCodeError(SSAVALUETYPES_MISMATCH, (nssavals, nssavaluetypes)))
     else
-        c.ssavaluetypes != nssavals && push!(errors, InvalidCodeError(SSAVALUETYPES_MISMATCH_UNINFERRED, (nssavals, c.ssavaluetypes)))
+        ssavaluetypes = c.ssavaluetypes::Int
+        ssavaluetypes != nssavals && push!(errors, InvalidCodeError(SSAVALUETYPES_MISMATCH_UNINFERRED, (nssavals, ssavaluetypes)))
     end
     return errors
 end
@@ -205,7 +210,7 @@ function validate_code!(errors::Vector{>:InvalidCodeError}, mi::Core.MethodInsta
     else
         m = mi.def::Method
         mnargs = m.nargs
-        n_sig_params = length(Core.Compiler.unwrap_unionall(m.sig).parameters)
+        n_sig_params = length((unwrap_unionall(m.sig)::DataType).parameters)
         if (m.isva ? (n_sig_params < (mnargs - 1)) : (n_sig_params != mnargs))
             push!(errors, InvalidCodeError(SIGNATURE_NARGS_MISMATCH, (m.isva, n_sig_params, mnargs)))
         end
@@ -235,7 +240,7 @@ end
 
 function is_valid_rvalue(@nospecialize(x))
     is_valid_argument(x) && return true
-    if isa(x, Expr) && x.head in (:new, :splatnew, :the_exception, :isdefined, :call, :invoke, :foreigncall, :cfunction, :gc_preserve_begin, :copyast)
+    if isa(x, Expr) && x.head in (:new, :splatnew, :the_exception, :isdefined, :call, :invoke, :invoke_modify, :foreigncall, :cfunction, :gc_preserve_begin, :copyast)
         return true
     end
     return false
diff --git a/base/complex.jl b/base/complex.jl
index 7cf266f2bc4dec..4fe736a7c04653 100644
--- a/base/complex.jl
+++ b/base/complex.jl
@@ -7,6 +7,8 @@ Complex number type with real and imaginary part of type `T`.
 
 `ComplexF16`, `ComplexF32` and `ComplexF64` are aliases for
 `Complex{Float16}`, `Complex{Float32}` and `Complex{Float64}` respectively.
+
+See also: [`Real`](@ref), [`complex`](@ref), [`real`](@ref).
 """
 struct Complex{T<:Real} <: Number
     re::T
@@ -20,10 +22,15 @@ Complex(x::Real) = Complex(x, zero(x))
 
 The imaginary unit.
 
+See also: [`imag`](@ref), [`angle`](@ref), [`complex`](@ref).
+
 # Examples
 ```jldoctest
 julia> im * im
 -1 + 0im
+
+julia> (2.0 + 3im)^2
+-5.0 + 12.0im
 ```
 """
 const im = Complex(false, true)
@@ -54,6 +61,8 @@ float(::Type{Complex{T}}) where {T} = Complex{float(T)}
 
 Return the real part of the complex number `z`.
 
+See also: [`imag`](@ref), [`reim`](@ref), [`complex`](@ref), [`isreal`](@ref), [`Real`](@ref).
+
 # Examples
 ```jldoctest
 julia> real(1 + 3im)
@@ -67,6 +76,8 @@ real(z::Complex) = z.re
 
 Return the imaginary part of the complex number `z`.
 
+See also: [`conj`](@ref), [`reim`](@ref), [`adjoint`](@ref), [`angle`](@ref).
+
 # Examples
 ```jldoctest
 julia> imag(1 + 3im)
@@ -254,6 +265,8 @@ end
 
 Compute the complex conjugate of a complex number `z`.
 
+See also: [`angle`](@ref), [`adjoint`](@ref).
+
 # Examples
 ```jldoctest
 julia> conj(1 + 3im)
@@ -533,6 +546,8 @@ end
 
 Return ``\\exp(iz)``.
 
+See also [`cispi`](@ref), [`angle`](@ref).
+
 # Examples
 ```jldoctest
 julia> cis(π) ≈ -1
@@ -574,6 +589,8 @@ end
 
 Compute the phase angle in radians of a complex number `z`.
 
+See also: [`atan`](@ref), [`cis`](@ref).
+
 # Examples
 ```jldoctest
 julia> rad2deg(angle(1 + im))
diff --git a/base/condition.jl b/base/condition.jl
index 4b9f57e47ab290..be0f618865a48f 100644
--- a/base/condition.jl
+++ b/base/condition.jl
@@ -5,7 +5,7 @@
 @noinline function concurrency_violation()
     # can be useful for debugging
     #try; error(); catch; ccall(:jlbacktrace, Cvoid, ()); end
-    error("concurrency violation detected")
+    throw(ConcurrencyViolationError("lock must be held"))
 end
 
 """
@@ -76,7 +76,6 @@ trylock(c::GenericCondition) = trylock(c.lock)
 islocked(c::GenericCondition) = islocked(c.lock)
 
 lock(f, c::GenericCondition) = lock(f, c.lock)
-unlock(f, c::GenericCondition) = unlock(f, c.lock)
 
 # have waiter wait for c
 function _wait2(c::GenericCondition, waiter::Task)
diff --git a/base/coreio.jl b/base/coreio.jl
index 2796c53e759f54..d0f8df290b41b7 100644
--- a/base/coreio.jl
+++ b/base/coreio.jl
@@ -9,22 +9,24 @@ function repr end
 
 struct DevNull <: IO end
 const devnull = DevNull()
-isreadable(::DevNull) = false
-iswritable(::DevNull) = true
-isopen(::DevNull) = true
-read(::DevNull, ::Type{UInt8}) = throw(EOFError())
 write(::DevNull, ::UInt8) = 1
 unsafe_write(::DevNull, ::Ptr{UInt8}, n::UInt)::Int = n
 close(::DevNull) = nothing
-flush(::DevNull) = nothing
-wait_readnb(::DevNull) = wait()
 wait_close(::DevNull) = wait()
-eof(::DevNull) = true
+bytesavailable(io::DevNull) = 0
 
 let CoreIO = Union{Core.CoreSTDOUT, Core.CoreSTDERR}
-    global write, unsafe_write
-    write(io::CoreIO, x::UInt8) = Core.write(io, x)
-    unsafe_write(io::CoreIO, x::Ptr{UInt8}, nb::UInt) = Core.unsafe_write(io, x, nb)
+    global write(io::CoreIO, x::UInt8) = Core.write(io, x)  # byte writes on the Core streams forward to Core.write
+    global unsafe_write(io::CoreIO, x::Ptr{UInt8}, nb::UInt) = Core.unsafe_write(io, x, nb)
+    # CoreIO is rebound below to also include DevNull, so the remaining methods cover devnull as well
+    CoreIO = Union{CoreIO, DevNull}
+    global read(::CoreIO, ::Type{UInt8}) = throw(EOFError())  # reading a byte always throws EOFError
+    global isopen(::CoreIO) = true
+    global isreadable(::CoreIO) = false
+    global iswritable(::CoreIO) = true
+    global flush(::CoreIO) = nothing  # flush is a no-op here
+    global eof(::CoreIO) = true
+    global wait_readnb(::CoreIO, nb::Int) = nothing  # returns immediately instead of waiting
 end
 
 stdin = devnull
diff --git a/base/deepcopy.jl b/base/deepcopy.jl
index 36c9c399def541..7ea5a041fc6327 100644
--- a/base/deepcopy.jl
+++ b/base/deepcopy.jl
@@ -53,7 +53,7 @@ end
 function deepcopy_internal(@nospecialize(x), stackdict::IdDict)
     T = typeof(x)::DataType
     nf = nfields(x)
-    if T.mutable
+    if ismutable(x)
         if haskey(stackdict, x)
             return stackdict[x]
         end
@@ -87,7 +87,7 @@ end
 
 function deepcopy_internal(x::Array, stackdict::IdDict)
     if haskey(stackdict, x)
-        return stackdict[x]
+        return stackdict[x]::typeof(x)
     end
     _deepcopy_array_t(x, eltype(x), stackdict)
 end
diff --git a/base/deprecated.jl b/base/deprecated.jl
index 45adac55a355cf..1f54940c4b5de2 100644
--- a/base/deprecated.jl
+++ b/base/deprecated.jl
@@ -117,12 +117,14 @@ function firstcaller(bt::Vector, funcsyms)
             end
             found = lkup.func in funcsyms
             # look for constructor type name
-            if !found && lkup.linfo isa Core.MethodInstance
+            if !found
                 li = lkup.linfo
-                ft = ccall(:jl_first_argument_datatype, Any, (Any,), li.def.sig)
-                if isa(ft, DataType) && ft.name === Type.body.name
-                    ft = unwrap_unionall(ft.parameters[1])
-                    found = (isa(ft, DataType) && ft.name.name in funcsyms)
+                if li isa Core.MethodInstance
+                    ft = ccall(:jl_first_argument_datatype, Any, (Any,), (li.def::Method).sig)
+                    if isa(ft, DataType) && ft.name === Type.body.name
+                        ft = unwrap_unionall(ft.parameters[1])
+                        found = (isa(ft, DataType) && ft.name.name in funcsyms)
+                    end
                 end
             end
         end
@@ -235,7 +237,36 @@ function parameter_upper_bound(t::UnionAll, idx)
 end
 
 # these were internal functions, but some packages seem to be relying on them
-@deprecate cat_shape(dims, shape::Tuple{}, shapes::Tuple...) cat_shape(dims, shapes)
+@deprecate cat_shape(dims, shape::Tuple{}, shapes::Tuple...) cat_shape(dims, shapes) false
 cat_shape(dims, shape::Tuple{}) = () # make sure `cat_shape(dims, ())` do not recursively calls itself
 
+@deprecate unsafe_indices(A) axes(A) false
+@deprecate unsafe_length(r) length(r) false
+
+# these were internal type aliases, but some packages seem to be relying on them
+const Any16{N} = Tuple{Any,Any,Any,Any,Any,Any,Any,Any,
+                        Any,Any,Any,Any,Any,Any,Any,Any,Vararg{Any,N}}
+const All16{T,N} = Tuple{T,T,T,T,T,T,T,T,
+                         T,T,T,T,T,T,T,T,Vararg{T,N}}
+
 # END 1.6 deprecations
+
+# BEGIN 1.7 deprecations
+
+# the plan is to eventually overload getproperty to access entries of the dict; until then, field access on `Pairs` warns
+@noinline function getproperty(x::Pairs, s::Symbol)
+    depwarn("use values(kwargs) and keys(kwargs) instead of kwargs.data and kwargs.itr", :getproperty, force=true)  # force=true is passed to depwarn
+    return getfield(x, s)  # still return the raw field after warning
+end
+
+# This function was marked as experimental and not exported.
+@deprecate catch_stack(task=current_task(); include_bt=true) current_exceptions(task; backtrace=include_bt) false
+
+# END 1.7 deprecations
+
+# BEGIN 1.8 deprecations
+
+@deprecate var"@_inline_meta"   var"@inline"   false
+@deprecate var"@_noinline_meta" var"@noinline" false
+
+# END 1.8 deprecations
diff --git a/base/dict.jl b/base/dict.jl
index cc5c9efb6ada85..6918677c4f0bb4 100644
--- a/base/dict.jl
+++ b/base/dict.jl
@@ -303,7 +303,6 @@ end
 # and the key would be inserted at pos
 # This version is for use by setindex! and get!
 function ht_keyindex2!(h::Dict{K,V}, key) where V where K
-    age0 = h.age
     sz = length(h.keys)
     iter = 0
     maxprobe = h.maxprobe
@@ -488,6 +487,9 @@ end
 Return the value stored for the given key, or the given default value if no mapping for the
 key is present.
 
+!!! compat "Julia 1.7"
+    For tuples and numbers, this function requires at least Julia 1.7.
+
 # Examples
 ```jldoctest
 julia> d = Dict("a"=>1, "b"=>2);
@@ -717,7 +719,7 @@ end
 function map!(f, iter::ValueIterator{<:Dict})
     dict = iter.dict
     vals = dict.vals
-    # @inbounds is here so the it gets propagated to isslotfiled
+    # @inbounds is here so that it gets propagated to isslotfilled
     @inbounds for i = dict.idxfloor:lastindex(vals)
         if isslotfilled(dict, i)
             vals[i] = f(vals[i])
@@ -726,6 +728,21 @@ function map!(f, iter::ValueIterator{<:Dict})
     return iter
 end
 
+function mergewith!(combine, d1::Dict{K, V}, d2::AbstractDict) where {K, V}
+    for (k, v) in d2
+        slot = ht_keyindex2!(d1, k)
+        if slot > 0
+            # key already stored in d1: fold the incoming value into the existing one
+            d1.vals[slot] = combine(d1.vals[slot], v)
+            continue
+        end
+        isequal(k, convert(K, k)) ||
+            throw(ArgumentError("$(limitrepr(k)) is not a valid key for type $K"))
+        @inbounds _setindex!(d1, convert(V, v), k, -slot)
+    end
+    return d1
+end
+
 struct ImmutableDict{K,V} <: AbstractDict{K,V}
     parent::ImmutableDict{K,V}
     key::K
@@ -792,12 +809,20 @@ function get(dict::ImmutableDict, key, default)
     return default
 end
 
+function get(default::Callable, dict::ImmutableDict, key)
+    while true
+        isdefined(dict, :parent) || return default() # no parent left to search: key is absent
+        isequal(dict.key, key) && return dict.value
+        dict = dict.parent
+    end
+end
+
 # this actually defines reverse iteration (e.g. it should not be used for merge/copy/filter type operations)
 function iterate(d::ImmutableDict{K,V}, t=d) where {K, V}
     !isdefined(t, :parent) && return nothing
     (Pair{K,V}(t.key, t.value), t.parent)
 end
-length(t::ImmutableDict) = count(x->true, t)
+length(t::ImmutableDict) = count(Returns(true), t)
 isempty(t::ImmutableDict) = !isdefined(t, :parent)
 empty(::ImmutableDict, ::Type{K}, ::Type{V}) where {K, V} = ImmutableDict{K,V}()
 
diff --git a/base/div.jl b/base/div.jl
index 2df79fbdc61243..1c2db28dedac03 100644
--- a/base/div.jl
+++ b/base/div.jl
@@ -5,14 +5,14 @@
 """
     div(x, y, r::RoundingMode=RoundToZero)
 
-The quotient from Euclidean division. Computes x/y, rounded to an integer according
-to the rounding mode `r`. In other words, the quantity
+The quotient from Euclidean (integer) division. Computes x/y, rounded to
+an integer according to the rounding mode `r`. In other words, the quantity
 
     round(x/y,r)
 
 without any intermediate rounding.
 
-See also: [`fld`](@ref), [`cld`](@ref) which are special cases of this function
+See also [`fld`](@ref) and [`cld`](@ref), which are special cases of this function.
 
 # Examples:
 ```jldoctest
@@ -88,12 +88,16 @@ rem(x::Integer, y::Integer, r::RoundingMode{:Nearest}) = divrem(x, y, r)[2]
 
 Largest integer less than or equal to `x/y`. Equivalent to `div(x, y, RoundDown)`.
 
-See also: [`div`](@ref)
+See also [`div`](@ref), [`cld`](@ref), [`fld1`](@ref).
 
 # Examples
 ```jldoctest
 julia> fld(7.3,5.5)
 1.0
+
+julia> fld.(-5:5, 3)'
+1×11 adjoint(::Vector{Int64}) with eltype Int64:
+ -2  -2  -1  -1  -1  0  0  0  1  1  1
 ```
 Because `fld(x, y)` implements strictly correct floored rounding based on the true
 value of floating-point numbers, unintuitive situations can arise. For example:
@@ -109,7 +113,7 @@ What is happening here is that the true value of the floating-point number writt
 as `0.1` is slightly larger than the numerical value 1/10 while `6.0` represents
 the number 6 precisely. Therefore the true value of `6.0 / 0.1` is slightly less
 than 60. When doing division, this is rounded to precisely `60.0`, but
-`fld(6.0, 0.1)` always takes the floor or the true value, so the result is `59.0`.
+`fld(6.0, 0.1)` always takes the floor of the true value, so the result is `59.0`.
 """
 fld(a, b) = div(a, b, RoundDown)
 
@@ -118,12 +122,16 @@ fld(a, b) = div(a, b, RoundDown)
 
 Smallest integer larger than or equal to `x/y`. Equivalent to `div(x, y, RoundUp)`.
 
-See also: [`div`](@ref)
+See also [`div`](@ref), [`fld`](@ref).
 
 # Examples
 ```jldoctest
 julia> cld(5.5,2.2)
 3.0
+
+julia> cld.(-5:5, 3)'
+1×11 adjoint(::Vector{Int64}) with eltype Int64:
+ -1  -1  -1  0  0  0  1  1  1  2  2
 ```
 """
 cld(a, b) = div(a, b, RoundUp)
@@ -136,6 +144,8 @@ The quotient and remainder from Euclidean division.
 Equivalent to `(div(x,y,r), rem(x,y,r))`. Equivalently, with the default
 value of `r`, this call is equivalent to `(x÷y, x%y)`.
 
+See also: [`fldmod`](@ref), [`cld`](@ref).
+
 # Examples
 ```jldoctest
 julia> divrem(3,7)
@@ -211,6 +221,8 @@ end
 
 The floored quotient and modulus after division. A convenience wrapper for
 `divrem(x, y, RoundDown)`. Equivalent to `(fld(x,y), mod(x,y))`.
+
+See also: [`fld`](@ref), [`cld`](@ref), [`fldmod1`](@ref).
 """
 fldmod(x,y) = divrem(x, y, RoundDown)
 
diff --git a/base/docs/basedocs.jl b/base/docs/basedocs.jl
index 85c737780d4afc..f5467bb475ac2b 100644
--- a/base/docs/basedocs.jl
+++ b/base/docs/basedocs.jl
@@ -276,6 +276,19 @@ julia> z
 """
 kw"global"
 
+"""
+    ' '
+
+A pair of single-quote characters delimit a [`Char`](@ref) (that is, character) literal.
+
+# Examples
+```jldoctest
+julia> 'j'
+'j': ASCII/Unicode U+006A (category Ll: Letter, lowercase)
+```
+"""
+kw"''"
+
 """
     =
 
@@ -452,6 +465,18 @@ For other purposes, `:( ... )` and `quote .. end` blocks are treated identically
 """
 kw"quote"
 
+"""
+    @
+
+The at sign followed by a macro name marks a macro call. Macros provide the
+ability to include generated code in the final body of a program. A macro maps
+a tuple of arguments, expressed as space-separated expressions or a
+function-call-like argument list, to a returned *expression*. The resulting
+expression is compiled directly into the surrounding code. See
+[Metaprogramming](@ref man-macros) for more details and examples.
+"""
+kw"@"
+
 """
     {}
 
@@ -602,6 +627,32 @@ the last expression in the function body.
 """
 kw"function"
 
+"""
+    x -> y
+
+Create an anonymous function mapping argument(s) `x` to the function body `y`.
+
+```jldoctest
+julia> f = x -> x^2 + 2x - 1
+#1 (generic function with 1 method)
+
+julia> f(2)
+7
+```
+
+Anonymous functions can also be defined for multiple arguments.
+```jldoctest
+julia> g = (x,y) -> x^2 + y^2
+#2 (generic function with 1 method)
+
+julia> g(2,3)
+13
+```
+
+See the manual section on [anonymous functions](@ref man-anonymous-functions) for more details.
+"""
+kw"->"
+
 """
     return
 
@@ -692,7 +743,7 @@ See the manual section on [control flow](@ref man-conditional-evaluation) for mo
 ```
 julia> x = 1; y = 2;
 
-julia> println(x > y ? "x is larger" : "y is larger")
+julia> x > y ? println("x is larger") : println("y is larger")
 y is larger
 ```
 """
@@ -923,9 +974,19 @@ kw"..."
     ;
 
 `;` has a similar role in Julia as in many C-like languages, and is used to delimit the
-end of the previous statement. `;` is not necessary after new lines, but can be used to
+end of the previous statement.
+
+`;` is not necessary at the end of a line, but can be used to
 separate statements on a single line or to join statements into a single expression.
-`;` is also used to suppress output printing in the REPL and similar interfaces.
+
+Adding `;` at the end of a line in the REPL will suppress printing the result of that expression.
+
+In function declarations, and optionally in calls, `;` separates regular arguments from keywords.
+
+While constructing arrays, if the arguments inside the square brackets are separated by `;`
+then their contents are vertically concatenated together.
+
+In the standard REPL, typing `;` on an empty line will switch to shell mode.
 
 # Examples
 ```julia
@@ -942,6 +1003,19 @@ julia> foo();
 
 julia> bar()
 "Hello, Mars!"
+
+julia> function plot(x, y; style="solid", width=1, color="black")
+           ###
+       end
+
+julia> [1 2; 3 4]
+2×2 Matrix{Int64}:
+ 1  2
+ 3  4
+
+julia> ; # upon typing ;, the prompt changes (in place) to: shell>
+shell> echo hello
+hello
 ```
 """
 kw";"
@@ -950,6 +1024,19 @@ kw";"
     x && y
 
 Short-circuiting boolean AND.
+
+See also [`&`](@ref), the ternary operator `? :`, and the manual section on [control flow](@ref man-conditional-evaluation).
+
+# Examples
+```jldoctest
+julia> x = 3;
+
+julia> x > 1 && x < 10 && x isa Int
+true
+
+julia> x < 0 && error("expected positive x")
+false
+```
 """
 kw"&&"
 
@@ -957,6 +1044,17 @@ kw"&&"
     x || y
 
 Short-circuiting boolean OR.
+
+See also: [`|`](@ref), [`xor`](@ref), [`&&`](@ref).
+
+# Examples
+```jldoctest
+julia> pi < 3 || ℯ < 3
+true
+
+julia> false || true || println("neither is true!")
+true
+```
 """
 kw"||"
 
@@ -1180,6 +1278,8 @@ devnull
     Nothing
 
 A type with no fields that is the type of [`nothing`](@ref).
+
+See also: [`isnothing`](@ref), [`Some`](@ref), [`Missing`](@ref).
 """
 Nothing
 
@@ -1188,6 +1288,8 @@ Nothing
 
 The singleton instance of type [`Nothing`](@ref), used by convention when there is no value to return
 (as in a C `void` function) or when a variable or field holds no value.
+
+See also: [`isnothing`](@ref), [`something`](@ref), [`missing`](@ref).
 """
 nothing
 
@@ -1730,6 +1832,8 @@ NaN
 julia> false * NaN
 0.0
 ```
+
+See also: [`digits`](@ref), [`iszero`](@ref), [`NaN`](@ref).
 """
 Bool
 
@@ -1816,19 +1920,31 @@ Symbol(x...)
 
 Construct a tuple of the given objects.
 
+See also [`Tuple`](@ref), [`NamedTuple`](@ref).
+
 # Examples
 ```jldoctest
-julia> tuple(1, 'a', pi)
-(1, 'a', π)
+julia> tuple(1, 'b', pi)
+(1, 'b', π)
+
+julia> ans === (1, 'b', π)
+true
+
+julia> Tuple(Real[1, 2, pi])  # takes a collection
+(1, 2, π)
 ```
 """
 tuple
 
 """
-    getfield(value, name::Symbol)
-    getfield(value, i::Int)
+    getfield(value, name::Symbol, [order::Symbol])
+    getfield(value, i::Int, [order::Symbol])
 
-Extract a field from a composite `value` by name or position.
+Extract a field from a composite `value` by name or position. Optionally, an
+ordering can be defined for the operation. If the field was declared `@atomic`,
+the specification is strongly recommended to be compatible with the stores to
+that location. Otherwise, if not declared as `@atomic`, this parameter must be
+`:not_atomic` if specified.
 See also [`getproperty`](@ref Base.getproperty) and [`fieldnames`](@ref).
 
 # Examples
@@ -1849,10 +1965,14 @@ julia> getfield(a, 1)
 getfield
 
 """
-    setfield!(value, name::Symbol, x)
+    setfield!(value, name::Symbol, x, [order::Symbol])
+    setfield!(value, i::Int, x, [order::Symbol])
 
-Assign `x` to a named field in `value` of composite type.
-The `value` must be mutable and `x` must be a subtype of `fieldtype(typeof(value), name)`.
+Assign `x` to a named field in `value` of composite type. The `value` must be
+mutable and `x` must be a subtype of `fieldtype(typeof(value), name)`.
+Additionally, an ordering can be specified for this operation. If the field was
+declared `@atomic`, this specification is mandatory. Otherwise, if not declared
+as `@atomic`, it must be `:not_atomic` if specified.
 See also [`setproperty!`](@ref Base.setproperty!).
 
 # Examples
@@ -1872,16 +1992,68 @@ julia> a = 1//2
 1//2
 
 julia> setfield!(a, :num, 3);
-ERROR: setfield! immutable struct of type Rational cannot be changed
+ERROR: setfield!: immutable struct of type Rational cannot be changed
 ```
 """
 setfield!
 
+"""
+    swapfield!(value, name::Symbol, x, [order::Symbol])
+    swapfield!(value, i::Int, x, [order::Symbol])
+
+These atomically perform the operations to simultaneously get and set a field:
+
+    y = getfield(value, name)
+    setfield!(value, name, x)
+    return y
+"""
+swapfield!
+
+"""
+    modifyfield!(value, name::Symbol, op, x, [order::Symbol]) -> Pair
+    modifyfield!(value, i::Int, op, x, [order::Symbol]) -> Pair
+
+These atomically perform the operations to get and set a field after applying
+the function `op`.
+
+    y = getfield(value, name)
+    z = op(y, x)
+    setfield!(value, name, z)
+    return y => z
+
+If supported by the hardware (for example, atomic increment), this may be
+optimized to the appropriate hardware instruction, otherwise it'll use a loop.
+"""
+modifyfield!
+
+"""
+    replacefield!(value, name::Symbol, expected, desired,
+                  [success_order::Symbol, [fail_order::Symbol=success_order]]) -> (; old, success::Bool)
+    replacefield!(value, i::Int, expected, desired,
+                  [success_order::Symbol, [fail_order::Symbol=success_order]]) -> (; old, success::Bool)
+
+These atomically perform the operations to get and conditionally set a field to
+a given value.
+
+    y = getfield(value, name, fail_order)
+    ok = y === expected
+    if ok
+        setfield!(value, name, desired, success_order)
+    end
+    return (; old = y, success = ok)
+
+If supported by the hardware, this may be optimized to the appropriate hardware
+instruction, otherwise it'll use a loop.
+"""
+replacefield!
+
 """
     typeof(x)
 
 Get the concrete type of `x`.
 
+See also [`eltype`](@ref).
+
 # Examples
 ```jldoctest
 julia> a = 1//2;
@@ -1898,12 +2070,16 @@ Matrix{Float64} (alias for Array{Float64, 2})
 typeof
 
 """
-    isdefined(m::Module, s::Symbol)
-    isdefined(object, s::Symbol)
-    isdefined(object, index::Int)
+    isdefined(m::Module, s::Symbol, [order::Symbol])
+    isdefined(object, s::Symbol, [order::Symbol])
+    isdefined(object, index::Int, [order::Symbol])
 
-Tests whether a global variable or object field is defined. The arguments can be a module and a symbol
-or a composite object and field name (as a symbol) or index.
+Tests whether a global variable or object field is defined. The arguments can
+be a module and a symbol or a composite object and field name (as a symbol) or
+index. Optionally, an ordering can be defined for the operation. If the field
+was declared `@atomic`, the specification is strongly recommended to be
+compatible with the stores to that location. Otherwise, if not declared as
+`@atomic`, this parameter must be `:not_atomic` if specified.
 
 To test whether an array element is defined, use [`isassigned`](@ref) instead.
 
@@ -1938,7 +2114,7 @@ isdefined
 """
     Vector{T}(undef, n)
 
-Construct an uninitialized [`Vector{T}`](@ref) of length `n`. See [`undef`](@ref).
+Construct an uninitialized [`Vector{T}`](@ref) of length `n`.
 
 # Examples
 ```julia-repl
@@ -1988,14 +2164,19 @@ Vector{T}(::Missing, n)
 """
     Matrix{T}(undef, m, n)
 
-Construct an uninitialized [`Matrix{T}`](@ref) of size `m`×`n`. See [`undef`](@ref).
+Construct an uninitialized [`Matrix{T}`](@ref) of size `m`×`n`.
 
 # Examples
 ```julia-repl
 julia> Matrix{Float64}(undef, 2, 3)
 2×3 Array{Float64, 2}:
- 6.93517e-310  6.93517e-310  6.93517e-310
- 6.93517e-310  6.93517e-310  1.29396e-320
+ 2.36365e-314  2.28473e-314    5.0e-324
+ 2.26704e-314  2.26711e-314  NaN
+
+julia> similar(ans, Int32, 2, 2)
+2×2 Matrix{Int32}:
+ 490537216  1277177453
+         1  1936748399
 ```
 """
 Matrix{T}(::UndefInitializer, m, n)
@@ -2043,19 +2224,28 @@ containing elements of type `T`. `N` can either be supplied explicitly,
 as in `Array{T,N}(undef, dims)`, or be determined by the length or number of `dims`.
 `dims` may be a tuple or a series of integer arguments corresponding to the lengths
 in each dimension. If the rank `N` is supplied explicitly, then it must
-match the length or number of `dims`. See [`undef`](@ref).
+match the length or number of `dims`. Here [`undef`](@ref) is
+the [`UndefInitializer`](@ref).
 
 # Examples
 ```julia-repl
 julia> A = Array{Float64, 2}(undef, 2, 3) # N given explicitly
-2×3 Array{Float64, 2}:
+2×3 Matrix{Float64}:
  6.90198e-310  6.90198e-310  6.90198e-310
  6.90198e-310  6.90198e-310  0.0
 
-julia> B = Array{Float64}(undef, 2) # N determined by the input
-2-element Array{Float64, 1}:
- 1.87103e-320
- 0.0
+julia> B = Array{Float64}(undef, 4) # N determined by the input
+4-element Vector{Float64}:
+   2.360075077e-314
+ NaN
+   2.2671131793e-314
+   2.299821756e-314
+
+julia> similar(B, 2, 4, 1) # use typeof(B), and the given size
+2×4×1 Array{Float64, 3}:
+[:, :, 1] =
+ 2.26703e-314  2.26708e-314  0.0           2.80997e-314
+ 0.0           2.26703e-314  2.26708e-314  0.0
 ```
 """
 Array{T,N}(::UndefInitializer, dims)
@@ -2132,10 +2322,12 @@ Alias for `UndefInitializer()`, which constructs an instance of the singleton ty
 [`UndefInitializer`](@ref), used in array initialization to indicate the
 array-constructor-caller would like an uninitialized array.
 
+See also: [`missing`](@ref), [`similar`](@ref).
+
 # Examples
 ```julia-repl
 julia> Array{Float64, 1}(undef, 3)
-3-element Array{Float64, 1}:
+3-element Vector{Float64}:
  2.2752528595e-314
  2.202942107e-314
  2.275252907e-314
@@ -2171,6 +2363,8 @@ julia> +(1, 20, 4)
 
 Unary minus operator.
 
+See also: [`abs`](@ref), [`flipsign`](@ref).
+
 # Examples
 ```jldoctest
 julia> -1
@@ -2242,8 +2436,8 @@ julia> 4.5/2
 """
     ArgumentError(msg)
 
-The parameters to a function call do not match a valid signature. Argument `msg` is a
-descriptive error string.
+The arguments passed to a function are invalid.
+`msg` is a descriptive error message.
 """
 ArgumentError
 
@@ -2276,6 +2470,9 @@ AssertionError
 
 An error occurred while [`include`](@ref Base.include)ing, [`require`](@ref Base.require)ing, or [`using`](@ref) a file. The error specifics
 should be available in the `.error` field.
+
+!!! compat "Julia 1.7"
+    LoadErrors are no longer emitted by `@macroexpand`, `@macroexpand1`, and `macroexpand` as of Julia 1.7.
 """
 LoadError
 
@@ -2355,10 +2552,20 @@ UnionAll
 """
     ::
 
-With the `::`-operator type annotations are attached to expressions and variables in programs.
-See the manual section on [Type Declarations](@ref).
+The `::` operator either asserts that a value has the given type, or declares that
+a local variable or function return always has the given type.
+
+Given `expression::T`, `expression` is first evaluated. If the result is of type
+`T`, the value is simply returned. Otherwise, a [`TypeError`](@ref) is thrown.
+
+In local scope, the syntax `local x::T` or `x::T = expression` declares that local variable
+`x` always has type `T`. When a value is assigned to the variable, it will be
+converted to type `T` by calling [`convert`](@ref).
 
-Outside of declarations `::` is used to assert that expressions and variables in programs have a given type.
+In a method declaration, the syntax `function f(x)::T` causes any value returned by
+the method to be converted to type `T`.
+
+See the manual section on [Type Declarations](@ref).
 
 # Examples
 ```jldoctest
@@ -2367,6 +2574,13 @@ ERROR: TypeError: typeassert: expected AbstractFloat, got a value of type Int64
 
 julia> (1+2)::Int
 3
+
+julia> let
+           local x::Int
+           x = 2.0
+           x
+       end
+2
 ```
 """
 kw"::"
@@ -2379,6 +2593,8 @@ number of trailing elements. `Vararg{T,N}` corresponds to exactly `N` elements o
 `Vararg{T}` corresponds to zero or more elements of type `T`. `Vararg` tuple types are used to represent the
 arguments accepted by varargs methods (see the section on [Varargs Functions](@ref) in the manual.)
 
+See also [`NTuple`](@ref).
+
 # Examples
 ```jldoctest
 julia> mytupletype = Tuple{AbstractString, Vararg{Int}}
@@ -2411,6 +2627,8 @@ is considered an abstract type, and tuple types are only concrete if their param
 field names; fields are only accessed by index.
 
 See the manual section on [Tuple Types](@ref).
+
+See also [`Vararg`](@ref), [`NTuple`](@ref), [`tuple`](@ref), [`NamedTuple`](@ref).
 """
 Tuple
 
@@ -2466,8 +2684,11 @@ typeassert
 
 """
     getproperty(value, name::Symbol)
+    getproperty(value, name::Symbol, order::Symbol)
 
 The syntax `a.b` calls `getproperty(a, :b)`.
+The syntax `@atomic order a.b` calls `getproperty(a, :b, :order)` and
+the syntax `@atomic a.b` calls `getproperty(a, :b, :sequentially_consistent)`.
 
 # Examples
 ```jldoctest
@@ -2492,21 +2713,62 @@ julia> obj.x
 1
 ```
 
-See also [`propertynames`](@ref Base.propertynames) and
+See also [`getfield`](@ref Core.getfield),
+[`propertynames`](@ref Base.propertynames) and
 [`setproperty!`](@ref Base.setproperty!).
 """
 Base.getproperty
 
 """
     setproperty!(value, name::Symbol, x)
+    setproperty!(value, name::Symbol, x, order::Symbol)
 
 The syntax `a.b = c` calls `setproperty!(a, :b, c)`.
+The syntax `@atomic order a.b = c` calls `setproperty!(a, :b, c, :order)`
+and the syntax `@atomic a.b = c` calls `setproperty!(a, :b, c, :sequentially_consistent)`.
 
-See also [`propertynames`](@ref Base.propertynames) and
+See also [`setfield!`](@ref Core.setfield!),
+[`propertynames`](@ref Base.propertynames) and
 [`getproperty`](@ref Base.getproperty).
 """
 Base.setproperty!
 
+"""
+    swapproperty!(x, f::Symbol, v, order::Symbol=:not_atomic)
+
+The syntax `@atomic a.b, _ = c, a.b` returns `(c, swapproperty!(a, :b, c, :sequentially_consistent))`,
+where there must be one getfield expression common to both sides.
+
+See also [`swapfield!`](@ref Core.swapfield!)
+and [`setproperty!`](@ref Base.setproperty!).
+"""
+Base.swapproperty!
+
+"""
+    modifyproperty!(x, f::Symbol, op, v, order::Symbol=:not_atomic)
+
+The syntax `@atomic! max(a().b, c)` returns `modifyproperty!(a(), :b,
+max, c, :sequentially_consistent)`, where the first argument must be a
+`getfield` expression and is modified atomically.
+
+See also [`modifyfield!`](@ref Core.modifyfield!)
+and [`setproperty!`](@ref Base.setproperty!).
+"""
+Base.modifyproperty!
+
+"""
+    replaceproperty!(x, f::Symbol, expected, desired, success_order::Symbol=:not_atomic, fail_order::Symbol=success_order)
+
+Perform a compare-and-swap operation on `x.f` from `expected` to `desired`, per
+egal. The syntax `@atomic_replace! x.f expected => desired` can be used instead
+of the function call form.
+
+See also [`replacefield!`](@ref Core.replacefield!)
+and [`setproperty!`](@ref Base.setproperty!).
+"""
+Base.replaceproperty!
+
+
 """
     StridedArray{T, N}
 
@@ -2590,4 +2852,46 @@ A quoted piece of code, that does not support interpolation. See the [manual sec
 """
 QuoteNode
 
+
+"""
+    "
+`"` is used to delimit string literals.
+
+# Examples
+
+```jldoctest
+julia> "Hello World!"
+"Hello World!"
+
+julia> "Hello World!\\n"
+"Hello World!\\n"
+```
+
+See also [`\"""`](@ref \"\"\").
+"""
+kw"\""
+
+"""
+    \"""
+`\"""` is used to delimit string literals. Strings created by triple quotation marks can contain `"` characters without escaping and are dedented to the level of the least-indented line. This is useful for defining strings within code that is indented.
+
+# Examples
+
+```jldoctest
+julia> \"""Hello World!\"""
+"Hello World!"
+
+julia> \"""Contains "quote" characters\"""
+"Contains \\"quote\\" characters"
+
+julia> \"""
+         Hello,
+         world.\"""
+"Hello,\\nworld."
+```
+
+See also [`"`](@ref \")
+"""
+kw"\"\"\""
+
 end
diff --git a/base/docs/utils.jl b/base/docs/utils.jl
index 841af2d2c2b9b0..cac029295d81ee 100644
--- a/base/docs/utils.jl
+++ b/base/docs/utils.jl
@@ -43,6 +43,12 @@ show(io::IO, ::MIME"text/html", h::HTML{<:Function}) = h.content(io)
     @html_str -> Docs.HTML
 
 Create an `HTML` object from a literal string.
+
+# Examples
+```jldoctest
+julia> html"Julia"
+HTML{String}("Julia")
+```
 """
 macro html_str(s)
     :(HTML($s))
@@ -89,6 +95,12 @@ hash(t::T, h::UInt) where {T<:Union{HTML,Text}} = hash(T, hash(t.content, h))
     @text_str -> Docs.Text
 
 Create a `Text` object from a literal string.
+
+# Examples
+```jldoctest
+julia> text"Julia"
+Julia
+```
 """
 macro text_str(s)
     :(Text($s))
diff --git a/base/env.jl b/base/env.jl
index 8f5256f25915ee..7d47a4de090a3a 100644
--- a/base/env.jl
+++ b/base/env.jl
@@ -32,7 +32,7 @@ if Sys.iswindows()
     function _unsetenv(svar::AbstractString)
         var = cwstring(svar)
         ret = ccall(:SetEnvironmentVariableW,stdcall,Int32,(Ptr{UInt16},Ptr{UInt16}),var,C_NULL)
-        windowserror(:setenv, ret == 0)
+        windowserror(:setenv, ret == 0 && Libc.GetLastError() != ERROR_ENVVAR_NOT_FOUND)
     end
 else # !windows
     _getenv(var::AbstractString) = ccall(:getenv, Cstring, (Cstring,), var)
@@ -77,7 +77,7 @@ variable may result in an uppercase `ENV` key.)
 const ENV = EnvDict()
 
 getindex(::EnvDict, k::AbstractString) = access_env(k->throw(KeyError(k)), k)
-get(::EnvDict, k::AbstractString, def) = access_env(k->def, k)
+get(::EnvDict, k::AbstractString, def) = access_env(Returns(def), k)
 get(f::Callable, ::EnvDict, k::AbstractString) = access_env(k->f(), k)
 in(k::AbstractString, ::KeySet{String, EnvDict}) = _hasenv(k)
 pop!(::EnvDict, k::AbstractString) = (v = ENV[k]; _unsetenv(k); v)
@@ -87,7 +87,7 @@ setindex!(::EnvDict, v, k::AbstractString) = _setenv(k,string(v))
 push!(::EnvDict, kv::Pair{<:AbstractString}) = setindex!(ENV, kv.second, kv.first)
 
 if Sys.iswindows()
-    GESW() = (pos = ccall(:GetEnvironmentStringsW,stdcall,Ptr{UInt16},()); (pos,pos))
+    GESW() = (pos = ccall(:GetEnvironmentStringsW, stdcall, Ptr{UInt16}, ()); (pos, pos))
     function winuppercase(s::AbstractString)
         isempty(s) && return s
         LOCALE_INVARIANT = 0x0000007f
@@ -99,32 +99,43 @@ if Sys.iswindows()
         return transcode(String, ws)
     end
     function iterate(hash::EnvDict, block::Tuple{Ptr{UInt16},Ptr{UInt16}} = GESW())
-        if unsafe_load(block[1]) == 0
-            ccall(:FreeEnvironmentStringsW, stdcall, Int32, (Ptr{UInt16},), block[2])
-            return nothing
+        while true
+            if unsafe_load(block[1]) == 0 # a zero code unit where an entry would start ends the block
+                ccall(:FreeEnvironmentStringsW, stdcall, Int32, (Ptr{UInt16},), block[2])
+                return nothing
+            end
+            pos = block[1] # cursor: start of the current entry
+            blk = block[2] # start of the whole block, kept only so it can be freed at the end
+            len = ccall(:wcslen, UInt, (Ptr{UInt16},), pos)
+            buf = Vector{UInt16}(undef, len)
+            GC.@preserve buf unsafe_copyto!(pointer(buf), pos, len)
+            env = transcode(String, buf)
+            block = (pos + (len + 1) * 2, blk) # advance the loop state itself, so `continue` below moves on
+            if !isempty(env)
+                m = findnext('=', env, nextind(env, firstindex(env))) # search from the 2nd char: a leading '=' belongs to the name
+            else
+                m = nothing
+            end
+            if m === nothing
+                @warn "malformed environment entry: $env"
+                continue # skip it; `block` already points at the following entry
+            end
+            return (Pair{String,String}(winuppercase(env[1:prevind(env, m)]), env[nextind(env, m):end]), block)
         end
-        pos = block[1]
-        blk = block[2]
-        len = ccall(:wcslen, UInt, (Ptr{UInt16},), pos)
-        buf = Vector{UInt16}(undef, len)
-        GC.@preserve buf unsafe_copyto!(pointer(buf), pos, len)
-        env = transcode(String, buf)
-        m = match(r"^(=?[^=]+)=(.*)$"s, env)
-        if m === nothing
-            error("malformed environment entry: $env")
-        end
-        return (Pair{String,String}(winuppercase(m.captures[1]), m.captures[2]), (pos+(len+1)*2, blk))
     end
 else # !windows
     function iterate(::EnvDict, i=0)
-        env = ccall(:jl_environ, Any, (Int32,), i)
-        env === nothing && return nothing
-        env = env::String
-        m = match(r"^(.*?)=(.*)$"s, env)
-        if m === nothing
-            error("malformed environment entry: $env")
+        while true
+            env = ccall(:jl_environ, Any, (Int32,), i)
+            env === nothing && return nothing # no entry at this index: iteration is done
+            env = env::String
+            i += 1 # consume this entry up front so a skipped one is never fetched again
+            m = findfirst('=', env)
+            if m !== nothing
+                return (Pair{String,String}(env[1:prevind(env, m)], env[nextind(env, m):end]), i)
+            end
+            @warn "malformed environment entry: $env" # no '=' to split on: skip it and try the next index
         end
-        return (Pair{String,String}(m.captures[1], m.captures[2]), i+1)
     end
 end # os-test
 
diff --git a/base/error.jl b/base/error.jl
index 72c13396e3fc47..653f4f9760edff 100644
--- a/base/error.jl
+++ b/base/error.jl
@@ -38,7 +38,7 @@ error(s::AbstractString) = throw(ErrorException(s))
 Raise an `ErrorException` with the given message.
 """
 function error(s::Vararg{Any,N}) where {N}
-    @_noinline_meta
+    @noinline
     throw(ErrorException(Main.Base.string(s...)))
 end
 
@@ -54,7 +54,7 @@ exception will continue propagation as if it had not been caught.
     the program state at the time of the error so you're encouraged to instead
     throw a new exception using `throw(e)`. In Julia 1.1 and above, using
     `throw(e)` will preserve the root cause exception on the stack, as
-    described in [`catch_stack`](@ref).
+    described in [`current_exceptions`](@ref).
 """
 rethrow() = ccall(:jl_rethrow, Bottom, ())
 rethrow(@nospecialize(e)) = ccall(:jl_rethrow_other, Bottom, (Any,), e)
@@ -105,7 +105,7 @@ end
 Get a backtrace object for the current program point.
 """
 function backtrace()
-    @_noinline_meta
+    @noinline
     # skip frame for backtrace(). Note that for this to work properly,
     # backtrace() itself must not be interpreted nor inlined.
     skip = 1
@@ -123,37 +123,43 @@ function catch_backtrace()
     return _reformat_bt(bt::Vector{Ptr{Cvoid}}, bt2::Vector{Any})
 end
 
+struct ExceptionStack <: AbstractArray{Any,1}
+    stack::Array{Any,1}
+end
+
 """
-    catch_stack(task=current_task(); [inclue_bt=true])
+    current_exceptions(task::Task=current_task(); [backtrace::Bool=true])
 
 Get the stack of exceptions currently being handled. For nested catch blocks
 there may be more than one current exception in which case the most recently
-thrown exception is last in the stack. The stack is returned as a Vector of
-`(exception,backtrace)` pairs, or a Vector of exceptions if `include_bt` is
-false.
+thrown exception is last in the stack. The stack is returned as an
+`ExceptionStack` which is an AbstractVector of named tuples
+`(exception,backtrace)`. If `backtrace` is false, the backtrace in each tuple
+will be set to `nothing`.
 
 Explicitly passing `task` will return the current exception stack on an
 arbitrary task. This is useful for inspecting tasks which have failed due to
 uncaught exceptions.
 
-!!! compat "Julia 1.1"
-    This function is experimental in Julia 1.1 and will likely be renamed in a
-    future release (see https://github.com/JuliaLang/julia/pull/29901).
+!!! compat "Julia 1.7"
+    This function went by the experimental name `catch_stack()` in Julia
+    1.1–1.6, and had a plain Vector-of-tuples as a return type.
 """
-function catch_stack(task=current_task(); include_bt=true)
-    raw = ccall(:jl_get_excstack, Any, (Any,Cint,Cint), task, include_bt, typemax(Cint))::Vector{Any}
+function current_exceptions(task::Task=current_task(); backtrace::Bool=true)
+    raw = ccall(:jl_get_excstack, Any, (Any,Cint,Cint), task, backtrace, typemax(Cint))::Vector{Any}
     formatted = Any[]
-    stride = include_bt ? 3 : 1
+    stride = backtrace ? 3 : 1 # with backtraces, each exception occupies 3 consecutive slots of `raw`
     for i = reverse(1:stride:length(raw))
-        e = raw[i]
-        push!(formatted, include_bt ? (e,Base._reformat_bt(raw[i+1],raw[i+2])) : e)
+        exc = raw[i]
+        bt = backtrace ? Base._reformat_bt(raw[i+1],raw[i+2]) : nothing # the two slots after `exc` carry its backtrace data
+        push!(formatted, (exception=exc,backtrace=bt)) # same named-tuple fields whether or not `bt` is `nothing`
     end
-    formatted
+    ExceptionStack(formatted)
 end
 
 ## keyword arg lowering generates calls to this ##
 function kwerr(kw, args::Vararg{Any,N}) where {N}
-    @_noinline_meta
+    @noinline
     throw(MethodError(typeof(args[1]).name.mt.kwsorter, (kw,args...)))
 end
 
diff --git a/base/errorshow.jl b/base/errorshow.jl
index 2baf5d2471315c..1ec7235b538a96 100644
--- a/base/errorshow.jl
+++ b/base/errorshow.jl
@@ -9,7 +9,7 @@ This method is used to display the exception after a call to [`throw`](@ref).
 # Examples
 ```jldoctest
 julia> struct MyException <: Exception
-           msg::AbstractString
+           msg::String
        end
 
 julia> function Base.showerror(io::IO, err::MyException)
@@ -92,7 +92,7 @@ function showerror(io::IO, ex, bt; backtrace=true)
 end
 
 function showerror(io::IO, ex::LoadError, bt; backtrace=true)
-    print(io, "LoadError: ")
+    !isa(ex.error, LoadError) && print(io, "LoadError: ")
     showerror(io, ex.error, bt, backtrace=backtrace)
     print(io, "\nin expression starting at $(ex.file):$(ex.line)")
 end
@@ -159,14 +159,8 @@ showerror(io::IO, ex::UndefKeywordError) =
     print(io, "UndefKeywordError: keyword argument $(ex.var) not assigned")
 
 function showerror(io::IO, ex::UndefVarError)
-    if ex.var in [:UTF16String, :UTF32String, :WString, :utf16, :utf32, :wstring, :RepString]
-        return showerror(io, ErrorException("""
-        `$(ex.var)` has been moved to the package LegacyStrings.jl:
-        Run Pkg.add("LegacyStrings") to install LegacyStrings on Julia v0.5-;
-        Then do `using LegacyStrings` to get `$(ex.var)`.
-        """))
-    end
     print(io, "UndefVarError: $(ex.var) not defined")
+    Experimental.show_error_hints(io, ex)
 end
 
 function showerror(io::IO, ex::InexactError)
@@ -205,19 +199,20 @@ function print_with_compare(io::IO, @nospecialize(a), @nospecialize(b), color::S
     end
 end
 
-function show_convert_error(io::IO, ex::MethodError, @nospecialize(arg_types_param))
+function show_convert_error(io::IO, ex::MethodError, arg_types_param)
     # See #13033
     T = striptype(ex.args[1])
     if T === nothing
         print(io, "First argument to `convert` must be a Type, got ", ex.args[1])
     else
-        print_one_line = isa(T, DataType) && isa(arg_types_param[2], DataType) && T.name != arg_types_param[2].name
+        p2 = arg_types_param[2]
+        print_one_line = isa(T, DataType) && isa(p2, DataType) && T.name != p2.name
         printstyled(io, "Cannot `convert` an object of type ")
         print_one_line || printstyled(io, "\n  ")
-        print_with_compare(io, arg_types_param[2], T, :light_green)
+        print_with_compare(io, p2, T, :light_green)
         printstyled(io, " to an object of type ")
         print_one_line || printstyled(io, "\n  ")
-        print_with_compare(io, T, arg_types_param[2], :light_red)
+        print_with_compare(io, T, p2, :light_red)
     end
 end
 
@@ -228,10 +223,11 @@ function showerror(io::IO, ex::MethodError)
     arg_types = (is_arg_types ? ex.args : typesof(ex.args...))::DataType
     f = ex.f
     meth = methods_including_ambiguous(f, arg_types)
-    if length(meth) > 1
+    if isa(meth, MethodList) && length(meth) > 1
         return showerror_ambiguous(io, meth, f, arg_types)
     end
     arg_types_param::SimpleVector = arg_types.parameters
+    show_candidates = true
     print(io, "MethodError: ")
     ft = typeof(f)
     name = ft.name.mt.name
@@ -248,7 +244,10 @@ function showerror(io::IO, ex::MethodError)
     if f === Base.convert && length(arg_types_param) == 2 && !is_arg_types
         f_is_function = true
         show_convert_error(io, ex, arg_types_param)
-    elseif isempty(methods(f)) && isa(f, DataType) && f.abstract
+    elseif f === mapreduce_empty || f === reduce_empty
+        print(io, "reducing over an empty collection is not allowed; consider supplying `init` to the reducer")
+        show_candidates = false
+    elseif isempty(methods(f)) && isa(f, DataType) && isabstracttype(f)
         print(io, "no constructors have been defined for ", f)
     elseif isempty(methods(f)) && !isa(f, Function) && !isa(f, Type)
         print(io, "objects of type ", ft, " are not callable")
@@ -320,7 +319,7 @@ function showerror(io::IO, ex::MethodError)
         end
     end
     Experimental.show_error_hints(io, ex, arg_types_param, kwargs)
-    try
+    show_candidates && try
         show_method_candidates(io, ex, kwargs)
     catch ex
         @error "Error showing method candidates, aborted" exception=ex,catch_backtrace()
@@ -552,13 +551,6 @@ end
 # replace `sf` as needed.
 const update_stackframes_callback = Ref{Function}(identity)
 
-function replaceuserpath(str)
-    str = replace(str, homedir() => "~")
-    # seems to be necessary for some paths with small letter drive c:// etc
-    str = replace(str, lowercasefirst(homedir()) => "~")
-    return str
-end
-
 const STACKTRACE_MODULECOLORS = [:magenta, :cyan, :green, :yellow]
 const STACKTRACE_FIXEDCOLORS = IdDict(Base => :light_black, Core => :light_black)
 
@@ -698,7 +690,7 @@ end
 function print_stackframe(io, i, frame::StackFrame, n::Int, digit_align_width, modulecolor)
     file, line = string(frame.file), frame.line
     stacktrace_expand_basepaths() && (file = something(find_source_file(file), file))
-    stacktrace_contract_userdir() && (file = replaceuserpath(file))
+    stacktrace_contract_userdir() && (file = contractuser(file))
 
     # Used by the REPL to make it possible to open
     # the location of a stackframe/method in the editor.
@@ -738,13 +730,7 @@ function print_stackframe(io, i, frame::StackFrame, n::Int, digit_align_width, m
     # filename, separator, line
     # use escape codes for formatting, printstyled can't do underlined and color
     # codes are bright black (90) and underlined (4)
-    function print_underlined(io::IO, s...)
-        colored = get(io, :color, false)::Bool
-        start_s = colored ? "\033[90;4m" : ""
-        end_s   = colored ? "\033[0m"    : ""
-        print(io, start_s, s..., end_s)
-    end
-    print_underlined(io, pathparts[end], ":", line)
+    printstyled(io, pathparts[end], ":", line; color = :light_black, underline = true)
 
     # inlined
     printstyled(io, inlined ? " [inlined]" : "", color = :light_black)
@@ -792,10 +778,9 @@ end
 # For improved user experience, filter out frames for include() implementation
 # - see #33065. See also #35371 for extended discussion of internal frames.
 function _simplify_include_frames(trace)
-    i = length(trace)
-    kept_frames = trues(i)
+    kept_frames = trues(length(trace))
     first_ignored = nothing
-    while i >= 1
+    for i in length(trace):-1:1
         frame::StackFrame, _ = trace[i]
         mod = parentmodule(frame)
         if first_ignored === nothing
@@ -817,10 +802,9 @@ function _simplify_include_frames(trace)
                 first_ignored = nothing
             end
         end
-        i -= 1
     end
     if first_ignored !== nothing
-        kept_frames[i:first_ignored] .= false
+        kept_frames[1:first_ignored] .= false
     end
     return trace[kept_frames]
 end
@@ -868,7 +852,7 @@ function process_backtrace(t::Vector, limit::Int=typemax(Int); skipC = true)
     return _simplify_include_frames(ret)
 end
 
-function show_exception_stack(io::IO, stack::Vector)
+function show_exception_stack(io::IO, stack)
     # Display exception stack with the top of the stack first.  This ordering
     # means that the user doesn't have to scroll up in the REPL to discover the
     # root cause.
@@ -892,3 +876,28 @@ function show(io::IO, ip::InterpreterIP)
         print(io, " in $(ip.code) at statement $(Int(ip.stmt))")
     end
 end
+
+# Handler for displaying a hint in case the user tries to call
+# an instance of a number (probably a missing operator),
+# e.g. `(1 + 2)(3 + 4)`
+function noncallable_number_hint_handler(io, ex, arg_types, kwargs)
+    @nospecialize
+    if ex.f isa Number
+        print(io, "\nMaybe you forgot to use an operator such as ")
+        printstyled(io, "*, ^, %, / etc. ", color=:cyan)
+        print(io, "?")
+    end
+end
+
+Experimental.register_error_hint(noncallable_number_hint_handler, MethodError)
+
+# ExceptionStack implementation
+size(s::ExceptionStack) = size(s.stack)
+getindex(s::ExceptionStack, i::Int) = s.stack[i]
+
+function show(io::IO, ::MIME"text/plain", stack::ExceptionStack)
+    nexc = length(stack)
+    printstyled(io, nexc, "-element ExceptionStack", nexc == 0 ? "" : ":\n")
+    show_exception_stack(io, stack)
+end
+show(io::IO, stack::ExceptionStack) = show(io, MIME("text/plain"), stack)
diff --git a/base/essentials.jl b/base/essentials.jl
index 1a0e971d3a73f3..5280252f1946a2 100644
--- a/base/essentials.jl
+++ b/base/essentials.jl
@@ -26,7 +26,7 @@ abstract type AbstractDict{K,V} end
 """
     Iterators.Pairs(values, keys) <: AbstractDict{eltype(keys), eltype(values)}
 
-Transforms an indexable container into an Dictionary-view of the same data.
+Transforms an indexable container into a Dictionary-view of the same data.
 Modifying the key-space of the underlying data may invalidate this object.
 """
 struct Pairs{K, V, I, A} <: AbstractDict{K, V}
@@ -42,16 +42,6 @@ pairs(::Type{NamedTuple}) = Pairs{Symbol, V, NTuple{N, Symbol}, NamedTuple{names
 #const NamedTuplePair{N, V, names, T<:NTuple{N, Any}} = Pairs{Symbol, V, NTuple{N, Symbol}, NamedTuple{names, T}}
 #export NamedTuplePair
 
-
-# The real @inline macro is not available until after array.jl, so this
-# internal macro splices the meta Expr directly into the function body.
-macro _inline_meta()
-    Expr(:meta, :inline)
-end
-macro _noinline_meta()
-    Expr(:meta, :noinline)
-end
-
 macro _gc_preserve_begin(arg1)
     Expr(:gc_preserve_begin, esc(arg1))
 end
@@ -131,7 +121,8 @@ end
 
 Tests whether variable `s` is defined in the current scope.
 
-See also [`isdefined`](@ref).
+See also [`isdefined`](@ref) for field properties, [`isassigned`](@ref) for
+array indexes, and [`haskey`](@ref) for other mappings.
 
 # Examples
 ```jldoctest
@@ -217,6 +208,8 @@ julia> y = convert(Vector{Int}, x);
 julia> y === x
 true
 ```
+
+See also: [`round`](@ref), [`trunc`](@ref), [`oftype`](@ref), [`reinterpret`](@ref).
 """
 function convert end
 
@@ -249,6 +242,8 @@ argtail(x, rest...) = rest
 
 Return a `Tuple` consisting of all but the first component of `x`.
 
+See also: [`front`](@ref Base.front), [`rest`](@ref Base.rest), [`first`](@ref), [`Iterators.peel`](@ref).
+
 # Examples
 ```jldoctest
 julia> Base.tail((1,2,3))
@@ -336,7 +331,7 @@ function typename(a::Union)
 end
 typename(union::UnionAll) = typename(union.body)
 
-_tuple_error(T::Type, x) = (@_noinline_meta; throw(MethodError(convert, (T, x))))
+_tuple_error(T::Type, x) = (@noinline; throw(MethodError(convert, (T, x))))
 
 convert(::Type{T}, x::T) where {T<:Tuple} = x
 function convert(::Type{T}, x::NTuple{N,Any}) where {N, T<:Tuple}
@@ -345,7 +340,7 @@ function convert(::Type{T}, x::NTuple{N,Any}) where {N, T<:Tuple}
     if typeintersect(NTuple{N,Any}, T) === Union{}
         _tuple_error(T, x)
     end
-    cvt1(n) = (@_inline_meta; convert(fieldtype(T, n), getfield(x, n, #=boundscheck=#false)))
+    cvt1(n) = (@inline; convert(fieldtype(T, n), getfield(x, n, #=boundscheck=#false)))
     return ntuple(cvt1, Val(N))::NTuple{N,Any}
 end
 
@@ -452,7 +447,9 @@ reinterpret(::Type{T}, x) where {T} = bitcast(T, x)
     sizeof(obj)
 
 Size, in bytes, of the canonical binary representation of the given `DataType` `T`, if any.
-Size, in bytes, of object `obj` if it is not `DataType`.
+Or the size, in bytes, of object `obj` if it is not a `DataType`.
+
+See also [`summarysize`](@ref).
 
 # Examples
 ```jldoctest
@@ -465,7 +462,7 @@ julia> sizeof(ComplexF64)
 julia> sizeof(1.0)
 8
 
-julia> sizeof([1.0:10.0;])
+julia> sizeof(collect(1.0:10.0))
 80
 ```
 
@@ -532,7 +529,7 @@ julia> f2()
     As noted there, the caller must verify—using information they can access—that
     their accesses are valid before using `@inbounds`. For indexing into your
     [`AbstractArray`](@ref) subclasses, for example, this involves checking the
-    indices against its [`size`](@ref). Therefore, `@boundscheck` annotations
+    indices against its [`axes`](@ref). Therefore, `@boundscheck` annotations
     should only be added to a [`getindex`](@ref) or [`setindex!`](@ref)
     implementation after you are certain its behavior is correct.
 """
@@ -716,7 +713,7 @@ call obsolete versions of a function `f`.
 `f` directly, and the type of the result cannot be inferred by the compiler.)
 """
 function invokelatest(@nospecialize(f), @nospecialize args...; kwargs...)
-    kwargs = Base.merge(NamedTuple(), kwargs)
+    kwargs = merge(NamedTuple(), kwargs)
     if isempty(kwargs)
         return Core._call_latest(f, args...)
     end
@@ -809,6 +806,8 @@ values(itr) = itr
 
 A type with no fields whose singleton instance [`missing`](@ref) is used
 to represent missing values.
+
+See also: [`skipmissing`](@ref), [`nonmissingtype`](@ref), [`Nothing`](@ref).
 """
 struct Missing end
 
@@ -816,6 +815,8 @@ struct Missing end
     missing
 
 The singleton instance of type [`Missing`](@ref) representing a missing value.
+
+See also: [`NaN`](@ref), [`skipmissing`](@ref), [`nonmissingtype`](@ref).
 """
 const missing = Missing()
 
@@ -823,6 +824,8 @@ const missing = Missing()
     ismissing(x)
 
 Indicate whether `x` is [`missing`](@ref).
+
+See also: [`skipmissing`](@ref), [`isnothing`](@ref), [`isnan`](@ref).
 """
 ismissing(x) = x === missing
 
diff --git a/base/experimental.jl b/base/experimental.jl
index b928b6ba0e1d91..421e6861bcaab0 100644
--- a/base/experimental.jl
+++ b/base/experimental.jl
@@ -10,6 +10,7 @@
 module Experimental
 
 using Base: Threads, sync_varname
+using Base.Meta
 
 """
     Const(A::Array)
@@ -28,9 +29,9 @@ Base.IndexStyle(::Type{<:Const}) = IndexLinear()
 Base.size(C::Const) = size(C.a)
 Base.axes(C::Const) = axes(C.a)
 @eval Base.getindex(A::Const, i1::Int) =
-    (Base.@_inline_meta; Core.const_arrayref($(Expr(:boundscheck)), A.a, i1))
+    (Base.@inline; Core.const_arrayref($(Expr(:boundscheck)), A.a, i1))
 @eval Base.getindex(A::Const, i1::Int, i2::Int, I::Int...) =
-  (Base.@_inline_meta; Core.const_arrayref($(Expr(:boundscheck)), A.a, i1, i2, I...))
+  (Base.@inline; Core.const_arrayref($(Expr(:boundscheck)), A.a, i1, i2, I...))
 
 """
     @aliasscope expr
@@ -114,7 +115,8 @@ parent module.
 Supported values are 0, 1, 2, and 3.
 
 The effective optimization level is the minimum of that specified on the
-command line and in per-module settings.
+command line and in per-module settings. If a `--min-optlevel` value is
+set on the command line, that is enforced as a lower bound.
 """
 macro optlevel(n::Int)
     return Expr(:meta, :optlevel, n)
@@ -255,4 +257,46 @@ end
 # OpaqueClosure
 include("opaque_closure.jl")
 
+"""
+    Experimental.@overlay mt [function def]
+
+Define a method and add it to the method table `mt` instead of to the global method table.
+This can be used to implement a method override mechanism. Regular compilation will not
+consider these methods, and you should customize the compilation flow to look in these
+method tables (e.g., using [`Core.Compiler.OverlayMethodTable`](@ref)).
+
+"""
+macro overlay(mt, def)
+    def = macroexpand(__module__, def) # to expand @inline, @generated, etc
+    if !isexpr(def, [:function, :(=)])
+        error("@overlay requires a function Expr")
+    end
+    if isexpr(def.args[1], :call)
+        def.args[1].args[1] = Expr(:overlay, mt, def.args[1].args[1])
+    elseif isexpr(def.args[1], :where)
+        def.args[1].args[1].args[1] = Expr(:overlay, mt, def.args[1].args[1].args[1])
+    else
+        error("@overlay requires a function Expr")
+    end
+    esc(def)
+end
+
+let new_mt(name::Symbol, mod::Module) = begin
+        ccall(:jl_check_top_level_effect, Cvoid, (Any, Cstring), mod, "@MethodTable")
+        ccall(:jl_new_method_table, Any, (Any, Any), name, mod)
+    end
+    @eval macro MethodTable(name::Symbol)
+        esc(:(const $name = $$new_mt($(quot(name)), $(__module__))))
+    end
+end
+
+"""
+    Experimental.@MethodTable(name)
+
+Create a new MethodTable in the current module, bound to `name`. This method table can be
+used with the [`Experimental.@overlay`](@ref) macro to define methods for a function without
+adding them to the global method table.
+"""
+:@MethodTable
+
 end
diff --git a/base/exports.jl b/base/exports.jl
index adfb1772600d3d..36baa386d5510a 100644
--- a/base/exports.jl
+++ b/base/exports.jl
@@ -70,6 +70,7 @@ export
     Rational,
     Regex,
     RegexMatch,
+    Returns,
     RoundFromZero,
     RoundDown,
     RoundingMode,
@@ -168,6 +169,10 @@ export
     ≢,
     xor,
     ⊻,
+    nand,
+    nor,
+    ⊼,
+    ⊽,
     %,
     ÷,
     &,
@@ -386,6 +391,7 @@ export
     first,
     hcat,
     hvcat,
+    hvncat,
     indexin,
     argmax,
     argmin,
@@ -498,6 +504,7 @@ export
     count,
     delete!,
     deleteat!,
+    keepat!,
     eltype,
     empty!,
     empty,
@@ -670,6 +677,7 @@ export
     istaskstarted,
     istaskfailed,
     lock,
+    @lock,
     notify,
     ReentrantLock,
     schedule,
@@ -682,6 +690,7 @@ export
     timedwait,
     asyncmap,
     asyncmap!,
+    errormonitor,
 
 # channels
     take!,
@@ -692,9 +701,11 @@ export
 
 # missing values
     coalesce,
+    @coalesce,
     ismissing,
     missing,
     skipmissing,
+    @something,
     something,
     isnothing,
     nonmissingtype,
@@ -707,6 +718,7 @@ export
 # errors
     backtrace,
     catch_backtrace,
+    current_exceptions,
     error,
     rethrow,
     retry,
@@ -719,6 +731,9 @@ export
     convert,
     getproperty,
     setproperty!,
+    swapproperty!,
+    modifyproperty!,
+    replaceproperty!,
     fieldoffset,
     fieldname,
     fieldnames,
@@ -788,6 +803,7 @@ export
 
 # I/O and events
     close,
+    closewrite,
     countlines,
     eachline,
     readeach,
@@ -821,6 +837,7 @@ export
     readline,
     readlines,
     readuntil,
+    redirect_stdio,
     redirect_stderr,
     redirect_stdin,
     redirect_stdout,
@@ -879,6 +896,7 @@ export
     filemode,
     filesize,
     gperm,
+    hardlink,
     isblockdev,
     ischardev,
     isdir,
@@ -912,7 +930,7 @@ export
     uperm,
     walkdir,
 
-# external processes ## TODO: whittle down these exports.
+# external processes
     detach,
     getpid,
     ignorestatus,
@@ -1002,6 +1020,9 @@ export
     @polly,
 
     @assert,
+    @atomic,
+    @atomicswap,
+    @atomicreplace,
     @__dot__,
     @enum,
     @label,
diff --git a/base/expr.jl b/base/expr.jl
index 4d6401b002a76d..1af1e9486068ea 100644
--- a/base/expr.jl
+++ b/base/expr.jl
@@ -31,6 +31,9 @@ end
 
 ## expressions ##
 
+isexpr(@nospecialize(ex), head::Symbol) = isa(ex, Expr) && ex.head === head
+isexpr(@nospecialize(ex), head::Symbol, n::Int) = isa(ex, Expr) && ex.head === head && length(ex.args) == n
+
 copy(e::Expr) = exprarray(e.head, copy_exprargs(e.args))
 
 # copy parts of an AST that the compiler mutates
@@ -185,18 +188,74 @@ Give a hint to the compiler that this function is worth inlining.
 Small functions typically do not need the `@inline` annotation,
 as the compiler does it automatically. By using `@inline` on bigger functions,
 an extra nudge can be given to the compiler to inline it.
-This is shown in the following example:
+
+`@inline` can be applied immediately before the definition or in its function body.
 
 ```julia
-@inline function bigfunction(x)
-    #=
-        Function Definition
-    =#
+# annotate long-form definition
+@inline function longdef(x)
+    ...
 end
+
+# annotate short-form definition
+@inline shortdef(x) = ...
+
+# annotate anonymous function that a `do` block creates
+f() do
+    @inline
+    ...
+end
+```
+
+!!! compat "Julia 1.8"
+    The usage within a function body requires at least Julia 1.8.
+
+---
+    @inline block
+
+Give a hint to the compiler that calls within `block` are worth inlining.
+
+```julia
+# The compiler will try to inline `f`
+@inline f(...)
+
+# The compiler will try to inline `f`, `g` and `+`
+@inline f(...) + g(...)
 ```
+
+!!! note
+    A callsite annotation always takes precedence over the annotation applied to the
+    definition of the called function:
+    ```julia
+    @noinline function explicit_noinline(args...)
+        # body
+    end
+
+    let
+        @inline explicit_noinline(args...) # will be inlined
+    end
+    ```
+
+!!! note
+    When there are nested callsite annotations, the innermost annotation takes precedence:
+    ```julia
+    @noinline let a0, b0 = ...
+        a = @inline f(a0)  # the compiler will try to inline this call
+        b = f(b0)          # the compiler will NOT try to inline this call
+        return a, b
+    end
+    ```
+
+!!! warning
+    Although a callsite annotation will try to force inlining regardless of the cost model,
+    there is still a chance that it cannot succeed. In particular, recursive calls cannot be
+    inlined even if they are annotated as `@inline`d.
+
+!!! compat "Julia 1.8"
+    The callsite annotation requires at least Julia 1.8.
 """
-macro inline(ex)
-    esc(isa(ex, Expr) ? pushmeta!(ex, :inline) : ex)
+macro inline(x)
+    return annotate_meta_def_or_block(x, :inline)
 end
 
 """
@@ -206,21 +265,74 @@ Give a hint to the compiler that it should not inline a function.
 
 Small functions are typically inlined automatically.
 By using `@noinline` on small functions, auto-inlining can be
-prevented. This is shown in the following example:
+prevented.
+
+`@noinline` can be applied immediately before the definition or in its function body.
 
 ```julia
-@noinline function smallfunction(x)
-    #=
-        Function Definition
-    =#
+# annotate long-form definition
+@noinline function longdef(x)
+    ...
 end
+
+# annotate short-form definition
+@noinline shortdef(x) = ...
+
+# annotate anonymous function that a `do` block creates
+f() do
+    @noinline
+    ...
+end
+```
+
+!!! compat "Julia 1.8"
+    The usage within a function body requires at least Julia 1.8.
+
+---
+    @noinline block
+
+Give a hint to the compiler that it should not inline the calls within `block`.
+
+```julia
+# The compiler will try to not inline `f`
+@noinline f(...)
+
+# The compiler will try to not inline `f`, `g` and `+`
+@noinline f(...) + g(...)
 ```
 
+!!! note
+    A callsite annotation always takes precedence over the annotation applied to the
+    definition of the called function:
+    ```julia
+    @inline function explicit_inline(args...)
+        # body
+    end
+
+    let
+        @noinline explicit_inline(args...) # will not be inlined
+    end
+    ```
+
+!!! note
+    When there are nested callsite annotations, the innermost annotation takes precedence:
+    ```julia
+    @inline let a0, b0 = ...
+        a = @noinline f(a0)  # the compiler will NOT try to inline this call
+        b = f(b0)            # the compiler will try to inline this call
+        return a, b
+    end
+    ```
+
+!!! compat "Julia 1.8"
+    The callsite annotation requires at least Julia 1.8.
+
+---
 !!! note
     If the function is trivial (for example returning a constant) it might get inlined anyway.
 """
-macro noinline(ex)
-    esc(isa(ex, Expr) ? pushmeta!(ex, :noinline) : ex)
+macro noinline(x)
+    return annotate_meta_def_or_block(x, :noinline)
 end
 
 """
@@ -273,6 +385,15 @@ end
 
 ## some macro utilities ##
 
+unwrap_macrocalls(@nospecialize(x)) = x
+function unwrap_macrocalls(ex::Expr)
+    inner = ex
+    while inner.head === :macrocall
+        inner = inner.args[end]::Expr
+    end
+    return inner
+end
+
 function pushmeta!(ex::Expr, sym::Symbol, args::Any...)
     if isempty(args)
         tag = sym
@@ -280,10 +401,7 @@ function pushmeta!(ex::Expr, sym::Symbol, args::Any...)
         tag = Expr(sym, args...)::Expr
     end
 
-    inner = ex
-    while inner.head === :macrocall
-        inner = inner.args[end]::Expr
-    end
+    inner = unwrap_macrocalls(ex)
 
     idx, exargs = findmeta(inner)
     if idx != 0
@@ -333,8 +451,23 @@ function findmetaarg(metaargs, sym)
     return 0
 end
 
-function is_short_function_def(ex)
-    ex.head === :(=) || return false
+function annotate_meta_def_or_block(@nospecialize(ex), meta::Symbol)
+    inner = unwrap_macrocalls(ex)
+    if is_function_def(inner)
+        # annotation on a definition
+        return esc(pushmeta!(ex, meta))
+    else
+        # annotation on a block
+        return Expr(:block,
+                    Expr(meta, true),
+                    Expr(:local, Expr(:(=), :val, esc(ex))),
+                    Expr(meta, false),
+                    :val)
+    end
+end
+
+function is_short_function_def(@nospecialize(ex))
+    isexpr(ex, :(=)) || return false
     while length(ex.args) >= 1 && isa(ex.args[1], Expr)
         (ex.args[1].head === :call) && return true
         (ex.args[1].head === :where || ex.args[1].head === :(::)) || return false
@@ -342,9 +475,11 @@ function is_short_function_def(ex)
     end
     return false
 end
+is_function_def(@nospecialize(ex)) =
+    return isexpr(ex, :function) || is_short_function_def(ex) || isexpr(ex, :->)
 
 function findmeta(ex::Expr)
-    if ex.head === :function || is_short_function_def(ex) || ex.head === :->
+    if is_function_def(ex)
         body = ex.args[2]::Expr
         body.head === :block || error(body, " is not a block expression")
         return findmeta_block(ex.args)
@@ -408,7 +543,7 @@ the global scope or depending on mutable elements.
 See [Metaprogramming](@ref) for further details.
 
 ## Example:
-```julia
+```jldoctest
 julia> @generated function bar(x)
            if x <: Integer
                return :(x ^ 2)
@@ -434,7 +569,10 @@ macro generated(f)
                          Expr(:block,
                               lno,
                               Expr(:if, Expr(:generated),
-                                   body,
+                                   # https://github.com/JuliaLang/julia/issues/25678
+                                   Expr(:block,
+                                        :(local tmp = $body),
+                                        :(if tmp isa Core.CodeInfo; return tmp; else tmp; end)),
                                    Expr(:block,
                                         Expr(:meta, :generated_only),
                                         Expr(:return, nothing))))))
@@ -442,3 +580,234 @@ macro generated(f)
         error("invalid syntax; @generated must be used with a function definition")
     end
 end
+
+
+"""
+    @atomic var
+    @atomic order ex
+
+Mark `var` or `ex` as being performed atomically, if `ex` is a supported expression.
+
+    @atomic a.b.x = new
+    @atomic a.b.x += addend
+    @atomic :acquire_release a.b.x = new
+    @atomic :acquire_release a.b.x += addend
+
+Perform the store operation expressed on the right atomically and return the
+new value.
+
+With `=`, this operation translates to a `setproperty!(a.b, :x, new)` call.
+With a modifying operator such as `+=`, this operation translates to a
+`modifyproperty!(a.b, :x, +, addend)[2]` call.
+
+    @atomic a.b.x max arg2
+    @atomic a.b.x + arg2
+    @atomic max(a.b.x, arg2)
+    @atomic :acquire_release max(a.b.x, arg2)
+    @atomic :acquire_release a.b.x + arg2
+    @atomic :acquire_release a.b.x max arg2
+
+Perform the binary operation expressed on the right atomically. Store the
+result into the field in the first argument and return the values `(old, new)`.
+
+This operation translates to a `modifyproperty!(a.b, :x, func, arg2)` call.
+
+
+See [Per-field atomics](@ref man-atomics) section in the manual for more details.
+
+```jldoctest
+julia> mutable struct Atomic{T}; @atomic x::T; end
+
+julia> a = Atomic(1)
+Atomic{Int64}(1)
+
+julia> @atomic a.x # fetch field x of a, with sequential consistency
+1
+
+julia> @atomic :sequentially_consistent a.x = 2 # set field x of a, with sequential consistency
+2
+
+julia> @atomic a.x += 1 # increment field x of a, with sequential consistency
+3
+
+julia> @atomic a.x + 1 # increment field x of a, with sequential consistency
+3 => 4
+
+julia> @atomic a.x # fetch field x of a, with sequential consistency
+4
+
+julia> @atomic max(a.x, 10) # change field x of a to the max value, with sequential consistency
+4 => 10
+
+julia> @atomic a.x max 5 # again change field x of a to the max value, with sequential consistency
+10 => 10
+```
+
+!!! compat "Julia 1.7"
+    This functionality requires at least Julia 1.7.
+"""
+macro atomic(ex)
+    if !isa(ex, Symbol) && !is_expr(ex, :(::))
+        return make_atomic(QuoteNode(:sequentially_consistent), ex)
+    end
+    return esc(Expr(:atomic, ex))
+end
+macro atomic(order, ex)
+    order isa QuoteNode || (order = esc(order))
+    return make_atomic(order, ex)
+end
+macro atomic(a1, op, a2)
+    return make_atomic(QuoteNode(:sequentially_consistent), a1, op, a2)
+end
+macro atomic(order, a1, op, a2)
+    order isa QuoteNode || (order = esc(order))
+    return make_atomic(order, a1, op, a2)
+end
+function make_atomic(order, ex)
+    @nospecialize
+    if ex isa Expr
+        if isexpr(ex, :., 2)
+            l, r = esc(ex.args[1]), esc(ex.args[2])
+            return :(getproperty($l, $r, $order))
+        elseif isexpr(ex, :call, 3)
+            return make_atomic(order, ex.args[2], ex.args[1], ex.args[3])
+        elseif ex.head === :(=)
+            l, r = ex.args[1], esc(ex.args[2])
+            if is_expr(l, :., 2)
+                ll, lr = esc(l.args[1]), esc(l.args[2])
+                return :(setproperty!($ll, $lr, $r, $order))
+            end
+        end
+        if length(ex.args) == 2
+            if ex.head === :(+=)
+                op = :+
+            elseif ex.head === :(-=)
+                op = :-
+            elseif @isdefined string
+                shead = string(ex.head)
+                if endswith(shead, '=')
+                    op = Symbol(shead[1:prevind(shead, end)])
+                end
+            end
+            if @isdefined(op)
+                return Expr(:ref, make_atomic(order, ex.args[1], op, ex.args[2]), 2)
+            end
+        end
+    end
+    error("could not parse @atomic expression $ex")
+end
+function make_atomic(order, a1, op, a2)
+    @nospecialize
+    is_expr(a1, :., 2) || error("@atomic modify expression missing field access")
+    a1l, a1r, op, a2 = esc(a1.args[1]), esc(a1.args[2]), esc(op), esc(a2)
+    return :(modifyproperty!($a1l, $a1r, $op, $a2, $order))
+end
+
+
+"""
+    @atomicswap a.b.x = new
+    @atomicswap :sequentially_consistent a.b.x = new
+
+Stores `new` into `a.b.x` and returns the old value of `a.b.x`.
+
+This operation translates to a `swapproperty!(a.b, :x, new)` call.
+
+See [Per-field atomics](@ref man-atomics) section in the manual for more details.
+
+```jldoctest
+julia> mutable struct Atomic{T}; @atomic x::T; end
+
+julia> a = Atomic(1)
+Atomic{Int64}(1)
+
+julia> @atomicswap a.x = 2+2 # replace field x of a with 4, with sequential consistency
+1
+
+julia> @atomic a.x # fetch field x of a, with sequential consistency
+4
+```
+
+!!! compat "Julia 1.7"
+    This functionality requires at least Julia 1.7.
+"""
+macro atomicswap(order, ex)
+    order isa QuoteNode || (order = esc(order))
+    return make_atomicswap(order, ex)
+end
+macro atomicswap(ex)
+    return make_atomicswap(QuoteNode(:sequentially_consistent), ex)
+end
+function make_atomicswap(order, ex)
+    @nospecialize
+    is_expr(ex, :(=), 2) || error("@atomicswap expression missing assignment")
+    l, val = ex.args[1], esc(ex.args[2])
+    is_expr(l, :., 2) || error("@atomicswap expression missing field access")
+    ll, lr = esc(l.args[1]), esc(l.args[2])
+    return :(swapproperty!($ll, $lr, $val, $order))
+end
+
+
+"""
+    @atomicreplace a.b.x expected => desired
+    @atomicreplace :sequentially_consistent a.b.x expected => desired
+    @atomicreplace :sequentially_consistent :monotonic a.b.x expected => desired
+
+Perform the conditional replacement expressed by the pair atomically, returning
+the values `(old, success::Bool)`, where `success` indicates whether the
+replacement was completed.
+
+This operation translates to a `replaceproperty!(a.b, :x, expected, desired)` call.
+
+See [Per-field atomics](@ref man-atomics) section in the manual for more details.
+
+```jldoctest
+julia> mutable struct Atomic{T}; @atomic x::T; end
+
+julia> a = Atomic(1)
+Atomic{Int64}(1)
+
+julia> @atomicreplace a.x 1 => 2 # replace field x of a with 2 if it was 1, with sequential consistency
+(old = 1, success = true)
+
+julia> @atomic a.x # fetch field x of a, with sequential consistency
+2
+
+julia> @atomicreplace a.x 1 => 2 # replace field x of a with 2 if it was 1, with sequential consistency
+(old = 2, success = false)
+
+julia> xchg = 2 => 0; # replace field x of a with 0 if it was 2, with sequential consistency
+
+julia> @atomicreplace a.x xchg
+(old = 2, success = true)
+
+julia> @atomic a.x # fetch field x of a, with sequential consistency
+0
+```
+
+!!! compat "Julia 1.7"
+    This functionality requires at least Julia 1.7.
+"""
+macro atomicreplace(success_order, fail_order, ex, old_new)
+    fail_order isa QuoteNode || (fail_order = esc(fail_order))
+    success_order isa QuoteNode || (success_order = esc(success_order))
+    return make_atomicreplace(success_order, fail_order, ex, old_new)
+end
+macro atomicreplace(order, ex, old_new)
+    order isa QuoteNode || (order = esc(order))
+    return make_atomicreplace(order, order, ex, old_new)
+end
+macro atomicreplace(ex, old_new)
+    return make_atomicreplace(QuoteNode(:sequentially_consistent), QuoteNode(:sequentially_consistent), ex, old_new)
+end
+function make_atomicreplace(success_order, fail_order, ex, old_new)
+    @nospecialize
+    is_expr(ex, :., 2) || error("@atomicreplace expression missing field access")
+    ll, lr = esc(ex.args[1]), esc(ex.args[2])
+    if is_expr(old_new, :call, 3) && old_new.args[1] === :(=>)
+        exp, rep = esc(old_new.args[2]), esc(old_new.args[3])
+        return :(replaceproperty!($ll, $lr, $exp, $rep, $success_order, $fail_order))
+    else
+        old_new = esc(old_new)
+        return :(replaceproperty!($ll, $lr, $old_new::Pair..., $success_order, $fail_order))
+    end
+end
diff --git a/base/fastmath.jl b/base/fastmath.jl
index a7d8b86fcb26ac..c01a8a5b225f7b 100644
--- a/base/fastmath.jl
+++ b/base/fastmath.jl
@@ -273,6 +273,9 @@ end
 
 
 # Math functions
+exp2_fast(x::Union{Float32,Float64})  = Base.Math.exp2_fast(x)  # the three exp variants forward to Base.Math
+exp_fast(x::Union{Float32,Float64})   = Base.Math.exp_fast(x)
+exp10_fast(x::Union{Float32,Float64}) = Base.Math.exp10_fast(x)
 
 # builtins
 
@@ -282,28 +285,6 @@ pow_fast(x::FloatTypes, ::Val{p}) where {p} = pow_fast(x, p) # inlines already v
 @inline pow_fast(x, v::Val) = Base.literal_pow(^, x, v)
 
 sqrt_fast(x::FloatTypes) = sqrt_llvm_fast(x)
-
-# libm
-
-const libm = Base.libm_name
-
-for f in (:acosh, :asinh, :atanh, :cbrt,
-          :cosh, :exp2, :expm1, :log10, :log1p, :log2,
-          :log, :sinh, :tanh)
-    f_fast = fast_op[f]
-    @eval begin
-        $f_fast(x::Float32) =
-            ccall(($(string(f,"f")),libm), Float32, (Float32,), x)
-        $f_fast(x::Float64) =
-            ccall(($(string(f)),libm), Float64, (Float64,), x)
-    end
-end
-
-pow_fast(x::Float32, y::Float32) =
-    ccall(("powf",libm), Float32, (Float32,Float32), x, y)
-pow_fast(x::Float64, y::Float64) =
-    ccall(("pow",libm), Float64, (Float64,Float64), x, y)
-
 sincos_fast(v::FloatTypes) = sincos(v)
 
 @inline function sincos_fast(v::Float16)
diff --git a/base/file.jl b/base/file.jl
index e1e323700344f7..85450ff2d36450 100644
--- a/base/file.jl
+++ b/base/file.jl
@@ -8,6 +8,7 @@ export
     chown,
     cp,
     cptree,
+    hardlink,
     mkdir,
     mkpath,
     mktemp,
@@ -34,6 +35,8 @@ export
 
 Get the current working directory.
 
+See also: [`cd`](@ref), [`tempdir`](@ref).
+
 # Examples
 ```julia-repl
 julia> pwd()
@@ -67,6 +70,8 @@ end
 
 Set the current working directory.
 
+See also: [`pwd`](@ref), [`mkdir`](@ref), [`mkpath`](@ref), [`mktempdir`](@ref).
+
 # Examples
 ```julia-repl
 julia> cd("/home/JuliaUser/Projects/julia")
@@ -354,6 +359,13 @@ If `follow_symlinks=false`, and `src` is a symbolic link, `dst` will be created
 symbolic link. If `follow_symlinks=true` and `src` is a symbolic link, `dst` will be a copy
 of the file or directory `src` refers to.
 Return `dst`.
+
+!!! note
+    The `cp` function is different from the `cp` command. The `cp` function always operates on
+    the assumption that `dst` is a file, while the command does different things depending
+    on whether `dst` is a directory or a file.
+    Using `force=true` when `dst` is a directory will result in loss of all the contents present
+    in the `dst` directory, and `dst` will become a file that has the contents of `src` instead.
 """
 function cp(src::AbstractString, dst::AbstractString; force::Bool=false,
                                                       follow_symlinks::Bool=false)
@@ -673,6 +685,8 @@ the temporary directory is automatically deleted when the process exits.
     The `cleanup` keyword argument was added in Julia 1.3. Relatedly, starting from 1.3,
     Julia will remove the temporary paths created by `mktempdir` when the Julia process
     exits, unless `cleanup` is explicitly set to `false`.
+
+See also: [`mktemp`](@ref), [`mkdir`](@ref).
 """
 function mktempdir(parent::AbstractString=tempdir();
     prefix::AbstractString=temp_prefix, cleanup::Bool=true)
@@ -707,6 +721,8 @@ end
 
 Apply the function `f` to the result of [`mktemp(parent)`](@ref) and remove the
 temporary file upon completion.
+
+See also: [`mktempdir`](@ref).
 """
 function mktemp(fn::Function, parent::AbstractString=tempdir())
     (tmp_path, tmp_io) = mktemp(parent, cleanup=false)
@@ -730,6 +746,8 @@ end
 Apply the function `f` to the result of [`mktempdir(parent; prefix)`](@ref) and remove the
 temporary directory all of its contents upon completion.
 
+See also: [`mktemp`](@ref), [`mkdir`](@ref).
+
 !!! compat "Julia 1.2"
     The `prefix` keyword argument was added in Julia 1.2.
 """
@@ -988,6 +1006,26 @@ if Sys.iswindows()
     const UV__EPERM              = -4048
 end
 
+"""
+    hardlink(src::AbstractString, dst::AbstractString)
+
+Creates a hard link to an existing source file `src` with the name `dst`. The
+destination, `dst`, must not exist.
+
+See also: [`symlink`](@ref).
+
+!!! compat "Julia 1.8"
+    This method was added in Julia 1.8.
+"""
+function hardlink(src::AbstractString, dst::AbstractString)
+    err = ccall(:jl_fs_hardlink, Int32, (Cstring, Cstring), src, dst)
+    if err < 0 # a negative status means the call failed
+        msg = "hardlink($(repr(src)), $(repr(dst)))" # the attempted call, used as the error prefix
+        uv_error(msg, err)
+    end
+    return nothing
+end
+
 """
     symlink(target::AbstractString, link::AbstractString; dir_target = false)
 
@@ -1010,6 +1048,8 @@ a junction point will be used.  Best practice for creating symlinks on Windows
 is to create them only after the files/directories they reference are already
 created.
 
+See also: [`hardlink`](@ref).
+
 !!! note
     This function raises an error under operating systems that do not support
     soft symbolic links, such as Windows XP.
diff --git a/base/filesystem.jl b/base/filesystem.jl
index 191f6537d80418..dfa881068c6abd 100644
--- a/base/filesystem.jl
+++ b/base/filesystem.jl
@@ -4,6 +4,22 @@
 
 module Filesystem
 
+const S_IFDIR  = 0o040000  # directory
+const S_IFCHR  = 0o020000  # character device
+const S_IFBLK  = 0o060000  # block device
+const S_IFREG  = 0o100000  # regular file
+const S_IFIFO  = 0o010000  # fifo (named pipe)
+const S_IFLNK  = 0o120000  # symbolic link
+const S_IFSOCK = 0o140000  # socket file
+const S_IFMT   = 0o170000  # mask covering all of the file type bits above
+
+const S_ISUID = 0o4000  # set UID bit
+const S_ISGID = 0o2000  # set GID bit
+const S_ENFMT = S_ISGID # file locking enforcement
+const S_ISVTX = 0o1000  # sticky bit
+const S_IRWXU = 0o0700  # mask for owner permissions
+const S_IRUSR = 0o0400  # read by owner
+
 const S_IRUSR = 0o400
 const S_IWUSR = 0o200
 const S_IXUSR = 0o100
@@ -42,7 +58,7 @@ import .Base:
     IOError, _UVError, _sizeof_uv_fs, check_open, close, eof, eventloop, fd, isopen,
     bytesavailable, position, read, read!, readavailable, seek, seekend, show,
     skip, stat, unsafe_read, unsafe_write, write, transcode, uv_error,
-    rawhandle, OS_HANDLE, INVALID_OS_HANDLE, windowserror, filesize
+    setup_stdio, rawhandle, OS_HANDLE, INVALID_OS_HANDLE, windowserror, filesize
 
 import .Base.RefValue
 
@@ -76,6 +92,7 @@ if OS_HANDLE !== RawFD
 end
 
 rawhandle(file::File) = file.handle
+setup_stdio(file::File, ::Bool) = (file, false)
 
 # Filesystem.open, not Base.open
 function open(path::AbstractString, flags::Integer, mode::Integer=0)
diff --git a/base/float.jl b/base/float.jl
index cb4000b51fac24..867abd30eeed6d 100644
--- a/base/float.jl
+++ b/base/float.jl
@@ -36,6 +36,20 @@ const Inf = Inf64
     Inf, Inf64
 
 Positive infinity of type [`Float64`](@ref).
+
+See also: [`isfinite`](@ref), [`typemax`](@ref), [`NaN`](@ref), [`Inf32`](@ref).
+
+# Examples
+```jldoctest
+julia> π/0
+Inf
+
+julia> +1.0 / -0.0
+-Inf
+
+julia> ℯ^-Inf
+0.0
+```
 """
 Inf, Inf64
 
@@ -44,6 +58,20 @@ const NaN = NaN64
     NaN, NaN64
 
 A not-a-number value of type [`Float64`](@ref).
+
+See also: [`isnan`](@ref), [`missing`](@ref), [`NaN32`](@ref), [`Inf`](@ref).
+
+# Examples
+```jldoctest
+julia> 0/0
+NaN
+
+julia> Inf - Inf
+NaN
+
+julia> NaN == NaN, isequal(NaN, NaN), NaN === NaN
+(false, true, true)
+```
 """
 NaN, NaN64
 
@@ -226,6 +254,17 @@ Bool(x::Float16) = x==0 ? false : x==1 ? true : throw(InexactError(:Bool, Bool,
     float(x)
 
 Convert a number or array to a floating point data type.
+
+See also: [`complex`](@ref), [`oftype`](@ref), [`convert`](@ref).
+
+# Examples
+```jldoctest
+julia> float(1:1000)
+1.0:1.0:1000.0
+
+julia> float(typemax(Int32))
+2.147483647e9
+```
 """
 float(x) = AbstractFloat(x)
 
@@ -251,8 +290,18 @@ float(::Type{T}) where {T<:AbstractFloat} = T
     unsafe_trunc(T, x)
 
 Return the nearest integral value of type `T` whose absolute value is
-less than or equal to `x`. If the value is not representable by `T`, an arbitrary value will
-be returned.
+less than or equal to the absolute value of `x`. If the value is not representable by `T`,
+an arbitrary value will be returned.
+See also [`trunc`](@ref).
+
+# Examples
+```jldoctest
+julia> unsafe_trunc(Int, -2.2)
+-2
+
+julia> unsafe_trunc(Int, NaN)
+-9223372036854775808
+```
 """
 function unsafe_trunc end
 
@@ -400,9 +449,19 @@ end
 isequal(x::Float16, y::Float16) = fpiseq(x, y)
 isequal(x::Float32, y::Float32) = fpiseq(x, y)
 isequal(x::Float64, y::Float64) = fpiseq(x, y)
-isless( x::Float16, y::Float16) = fpislt(x, y)
-isless( x::Float32, y::Float32) = fpislt(x, y)
-isless( x::Float64, y::Float64) = fpislt(x, y)
+
+# map the bits of a float to a signed integer by treating them as a sign-magnitude integer
+@inline function _fpint(x)
+    IntT = inttype(typeof(x))
+    ix = reinterpret(IntT, x)
+    return ifelse(ix < zero(IntT), ix ⊻ typemax(IntT), ix) # sign bit set: flip every bit except the sign bit
+end
+
+@inline function isless(a::T, b::T) where T<:IEEEFloat
+    (isnan(a) || isnan(b)) && return !isnan(a) # with a NaN involved: true only when `a` is not NaN (and `b` is)
+
+    return _fpint(a) < _fpint(b) # integer comparison; this also orders -0.0 before 0.0
+end
 
 # Exact Float (Tf) vs Integer (Ti) comparisons
 # Assumes:
@@ -418,7 +477,7 @@ isless( x::Float64, y::Float64) = fpislt(x, y)
 #  b. unsafe_convert undefined behaviour if fy == Tf(typemax(Ti))
 #     (but consequently x == fy > y)
 for Ti in (Int64,UInt64,Int128,UInt128)
-    for Tf in (Float16,Float32,Float64)
+    for Tf in (Float32,Float64)
         @eval begin
             function ==(x::$Tf, y::$Ti)
                 fy = ($Tf)(y)
@@ -469,6 +528,8 @@ abs(x::Float64) = abs_float(x)
 
 Test whether a number value is a NaN, an indeterminate value which is neither an infinity
 nor a finite number ("not a number").
+
+See also: [`iszero`](@ref), [`isone`](@ref), [`isinf`](@ref), [`ismissing`](@ref).
 """
 isnan(x::AbstractFloat) = (x != x)::Bool
 isnan(x::Number) = false
@@ -481,6 +542,8 @@ isfinite(x::Integer) = true
     isinf(f) -> Bool
 
 Test whether a number is infinite.
+
+See also: [`Inf`](@ref), [`iszero`](@ref), [`isfinite`](@ref), [`isnan`](@ref).
 """
 isinf(x::Real) = !isnan(x) & !isfinite(x)
 
@@ -641,7 +704,7 @@ uabs(x::BitSigned) = unsigned(abs(x))
     nextfloat(x::AbstractFloat, n::Integer)
 
 The result of `n` iterative applications of `nextfloat` to `x` if `n >= 0`, or `-n`
-applications of `prevfloat` if `n < 0`.
+applications of [`prevfloat`](@ref) if `n < 0`.
 """
 function nextfloat(f::IEEEFloat, d::Integer)
     F = typeof(f)
@@ -686,6 +749,8 @@ end
 
 Return the smallest floating point number `y` of the same type as `x` such `x < y`. If no
 such `y` exists (e.g. if `x` is `Inf` or `NaN`), then return `x`.
+
+See also: [`prevfloat`](@ref), [`eps`](@ref), [`issubnormal`](@ref).
 """
 nextfloat(x::AbstractFloat) = nextfloat(x,1)
 
@@ -693,7 +758,7 @@ nextfloat(x::AbstractFloat) = nextfloat(x,1)
     prevfloat(x::AbstractFloat, n::Integer)
 
 The result of `n` iterative applications of `prevfloat` to `x` if `n >= 0`, or `-n`
-applications of `nextfloat` if `n < 0`.
+applications of [`nextfloat`](@ref) if `n < 0`.
 """
 prevfloat(x::AbstractFloat, d::Integer) = nextfloat(x, -d)
 
@@ -815,6 +880,8 @@ floatmin(x::T) where {T<:AbstractFloat} = floatmin(T)
 
 Return the largest finite number representable by the floating-point type `T`.
 
+See also: [`typemax`](@ref), [`floatmin`](@ref), [`eps`](@ref).
+
 # Examples
 ```jldoctest
 julia> floatmax(Float16)
@@ -825,6 +892,9 @@ julia> floatmax(Float32)
 
 julia> floatmax()
 1.7976931348623157e308
+
+julia> typemax(Float64)
+Inf
 ```
 """
 floatmax(x::T) where {T<:AbstractFloat} = floatmax(T)
@@ -879,6 +949,8 @@ is the nearest floating point number to ``y``, then
 |y-x| \\leq \\operatorname{eps}(x)/2.
 ```
 
+See also: [`nextfloat`](@ref), [`issubnormal`](@ref), [`floatmax`](@ref).
+
 # Examples
 ```jldoctest
 julia> eps(1.0)
@@ -910,6 +982,17 @@ bswap(x::IEEEFloat) = bswap_int(x)
 uinttype(::Type{Float64}) = UInt64
 uinttype(::Type{Float32}) = UInt32
 uinttype(::Type{Float16}) = UInt16
+inttype(::Type{Float64}) = Int64 # signed integer type with the same bit width as the float type
+inttype(::Type{Float32}) = Int32
+inttype(::Type{Float16}) = Int16
+# floating-point type with the same bit width as the given integer type
+floattype(::Type{UInt64}) = Float64
+floattype(::Type{UInt32}) = Float32
+floattype(::Type{UInt16}) = Float16
+floattype(::Type{Int64}) = Float64
+floattype(::Type{Int32}) = Float32
+floattype(::Type{Int16}) = Float16
+
 
 ## Array operations on floating point numbers ##
 
diff --git a/base/floatfuncs.jl b/base/floatfuncs.jl
index 2bca2989bab0b3..bae27d642e7c72 100644
--- a/base/floatfuncs.jl
+++ b/base/floatfuncs.jl
@@ -166,6 +166,16 @@ function _round_invstep(x, invstep, r::RoundingMode)
     return y
 end
 
+# round x to multiples of 1/(invstepsqrt^2)
+# Scaling twice by the square root of the inverse step avoids forming an overflowing inverse step
+function _round_invstepsqrt(x, invstepsqrt, r::RoundingMode)
+    y = round((x * invstepsqrt) * invstepsqrt, r) / invstepsqrt / invstepsqrt
+    if !isfinite(y) # the scaled value overflowed, or x itself is not finite: return x unchanged
+        return x
+    end
+    return y
+end
+
 # round x to multiples of step
 function _round_step(x, step, r::RoundingMode)
     # TODO: use div with rounding mode
@@ -186,10 +196,15 @@ function _round_digits(x, r::RoundingMode, digits::Integer, base)
     fx = float(x)
     if digits >= 0
         invstep = oftype(fx, base)^digits
-        _round_invstep(fx, invstep, r)
+        if isfinite(invstep)
+            return _round_invstep(fx, invstep, r)
+        else
+            invstepsqrt = oftype(fx, base)^oftype(fx, digits/2)
+            return _round_invstepsqrt(fx, invstepsqrt, r)
+        end
     else
         step = oftype(fx, base)^-digits
-        _round_step(fx, step, r)
+        return _round_step(fx, step, r)
     end
 end
 
@@ -225,13 +240,15 @@ end
 """
     isapprox(x, y; atol::Real=0, rtol::Real=atol>0 ? 0 : √eps, nans::Bool=false[, norm::Function])
 
-Inexact equality comparison: `true` if `norm(x-y) <= max(atol, rtol*max(norm(x), norm(y)))`. The
-default `atol` is zero and the default `rtol` depends on the types of `x` and `y`. The keyword
-argument `nans` determines whether or not NaN values are considered equal (defaults to false).
+Inexact equality comparison. Two numbers compare equal if their relative distance *or* their
+absolute distance is within tolerance bounds: `isapprox` returns `true` if
+`norm(x-y) <= max(atol, rtol*max(norm(x), norm(y)))`. The default `atol` is zero and the
+default `rtol` depends on the types of `x` and `y`. The keyword argument `nans` determines
+whether or not NaN values are considered equal (defaults to false).
 
 For real or complex floating-point values, if an `atol > 0` is not specified, `rtol` defaults to
 the square root of [`eps`](@ref) of the type of `x` or `y`, whichever is bigger (least precise).
-This corresponds to requiring equality of about half of the significand digits. Otherwise,
+This corresponds to requiring equality of about half of the significant digits. Otherwise,
 e.g. for integer arguments or if an `atol > 0` is supplied, `rtol` defaults to zero.
 
 The `norm` keyword defaults to `abs` for numeric `(x,y)` and to `LinearAlgebra.norm` for
@@ -259,13 +276,16 @@ but an absurdly large tolerance if `x` is the
 
 # Examples
 ```jldoctest
-julia> 0.1 ≈ (0.1 - 1e-10)
+julia> isapprox(0.1, 0.15; atol=0.05)
 true
 
-julia> isapprox(10, 11; atol = 2)
+julia> isapprox(0.1, 0.15; rtol=0.34)
 true
 
-julia> isapprox([10.0^9, 1.0], [10.0^9, 2.0])
+julia> isapprox(0.1, 0.15; rtol=0.33)
+false
+
+julia> 0.1 + 1e-10 ≈ 0.1
 true
 
 julia> 1e-10 ≈ 0
@@ -273,6 +293,9 @@ false
 
 julia> isapprox(1e-10, 0, atol=1e-8)
 true
+
+julia> isapprox([10.0^9, 1.0], [10.0^9, 2.0]) # using `norm`
+true
 ```
 """
 function isapprox(x::Number, y::Number;
@@ -287,6 +310,9 @@ end
 Create a function that compares its argument to `x` using `≈`, i.e. a function equivalent to `y -> y ≈ x`.
 
 The keyword arguments supported here are the same as those in the 2-argument `isapprox`.
+
+!!! compat "Julia 1.5"
+    This method requires Julia 1.5 or later.
 """
 isapprox(y; kwargs...) = x -> isapprox(x, y; kwargs...)
 
diff --git a/base/gcutils.jl b/base/gcutils.jl
index 1280b4ab71afca..7010f8fa7d2f1b 100644
--- a/base/gcutils.jl
+++ b/base/gcutils.jl
@@ -51,7 +51,7 @@ function finalizer(@nospecialize(f), @nospecialize(o))
 end
 
 function finalizer(f::Ptr{Cvoid}, o::T) where T
-    @_inline_meta
+    @inline
     if !ismutable(o)
         error("objects of type ", typeof(o), " cannot be finalized")
     end
@@ -65,8 +65,8 @@ end
 
 Immediately run finalizers registered for object `x`.
 """
-finalize(@nospecialize(o)) = ccall(:jl_finalize_th, Cvoid, (Ptr{Cvoid}, Any,),
-                                   Core.getptls(), o)
+finalize(@nospecialize(o)) = ccall(:jl_finalize_th, Cvoid, (Any, Any,),
+                                   current_task(), o)
 
 """
     Base.GC
@@ -117,7 +117,7 @@ another Task or thread.
 enable_finalizers(on::Bool) = on ? enable_finalizers() : disable_finalizers()
 
 function enable_finalizers()
-    Base.@_inline_meta
+    Base.@inline
     ccall(:jl_gc_enable_finalizers_internal, Cvoid, ())
     if unsafe_load(cglobal(:jl_gc_have_pending_finalizers, Cint)) != 0
         ccall(:jl_gc_run_pending_finalizers, Cvoid, (Ptr{Cvoid},), C_NULL)
@@ -125,7 +125,7 @@ function enable_finalizers()
 end
 
 function disable_finalizers()
-    Base.@_inline_meta
+    Base.@inline
     ccall(:jl_gc_disable_finalizers_internal, Cvoid, ())
 end
 
diff --git a/base/generator.jl b/base/generator.jl
index e5b3e46f883617..1317aaf5223e05 100644
--- a/base/generator.jl
+++ b/base/generator.jl
@@ -40,7 +40,7 @@ Generator(::Type{T}, iter::I) where {T,I} = Generator{I,Type{T}}(T, iter)
 Generator(::Type{T}, I1, I2, Is...) where {T} = Generator(a->T(a...), zip(I1, I2, Is...))
 
 function iterate(g::Generator, s...)
-    @_inline_meta
+    @inline
     y = iterate(g.iter, s...)
     y === nothing && return nothing
     y = y::Tuple{Any, Any} # try to give inference some idea of what to expect about the behavior of the next line
diff --git a/base/gmp.jl b/base/gmp.jl
index d0446e7d3d1692..1a04becedf7824 100644
--- a/base/gmp.jl
+++ b/base/gmp.jl
@@ -4,13 +4,13 @@ module GMP
 
 export BigInt
 
-import .Base: *, +, -, /, <, <<, >>, >>>, <=, ==, >, >=, ^, (~), (&), (|), xor,
+import .Base: *, +, -, /, <, <<, >>, >>>, <=, ==, >, >=, ^, (~), (&), (|), xor, nand, nor,
              binomial, cmp, convert, div, divrem, factorial, cld, fld, gcd, gcdx, lcm, mod,
              ndigits, promote_rule, rem, show, isqrt, string, powermod,
-             sum, trailing_zeros, trailing_ones, count_ones, tryparse_internal,
+             sum, prod, trailing_zeros, trailing_ones, count_ones, tryparse_internal,
              bin, oct, dec, hex, isequal, invmod, _prevpow2, _nextpow2, ndigits0zpb,
              widen, signed, unsafe_trunc, trunc, iszero, isone, big, flipsign, signbit,
-             sign, hastypemax, isodd, digits!
+             sign, hastypemax, isodd, iseven, digits!, hash, hash_integer
 
 if Clong == Int32
     const ClongMax = Union{Int8, Int16, Int32}
@@ -94,10 +94,10 @@ const ALLOC_OVERFLOW_FUNCTION = Ref(false)
 function __init__()
     try
         if version().major != VERSION.major || bits_per_limb() != BITS_PER_LIMB
-            msg = bits_per_limb() != BITS_PER_LIMB ? error : warn
-            msg("The dynamically loaded GMP library (v\"$(version())\" with __gmp_bits_per_limb == $(bits_per_limb()))\n",
-                "does not correspond to the compile time version (v\"$VERSION\" with __gmp_bits_per_limb == $BITS_PER_LIMB).\n",
-                "Please rebuild Julia.")
+            msg = """The dynamically loaded GMP library (v\"$(version())\" with __gmp_bits_per_limb == $(bits_per_limb()))
+                     does not correspond to the compile time version (v\"$VERSION\" with __gmp_bits_per_limb == $BITS_PER_LIMB).
+                     Please rebuild Julia."""
+            bits_per_limb() != BITS_PER_LIMB ? @error(msg) : @warn(msg)
         end
 
         ccall((:__gmp_set_memory_functions, :libgmp), Cvoid,
@@ -343,6 +343,7 @@ end
 rem(x::Integer, ::Type{BigInt}) = BigInt(x)
 
 isodd(x::BigInt) = MPZ.tstbit(x, 0)
+iseven(x::BigInt) = !isodd(x)
 
 function (::Type{T})(x::BigInt) where T<:Base.BitUnsigned
     if sizeof(T) < sizeof(Limb)
@@ -631,13 +632,26 @@ function gcdx(a::BigInt, b::BigInt)
     g, s, t
 end
 
-sum(arr::AbstractArray{BigInt}) = foldl(MPZ.add!, arr; init=BigInt(0))
-# Note: a similar implementation for `prod` won't be efficient:
-# 1) the time complexity of the allocations is negligible compared to the multiplications
-# 2) assuming arr contains similarly sized BigInts, the multiplications are much more
-# performant when doing e.g. ((a1*a2)*(a3*a4))*(...) rather than a1*(a2*(a3*(...))),
-# which is exactly what the default implementation of `prod` does, via `mapreduce`
-# (which maybe could be slightly optimized for BigInt).
++(x::BigInt, y::BigInt, rest::BigInt...) = sum(tuple(x, y, rest...)) # n-ary `+` goes through the `sum` method below
+sum(arr::Union{AbstractArray{BigInt}, Tuple{BigInt, Vararg{BigInt}}}) =
+    foldl(MPZ.add!, arr; init=BigInt(0)) # folds with `MPZ.add!`, starting from a fresh zero
+
+function prod(arr::AbstractArray{BigInt})
+    # compute first the needed number of bits for the result,
+    # to avoid re-allocations;
+    # GMP will always request n+m limbs for the result in MPZ.mul!,
+    # if the arguments have n and m limbs; so we add all the bits
+    # taken by the array elements, and add BITS_PER_LIMB to that,
+    # to account for the rounding to limbs in MPZ.mul!
+    # (BITS_PER_LIMB-1 would typically be enough, to which we add
+    # 1 for the initial multiplication by init=1 in foldl)
+    nbits = GC.@preserve arr sum(arr; init=BITS_PER_LIMB) do x
+        abs(x.size) * BITS_PER_LIMB - leading_zeros(unsafe_load(x.d)) # reads the limb that `x.d` points to
+    end
+    init = BigInt(; nbits) # accumulator created with the precomputed bit count
+    MPZ.set_si!(init, 1) # start the product at 1
+    foldl(MPZ.mul!, arr; init) # fold the elements into the accumulator with `MPZ.mul!`
+end
 
 factorial(x::BigInt) = isneg(x) ? BigInt(0) : MPZ.fac_ui(x)
 
@@ -755,21 +769,17 @@ Base.add_with_overflow(a::BigInt, b::BigInt) = a + b, false
 Base.sub_with_overflow(a::BigInt, b::BigInt) = a - b, false
 Base.mul_with_overflow(a::BigInt, b::BigInt) = a * b, false
 
-function Base.deepcopy_internal(x::BigInt, stackdict::IdDict)
-    if haskey(stackdict, x)
-        return stackdict[x]
-    end
-    y = MPZ.set(x)
-    stackdict[x] = y
-    return y
-end
+Base.deepcopy_internal(x::BigInt, stackdict::IdDict) = get!(() -> MPZ.set(x), stackdict, x) # reuse the entry for `x` if present, else store `MPZ.set(x)`
 
 ## streamlined hashing for BigInt, by avoiding allocation from shifts ##
 
 if Limb === UInt
     # this condition is true most (all?) of the time, and in this case we can define
-    # an optimized version of the above hash_integer(::Integer, ::UInt) method for BigInt
-    # used e.g. for Rational{BigInt}
+    # an optimized version for BigInt of hash_integer (used e.g. for Rational{BigInt}),
+    # and of hash
+
+    using .Base: hash_uint
+
     function hash_integer(n::BigInt, h::UInt)
         GC.@preserve n begin
             s = n.size
@@ -799,7 +809,7 @@ if Limb === UInt
                 limb <= typemin(Int) % UInt && return hash(-(limb % Int), h)
             end
             pow = trailing_zeros(x)
-            nd = ndigits0z(x, 2)
+            nd = Base.ndigits0z(x, 2)
             idx = _divLimb(pow) + 1
             shift = _modLimb(pow) % UInt
             upshift = BITS_PER_LIMB - shift
@@ -931,7 +941,7 @@ function Base.://(x::Rational{BigInt}, y::Rational{BigInt})
         if iszero(x.num)
             throw(DivideError())
         end
-        return (isneg(x.num) ? -one(BigFloat) : one(BigFloat)) // y.num
+        return (isneg(x.num) ? -one(BigInt) : one(BigInt)) // y.num
     end
     zq = _MPQ()
     ccall((:__gmpq_div, :libgmp), Cvoid,
diff --git a/base/hashing.jl b/base/hashing.jl
index 26b18f11c2fe28..746017f978dcb0 100644
--- a/base/hashing.jl
+++ b/base/hashing.jl
@@ -3,17 +3,19 @@
 ## hashing a single value ##
 
 """
-    hash(x[, h::UInt])
+    hash(x[, h::UInt]) -> UInt
 
 Compute an integer hash code such that `isequal(x,y)` implies `hash(x)==hash(y)`. The
 optional second argument `h` is a hash code to be mixed with the result.
 
 New types should implement the 2-argument form, typically by calling the 2-argument `hash`
 method recursively in order to mix hashes of the contents with each other (and with `h`).
-Typically, any type that implements `hash` should also implement its own `==` (hence
-`isequal`) to guarantee the property mentioned above. Types supporting subtraction
+Typically, any type that implements `hash` should also implement its own [`==`](@ref) (hence
+[`isequal`](@ref)) to guarantee the property mentioned above. Types supporting subtraction
 (operator `-`) should also implement [`widen`](@ref), which is required to hash
 values inside heterogeneous arrays.
+
+See also: [`objectid`](@ref), [`Dict`](@ref), [`Set`](@ref).
 """
 hash(x::Any) = hash(x, zero(UInt))
 hash(w::WeakRef, h::UInt) = hash(w.value, h)
@@ -22,6 +24,8 @@ hash(w::WeakRef, h::UInt) = hash(w.value, h)
 
 hash(@nospecialize(x), h::UInt) = hash_uint(3h - objectid(x))
 
+hash(x::Symbol) = objectid(x) # one-argument form only: a Symbol hashes to its objectid
+
 ## core data hashing functions ##
 
 function hash_64_64(n::UInt64)
diff --git a/base/iddict.jl b/base/iddict.jl
index a03edbb60723b9..7247a85c9afc80 100644
--- a/base/iddict.jl
+++ b/base/iddict.jl
@@ -3,7 +3,7 @@
 """
     IdDict([itr])
 
-`IdDict{K,V}()` constructs a hash table using object-id as hash and
+`IdDict{K,V}()` constructs a hash table using [`objectid`](@ref) as hash and
 `===` as equality with keys of type `K` and values of type `V`.
 
 See [`Dict`](@ref) for further help. In the example below, The `Dict`
diff --git a/base/indices.jl b/base/indices.jl
index 462e62c6d88834..28028f23c72a36 100644
--- a/base/indices.jl
+++ b/base/indices.jl
@@ -321,16 +321,16 @@ which they index. To support those cases, `to_indices(A, I)` calls
 given tuple of indices and the dimensional indices of `A` in tandem. As such,
 not all index types are guaranteed to propagate to `Base.to_index`.
 """
-to_indices(A, I::Tuple) = (@_inline_meta; to_indices(A, axes(A), I))
-to_indices(A, I::Tuple{Any}) = (@_inline_meta; to_indices(A, (eachindex(IndexLinear(), A),), I))
+to_indices(A, I::Tuple) = (@inline; to_indices(A, axes(A), I))
+to_indices(A, I::Tuple{Any}) = (@inline; to_indices(A, (eachindex(IndexLinear(), A),), I))
 # In simple cases, we know that we don't need to use axes(A), optimize those.
 # Having this here avoids invalidations from multidimensional.jl: to_indices(A, I::Tuple{Vararg{Union{Integer, CartesianIndex}}})
 to_indices(A, I::Tuple{}) = ()
 to_indices(A, I::Tuple{Vararg{Int}}) = I
-to_indices(A, I::Tuple{Vararg{Integer}}) = (@_inline_meta; to_indices(A, (), I))
+to_indices(A, I::Tuple{Vararg{Integer}}) = (@inline; to_indices(A, (), I))
 to_indices(A, inds, ::Tuple{}) = ()
 to_indices(A, inds, I::Tuple{Any, Vararg{Any}}) =
-    (@_inline_meta; (to_index(A, I[1]), to_indices(A, _maybetail(inds), tail(I))...))
+    (@inline; (to_index(A, I[1]), to_indices(A, _maybetail(inds), tail(I))...))
 
 _maybetail(::Tuple{}) = ()
 _maybetail(t::Tuple) = tail(t)
@@ -352,20 +352,17 @@ struct Slice{T<:AbstractUnitRange} <: AbstractUnitRange{Int}
 end
 Slice(S::Slice) = S
 axes(S::Slice) = (IdentityUnitRange(S.indices),)
-unsafe_indices(S::Slice) = (IdentityUnitRange(S.indices),)
 axes1(S::Slice) = IdentityUnitRange(S.indices)
 axes(S::Slice{<:OneTo}) = (S.indices,)
-unsafe_indices(S::Slice{<:OneTo}) = (S.indices,)
 axes1(S::Slice{<:OneTo}) = S.indices
 
 first(S::Slice) = first(S.indices)
 last(S::Slice) = last(S.indices)
 size(S::Slice) = (length(S.indices),)
 length(S::Slice) = length(S.indices)
-unsafe_length(S::Slice) = unsafe_length(S.indices)
-getindex(S::Slice, i::Int) = (@_inline_meta; @boundscheck checkbounds(S, i); i)
-getindex(S::Slice, i::AbstractUnitRange{<:Integer}) = (@_inline_meta; @boundscheck checkbounds(S, i); i)
-getindex(S::Slice, i::StepRange{<:Integer}) = (@_inline_meta; @boundscheck checkbounds(S, i); i)
+getindex(S::Slice, i::Int) = (@inline; @boundscheck checkbounds(S, i); i)
+getindex(S::Slice, i::AbstractUnitRange{<:Integer}) = (@inline; @boundscheck checkbounds(S, i); i)
+getindex(S::Slice, i::StepRange{<:Integer}) = (@inline; @boundscheck checkbounds(S, i); i)
 show(io::IO, r::Slice) = print(io, "Base.Slice(", r.indices, ")")
 iterate(S::Slice, s...) = iterate(S.indices, s...)
 
@@ -383,23 +380,24 @@ end
 IdentityUnitRange(S::IdentityUnitRange) = S
 # IdentityUnitRanges are offset and thus have offset axes, so they are their own axes
 axes(S::IdentityUnitRange) = (S,)
-unsafe_indices(S::IdentityUnitRange) = (S,)
 axes1(S::IdentityUnitRange) = S
 axes(S::IdentityUnitRange{<:OneTo}) = (S.indices,)
-unsafe_indices(S::IdentityUnitRange{<:OneTo}) = (S.indices,)
 axes1(S::IdentityUnitRange{<:OneTo}) = S.indices
 
 first(S::IdentityUnitRange) = first(S.indices)
 last(S::IdentityUnitRange) = last(S.indices)
 size(S::IdentityUnitRange) = (length(S.indices),)
 length(S::IdentityUnitRange) = length(S.indices)
-unsafe_length(S::IdentityUnitRange) = unsafe_length(S.indices)
-getindex(S::IdentityUnitRange, i::Int) = (@_inline_meta; @boundscheck checkbounds(S, i); i)
-getindex(S::IdentityUnitRange, i::AbstractUnitRange{<:Integer}) = (@_inline_meta; @boundscheck checkbounds(S, i); i)
-getindex(S::IdentityUnitRange, i::StepRange{<:Integer}) = (@_inline_meta; @boundscheck checkbounds(S, i); i)
+getindex(S::IdentityUnitRange, i::Int) = (@inline; @boundscheck checkbounds(S, i); i)
+getindex(S::IdentityUnitRange, i::AbstractUnitRange{<:Integer}) = (@inline; @boundscheck checkbounds(S, i); i)
+getindex(S::IdentityUnitRange, i::StepRange{<:Integer}) = (@inline; @boundscheck checkbounds(S, i); i)
 show(io::IO, r::IdentityUnitRange) = print(io, "Base.IdentityUnitRange(", r.indices, ")")
 iterate(S::IdentityUnitRange, s...) = iterate(S.indices, s...)
 
+# For OneTo, the values and indices of the values are identical, so this may be defined in Base.
+# In general such an indexing operation would produce offset ranges
+getindex(S::OneTo, I::IdentityUnitRange{<:AbstractUnitRange{<:Integer}}) = (@inline; @boundscheck checkbounds(S, I); I)
+
 """
     LinearIndices(A::AbstractArray)
 
@@ -475,14 +473,14 @@ convert(::Type{LinearIndices{N,R}}, inds::LinearIndices{N}) where {N,R} =
 # AbstractArray implementation
 IndexStyle(::Type{<:LinearIndices}) = IndexLinear()
 axes(iter::LinearIndices) = map(axes1, iter.indices)
-size(iter::LinearIndices) = map(unsafe_length, iter.indices)
+size(iter::LinearIndices) = map(length, iter.indices)
 function getindex(iter::LinearIndices, i::Int)
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(iter, i)
     i
 end
 function getindex(iter::LinearIndices, i::AbstractRange{<:Integer})
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(iter, i)
     @inbounds isa(iter, LinearIndices{1}) ? iter.indices[1][i] : (first(iter):last(iter))[i]
 end
@@ -493,6 +491,6 @@ iterate(iter::LinearIndices, i=1) = i > length(iter) ? nothing : (i, i+1)
 
 # Needed since firstindex and lastindex are defined in terms of LinearIndices
 first(iter::LinearIndices) = 1
-first(iter::LinearIndices{1}) = (@_inline_meta; first(axes1(iter.indices[1])))
-last(iter::LinearIndices) = (@_inline_meta; length(iter))
-last(iter::LinearIndices{1}) = (@_inline_meta; last(axes1(iter.indices[1])))
+first(iter::LinearIndices{1}) = (@inline; first(axes1(iter.indices[1])))
+last(iter::LinearIndices) = (@inline; length(iter))
+last(iter::LinearIndices{1}) = (@inline; last(axes1(iter.indices[1])))
diff --git a/base/initdefs.jl b/base/initdefs.jl
index a61373c394eece..2cac786cfd1940 100644
--- a/base/initdefs.jl
+++ b/base/initdefs.jl
@@ -81,8 +81,7 @@ Here is an overview of some of the subdirectories that may exist in a depot:
 * `packages`: Contains packages, some of which were explicitly installed and some which are implicit dependencies. Maintained by `Pkg.jl`.
 * `registries`: Contains package registries. By default only `General`. Maintained by `Pkg.jl`.
 
-See also:
-[`JULIA_DEPOT_PATH`](@ref JULIA_DEPOT_PATH), and
+See also [`JULIA_DEPOT_PATH`](@ref JULIA_DEPOT_PATH), and
 [Code Loading](@ref code-loading).
 """
 const DEPOT_PATH = String[]
@@ -161,7 +160,7 @@ have special meanings:
 The fully expanded value of `LOAD_PATH` that is searched for projects and packages
 can be seen by calling the `Base.load_path()` function.
 
-See also:
+See also
 [`JULIA_LOAD_PATH`](@ref JULIA_LOAD_PATH),
 [`JULIA_PROJECT`](@ref JULIA_PROJECT),
 [`JULIA_DEPOT_PATH`](@ref JULIA_DEPOT_PATH), and
@@ -307,7 +306,15 @@ function active_project(search_load_path::Bool=true)
     end
 end
 
+"""
+    load_path()
+
+Return the fully expanded value of [`LOAD_PATH`](@ref) that is searched for projects and
+packages.
+"""
 function load_path()
+    cache = LOADING_CACHE[]
+    cache !== nothing && return cache.load_path
     paths = String[]
     for env in LOAD_PATH
         path = load_path_expand(env)
diff --git a/base/int.jl b/base/int.jl
index 02a9ace0fe3f15..17410e9eb3f99c 100644
--- a/base/int.jl
+++ b/base/int.jl
@@ -154,6 +154,8 @@ when `abs` is applied to the minimum representable value of a signed
 integer. That is, when `x == typemin(typeof(x))`, `abs(x) == x < 0`,
 not `-x` as might be expected.
 
+See also: [`abs2`](@ref), [`unsigned`](@ref), [`sign`](@ref).
+
 # Examples
 ```jldoctest
 julia> abs(-3)
@@ -178,12 +180,17 @@ abs(x::Signed) = flipsign(x,x)
 
 Convert a number to an unsigned integer. If the argument is signed, it is reinterpreted as
 unsigned without checking for negative values.
+
+See also: [`signed`](@ref), [`sign`](@ref), [`signbit`](@ref).
+
 # Examples
 ```jldoctest
 julia> unsigned(-2)
 0xfffffffffffffffe
+
 julia> unsigned(2)
 0x0000000000000002
+
 julia> signed(unsigned(-2))
 -2
 ```
@@ -196,6 +203,8 @@ unsigned(x::BitSigned) = reinterpret(typeof(convert(Unsigned, zero(x))), x)
 
 Convert a number to a signed integer. If the argument is unsigned, it is reinterpreted as
 signed without checking for overflow.
+
+See also: [`unsigned`](@ref), [`sign`](@ref), [`signbit`](@ref).
 """
 signed(x) = x % typeof(convert(Signed, zero(x)))
 signed(x::BitUnsigned) = reinterpret(typeof(convert(Signed, zero(x))), x)
@@ -233,6 +242,8 @@ exceptions, see note below).
     type, and so rounding error may occur. In particular, if the exact result is very
     close to `y`, then it may be rounded to `y`.
 
+See also: [`rem`](@ref), [`div`](@ref), [`fld`](@ref), [`mod1`](@ref), [`invmod`](@ref).
+
 ```jldoctest
 julia> mod(8, 3)
 2
@@ -248,6 +259,10 @@ julia> mod(eps(), 3)
 
 julia> mod(-eps(), 3)
 3.0
+
+julia> mod.(-5:5, 3)'
+1×11 adjoint(::Vector{Int64}) with eltype Int64:
+ 1  2  0  1  2  0  1  2  0  1  2
 ```
 """
 function mod(x::T, y::T) where T<:Integer
@@ -272,6 +287,8 @@ rem(x::T, y::T) where {T<:BitUnsigned64} = checked_urem_int(x, y)
 
 Bitwise not.
 
+See also: [`!`](@ref), [`&`](@ref), [`|`](@ref).
+
 # Examples
 ```jldoctest
 julia> ~4
@@ -293,6 +310,8 @@ Bitwise and. Implements [three-valued logic](https://en.wikipedia.org/wiki/Three
 returning [`missing`](@ref) if one operand is `missing` and the other is `true`. Add parentheses for
 function application form: `(&)(x, y)`.
 
+See also: [`|`](@ref), [`xor`](@ref), [`&&`](@ref).
+
 # Examples
 ```jldoctest
 julia> 4 & 10
@@ -316,6 +335,8 @@ false
 Bitwise or. Implements [three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic),
 returning [`missing`](@ref) if one operand is `missing` and the other is `false`.
 
+See also: [`&`](@ref), [`xor`](@ref), [`||`](@ref).
+
 # Examples
 ```jldoctest
 julia> 4 | 10
@@ -369,6 +390,9 @@ Number of ones in the binary representation of `x`.
 ```jldoctest
 julia> count_ones(7)
 3
+
+julia> count_ones(Int32(-1))
+32
 ```
 """
 count_ones(x::BitInteger) = (ctpop_int(x) % Int)::Int
@@ -408,6 +432,9 @@ Number of zeros in the binary representation of `x`.
 ```jldoctest
 julia> count_zeros(Int32(2 ^ 16 - 1))
 16
+
+julia> count_zeros(-1)
+0
 ```
 """
 count_zeros(x::Integer) = count_ones(~x)
@@ -498,6 +525,8 @@ A negative value of `k` will rotate to the right instead.
 !!! compat "Julia 1.5"
     This function requires Julia 1.5 or later.
 
+See also: [`<<`](@ref), [`circshift`](@ref), [`BitArray`](@ref).
+
 ```jldoctest
 julia> bitrotate(UInt8(114), 2)
 0xc9
@@ -554,12 +583,26 @@ unsafe_trunc(::Type{T}, x::Integer) where {T<:Integer} = rem(x, T)
     trunc(x; sigdigits::Integer= [, base = 10])
 
 `trunc(x)` returns the nearest integral value of the same type as `x` whose absolute value
-is less than or equal to `x`.
+is less than or equal to the absolute value of `x`.
 
 `trunc(T, x)` converts the result to type `T`, throwing an `InexactError` if the value is
 not representable.
 
-`digits`, `sigdigits` and `base` work as for [`round`](@ref).
+Keywords `digits`, `sigdigits` and `base` work as for [`round`](@ref).
+
+See also: [`%`](@ref rem), [`floor`](@ref), [`unsigned`](@ref), [`unsafe_trunc`](@ref).
+
+# Examples
+```jldoctest
+julia> trunc(2.22)
+2.0
+
+julia> trunc(-2.22, digits=1)
+-2.2
+
+julia> trunc(Int, -2.22)
+-2
+```
 """
 function trunc end
 
@@ -574,7 +617,7 @@ equal to `x`.
 `floor(T, x)` converts the result to type `T`, throwing an `InexactError` if the value is
 not representable.
 
-`digits`, `sigdigits` and `base` work as for [`round`](@ref).
+Keywords `digits`, `sigdigits` and `base` work as for [`round`](@ref).
 """
 function floor end
 
@@ -589,7 +632,7 @@ equal to `x`.
 `ceil(T, x)` converts the result to type `T`, throwing an `InexactError` if the value is not
 representable.
 
-`digits`, `sigdigits` and `base` work as for [`round`](@ref).
+Keywords `digits`, `sigdigits` and `base` work as for [`round`](@ref).
 """
 function ceil end
 
@@ -702,6 +745,8 @@ function typemin end
 
 The highest value representable by the given (real) numeric `DataType`.
 
+See also: [`floatmax`](@ref), [`typemin`](@ref), [`eps`](@ref).
+
 # Examples
 ```jldoctest
 julia> typemax(Int8)
@@ -709,6 +754,12 @@ julia> typemax(Int8)
 
 julia> typemax(UInt32)
 0xffffffff
+
+julia> typemax(Float64)
+Inf
+
+julia> floatmax(Float32)  # largest finite floating point number
+3.4028235f38
 ```
 """
 function typemax end
diff --git a/base/intfuncs.jl b/base/intfuncs.jl
index 35140a81367e6c..169bbe313a620f 100644
--- a/base/intfuncs.jl
+++ b/base/intfuncs.jl
@@ -47,11 +47,22 @@ function gcd(a::T, b::T) where T<:Integer
     checked_abs(a)
 end
 
-# binary GCD (aka Stein's) algorithm
-# about 1.7x (2.1x) faster for random Int64s (Int128s)
 function gcd(a::T, b::T) where T<:BitInteger
     a == 0 && return checked_abs(b)
     b == 0 && return checked_abs(a)
+    r = _gcd(a, b)
+    signbit(r) && __throw_gcd_overflow(a, b)
+    return r
+end
+@noinline __throw_gcd_overflow(a, b) = throw(OverflowError("gcd($a, $b) overflows"))
+
+# binary GCD (aka Stein's) algorithm
+# about 1.7x (2.1x) faster for random Int64s (Int128s)
+# Unfortunately, we need to manually annotate this as `@pure` to work around #41694. Since
+# this is used in the Rational constructor, constant prop is something we do care about here.
+# This does call generic functions, so it might not be completely sound, but since `_gcd` is
+# restricted to BitIntegers, it is probably fine in practice.
+@pure function _gcd(a::T, b::T) where T<:BitInteger
     za = trailing_zeros(a)
     zb = trailing_zeros(b)
     k = min(za, zb)
@@ -65,11 +76,8 @@ function gcd(a::T, b::T) where T<:BitInteger
         v >>= trailing_zeros(v)
     end
     r = u << k
-    # T(r) would throw InexactError; we want OverflowError instead
-    r > typemax(T) && __throw_gcd_overflow(a, b)
-    r % T
+    return r % T
 end
-@noinline __throw_gcd_overflow(a, b) = throw(OverflowError("gcd($a, $b) overflows"))
 
 """
     lcm(x, y...)
@@ -117,8 +125,9 @@ function lcm(a::T, b::T) where T<:Integer
     end
 end
 
-gcd(a::Union{Integer,Rational}) = a
-lcm(a::Union{Integer,Rational}) = a
+gcd(a::Integer) = checked_abs(a)
+gcd(a::Rational) = checked_abs(a.num) // a.den
+lcm(a::Union{Integer,Rational}) = gcd(a)
 gcd(a::Unsigned, b::Signed) = gcd(promote(a, abs(b))...)
 gcd(a::Signed, b::Unsigned) = gcd(promote(abs(a), b)...)
 gcd(a::Real, b::Real) = gcd(promote(a,b)...)
@@ -303,8 +312,7 @@ end
 const HWReal = Union{Int8,Int16,Int32,Int64,UInt8,UInt16,UInt32,UInt64,Float32,Float64}
 const HWNumber = Union{HWReal, Complex{<:HWReal}, Rational{<:HWReal}}
 
-# Core.Compiler has complicated logic to inline x^2 and x^3 for
-# numeric types.  In terms of Val we can do it much more simply.
+# Inline x^2 and x^3 for Val
 # (The first argument prevents unexpected behavior if a function ^
 # is defined that is not equal to Base.^)
 @inline literal_pow(::typeof(^), x::HWNumber, ::Val{0}) = one(x)
@@ -318,11 +326,11 @@ const HWNumber = Union{HWReal, Complex{<:HWReal}, Rational{<:HWReal}}
 
 # for other types, define x^-n as inv(x)^n so that negative literal powers can
 # be computed in a type-stable way even for e.g. integers.
-@inline @generated function literal_pow(f::typeof(^), x, ::Val{p}) where {p}
+@inline function literal_pow(f::typeof(^), x, ::Val{p}) where {p}
     if p < 0
-        :(literal_pow(^, inv(x), $(Val{-p}())))
+        literal_pow(^, inv(x), Val(-p))
     else
-        :(f(x,$p))
+        f(x, p)
     end
 end
 
@@ -388,6 +396,8 @@ _prevpow2(x::Integer) = reinterpret(typeof(x),x < 0 ? -_prevpow2(unsigned(-x)) :
 
 Test whether `n` is an integer power of two.
 
+See also [`count_ones`](@ref), [`prevpow`](@ref), [`nextpow`](@ref).
+
 # Examples
 ```jldoctest
 julia> ispow2(4)
@@ -419,6 +429,8 @@ ispow2(x::Integer) = x > 0 && count_ones(x) == 1
 The smallest `a^n` not less than `x`, where `n` is a non-negative integer. `a` must be
 greater than 1, and `x` must be greater than 0.
 
+See also [`prevpow`](@ref).
+
 # Examples
 ```jldoctest
 julia> nextpow(2, 7)
@@ -433,8 +445,6 @@ julia> nextpow(5, 20)
 julia> nextpow(4, 16)
 16
 ```
-
-See also [`prevpow`](@ref).
 """
 function nextpow(a::Real, x::Real)
     x <= 0 && throw(DomainError(x, "`x` must be positive."))
@@ -456,6 +466,8 @@ end
 The largest `a^n` not greater than `x`, where `n` is a non-negative integer.
 `a` must be greater than 1, and `x` must not be less than 1.
 
+See also [`nextpow`](@ref), [`isqrt`](@ref).
+
 # Examples
 ```jldoctest
 julia> prevpow(2, 7)
@@ -470,7 +482,6 @@ julia> prevpow(5, 20)
 julia> prevpow(4, 16)
 16
 ```
-See also [`nextpow`](@ref).
 """
 function prevpow(a::Real, x::Real)
     x < 1 && throw(DomainError(x, "`x` must be ≥ 1."))
@@ -612,6 +623,8 @@ Compute the number of digits in integer `n` written in base `base`
 (`base` must not be in `[-1, 0, 1]`), optionally padded with zeros
 to a specified size (the result will never be less than `pad`).
 
+See also [`digits`](@ref), [`count_ones`](@ref).
+
 # Examples
 ```jldoctest
 julia> ndigits(12345)
@@ -625,6 +638,9 @@ julia> string(1022, base=16)
 
 julia> ndigits(123, pad=5)
 5
+
+julia> ndigits(-123)
+3
 ```
 """
 ndigits(x::Integer; base::Integer=10, pad::Integer=1) = max(pad, ndigits0z(x, base))
@@ -749,12 +765,15 @@ split_sign(n::Unsigned) = n, false
 Convert an integer `n` to a string in the given `base`,
 optionally specifying a number of digits to pad to.
 
+See also [`digits`](@ref), [`bitstring`](@ref), [`count_zeros`](@ref).
+
+# Examples
 ```jldoctest
 julia> string(5, base = 13, pad = 4)
 "0005"
 
-julia> string(13, base = 5, pad = 4)
-"0023"
+julia> string(-13, base = 5, pad = 4)
+"-0023"
 ```
 """
 function string(n::Integer; base::Integer = 10, pad::Integer = 1)
@@ -781,24 +800,36 @@ string(b::Bool) = b ? "true" : "false"
 """
     bitstring(n)
 
-A string giving the literal bit representation of a number.
+A string giving the literal bit representation of a value of a primitive type.
+
+See also [`count_ones`](@ref), [`count_zeros`](@ref), [`digits`](@ref).
 
 # Examples
 ```jldoctest
-julia> bitstring(4)
-"0000000000000000000000000000000000000000000000000000000000000100"
+julia> bitstring(Int32(4))
+"00000000000000000000000000000100"
 
 julia> bitstring(2.2)
 "0100000000000001100110011001100110011001100110011001100110011010"
 ```
 """
-function bitstring end
-
-bitstring(x::Union{Bool,Int8,UInt8})           = string(reinterpret(UInt8,x), pad = 8, base = 2)
-bitstring(x::Union{Int16,UInt16,Float16})      = string(reinterpret(UInt16,x), pad = 16, base = 2)
-bitstring(x::Union{Char,Int32,UInt32,Float32}) = string(reinterpret(UInt32,x), pad = 32, base = 2)
-bitstring(x::Union{Int64,UInt64,Float64})      = string(reinterpret(UInt64,x), pad = 64, base = 2)
-bitstring(x::Union{Int128,UInt128})            = string(reinterpret(UInt128,x), pad = 128, base = 2)
+function bitstring(x::T) where {T}
+    isprimitivetype(T) || throw(ArgumentError("$T not a primitive type"))
+    sz = sizeof(T) * 8  # number of bits in T == length of the result
+    str = StringVector(sz)
+    i = sz  # index of the last character not yet written; `str` is filled back to front
+    @inbounds while i >= 4  # each pass writes 4 characters
+        b = UInt32(sizeof(T) == 1 ? bitcast(UInt8, x) : trunc_int(UInt8, x))  # low 8 bits
+        d = 0x30303030 + ((b * 0x08040201) >> 0x3) & 0x01010101  # low 4 bits -> 4 ASCII bytes
+        str[i-3] = (d >> 0x00) % UInt8  # bit 3 of b
+        str[i-2] = (d >> 0x08) % UInt8  # bit 2 of b
+        str[i-1] = (d >> 0x10) % UInt8  # bit 1 of b
+        str[i]   = (d >> 0x18) % UInt8  # bit 0 of b
+        x = lshr_int(x, 4)  # discard the 4 bits just written
+        i -= 4
+    end
+    return String(str)
+end
 
 """
     digits([T<:Integer], n::Integer; base::T = 10, pad::Integer = 1)
@@ -807,9 +838,12 @@ Return an array with element type `T` (default `Int`) of the digits of `n` in th
 base, optionally padded with zeros to a specified size. More significant digits are at
 higher indices, such that `n == sum(digits[k]*base^(k-1) for k=1:length(digits))`.
 
+See also [`ndigits`](@ref), [`digits!`](@ref),
+and for base 2 also [`bitstring`](@ref), [`count_ones`](@ref).
+
 # Examples
 ```jldoctest
-julia> digits(10, base = 10)
+julia> digits(10)
 2-element Vector{Int64}:
  0
  1
@@ -821,14 +855,18 @@ julia> digits(10, base = 2)
  0
  1
 
-julia> digits(10, base = 2, pad = 6)
-6-element Vector{Int64}:
- 0
- 1
- 0
- 1
- 0
- 0
+julia> digits(-256, base = 10, pad = 5)
+5-element Vector{Int64}:
+ -6
+ -5
+ -2
+  0
+  0
+
+julia> n = rand(-999:999);
+
+julia> n == evalpoly(13, digits(n, base = 13))
+true
 ```
 """
 digits(n::Integer; base::Integer = 10, pad::Integer = 1) =
@@ -934,6 +972,8 @@ Factorial of `n`. If `n` is an [`Integer`](@ref), the factorial is computed as a
 integer (promoted to at least 64 bits). Note that this may overflow if `n` is not small,
 but you can use `factorial(big(n))` to compute the result exactly in arbitrary precision.
 
+See also [`binomial`](@ref).
+
 # Examples
 ```jldoctest
 julia> factorial(6)
@@ -948,9 +988,6 @@ julia> factorial(big(21))
 51090942171709440000
 ```
 
-# See also
-* [`binomial`](@ref)
-
 # External links
 * [Factorial](https://en.wikipedia.org/wiki/Factorial) on Wikipedia.
 """
@@ -980,6 +1017,8 @@ If ``n`` is negative, then it is defined in terms of the identity
 \\binom{n}{k} = (-1)^k \\binom{k-n-1}{k}
 ```
 
+See also [`factorial`](@ref).
+
 # Examples
 ```jldoctest
 julia> binomial(5, 3)
@@ -992,9 +1031,6 @@ julia> binomial(-5, 3)
 -35
 ```
 
-# See also
-* [`factorial`](@ref)
-
 # External links
 * [Binomial coefficient](https://en.wikipedia.org/wiki/Binomial_coefficient) on Wikipedia.
 """
diff --git a/base/io.jl b/base/io.jl
index dc588a8efd0e99..9e7248c55c43ad 100644
--- a/base/io.jl
+++ b/base/io.jl
@@ -15,7 +15,7 @@ struct EOFError <: Exception end
 A system call failed with an error code (in the `errno` global variable).
 """
 struct SystemError <: Exception
-    prefix::AbstractString
+    prefix::String
     errnum::Int32
     extrainfo
     SystemError(p::AbstractString, e::Integer, extrainfo) = new(p, e, extrainfo)
@@ -60,9 +60,50 @@ function isopen end
 Close an I/O stream. Performs a [`flush`](@ref) first.
 """
 function close end
+
+"""
+    closewrite(stream)
+
+Shut down the write half of a full-duplex I/O stream. Performs a [`flush`](@ref)
+first. Notify the other end that no more data will be written to the underlying
+file. This is not supported by all IO types.
+
+# Examples
+```jldoctest
+julia> io = Base.BufferStream(); # this never blocks, so we can read and write on the same Task
+
+julia> write(io, "request");
+
+julia> # calling `read(io)` here would block forever
+
+julia> closewrite(io);
+
+julia> read(io, String)
+"request"
+```
+"""
+function closewrite end
+
+"""
+    flush(stream)
+
+Commit all currently buffered writes to the given stream.
+"""
 function flush end
-function wait_readnb end
-function wait_close end
+
+"""
+    bytesavailable(io)
+
+Return the number of bytes available for reading before a read from this stream or buffer will block.
+
+# Examples
+```jldoctest
+julia> io = IOBuffer("JuliaLang is a GitHub organization");
+
+julia> bytesavailable(io)
+34
+```
+"""
 function bytesavailable end
 
 """
@@ -81,7 +121,7 @@ function readavailable end
 """
     isreadable(io) -> Bool
 
-Return `true` if the specified IO object is readable (if that can be determined).
+Return `false` if the specified IO object is not readable.
 
 # Examples
 ```jldoctest
@@ -99,12 +139,12 @@ true
 julia> rm("myfile.txt")
 ```
 """
-function isreadable end
+isreadable(io::IO) = isopen(io)
 
 """
     iswritable(io) -> Bool
 
-Return `true` if the specified IO object is writable (if that can be determined).
+Return `false` if the specified IO object is not writable.
 
 # Examples
 ```jldoctest
@@ -122,10 +162,23 @@ false
 julia> rm("myfile.txt")
 ```
 """
-function iswritable end
-function copy end
+iswritable(io::IO) = isopen(io)
+
+"""
+    eof(stream) -> Bool
+
+Test whether an I/O stream is at end-of-file. If the stream is not yet exhausted, this
+function will block to wait for more data if necessary, and then return `false`. Therefore
+it is always safe to read one byte after seeing `eof` return `false`. `eof` will return
+`false` as long as buffered data is still available, even if the remote end of a connection
+is closed.
+"""
 function eof end
 
+function copy end
+function wait_readnb end
+function wait_close end
+
 """
     read(io::IO, T)
 
@@ -357,65 +410,37 @@ end
 function pipe_reader end
 function pipe_writer end
 
+for f in (:flush, :closewrite, :iswritable)
+    @eval $(f)(io::AbstractPipe) = $(f)(pipe_writer(io)::IO)
+end
 write(io::AbstractPipe, byte::UInt8) = write(pipe_writer(io)::IO, byte)
 write(to::IO, from::AbstractPipe) = write(to, pipe_reader(from))
 unsafe_write(io::AbstractPipe, p::Ptr{UInt8}, nb::UInt) = unsafe_write(pipe_writer(io)::IO, p, nb)::Union{Int,UInt}
 buffer_writes(io::AbstractPipe, args...) = buffer_writes(pipe_writer(io)::IO, args...)
-flush(io::AbstractPipe) = flush(pipe_writer(io)::IO)
 
+for f in (
+        # peek/mark interface
+        :mark, :unmark, :reset, :ismarked,
+        # Simple reader functions
+        :read, :readavailable, :bytesavailable, :reseteof, :isreadable)
+    @eval $(f)(io::AbstractPipe) = $(f)(pipe_reader(io)::IO)
+end
 read(io::AbstractPipe, byte::Type{UInt8}) = read(pipe_reader(io)::IO, byte)::UInt8
 unsafe_read(io::AbstractPipe, p::Ptr{UInt8}, nb::UInt) = unsafe_read(pipe_reader(io)::IO, p, nb)
-read(io::AbstractPipe) = read(pipe_reader(io)::IO)
 readuntil(io::AbstractPipe, arg::UInt8; kw...) = readuntil(pipe_reader(io)::IO, arg; kw...)
 readuntil(io::AbstractPipe, arg::AbstractChar; kw...) = readuntil(pipe_reader(io)::IO, arg; kw...)
 readuntil(io::AbstractPipe, arg::AbstractString; kw...) = readuntil(pipe_reader(io)::IO, arg; kw...)
 readuntil(io::AbstractPipe, arg::AbstractVector; kw...) = readuntil(pipe_reader(io)::IO, arg; kw...)
 readuntil_vector!(io::AbstractPipe, target::AbstractVector, keep::Bool, out) = readuntil_vector!(pipe_reader(io)::IO, target, keep, out)
 readbytes!(io::AbstractPipe, target::AbstractVector{UInt8}, n=length(target)) = readbytes!(pipe_reader(io)::IO, target, n)
-
-for f in (
-        # peek/mark interface
-        :mark, :unmark, :reset, :ismarked,
-        # Simple reader functions
-        :readavailable, :isreadable)
-    @eval $(f)(io::AbstractPipe) = $(f)(pipe_reader(io)::IO)
-end
 peek(io::AbstractPipe, ::Type{T}) where {T} = peek(pipe_reader(io)::IO, T)::T
+wait_readnb(io::AbstractPipe, nb::Int) = wait_readnb(pipe_reader(io)::IO, nb)
+eof(io::AbstractPipe) = eof(pipe_reader(io)::IO)::Bool
 
-iswritable(io::AbstractPipe) = iswritable(pipe_writer(io)::IO)
 isopen(io::AbstractPipe) = isopen(pipe_writer(io)::IO) || isopen(pipe_reader(io)::IO)
 close(io::AbstractPipe) = (close(pipe_writer(io)::IO); close(pipe_reader(io)::IO))
-wait_readnb(io::AbstractPipe, nb::Int) = wait_readnb(pipe_reader(io)::IO, nb)
 wait_close(io::AbstractPipe) = (wait_close(pipe_writer(io)::IO); wait_close(pipe_reader(io)::IO))
 
-"""
-    bytesavailable(io)
-
-Return the number of bytes available for reading before a read from this stream or buffer will block.
-
-# Examples
-```jldoctest
-julia> io = IOBuffer("JuliaLang is a GitHub organization");
-
-julia> bytesavailable(io)
-34
-```
-"""
-bytesavailable(io::AbstractPipe) = bytesavailable(pipe_reader(io)::IO)
-bytesavailable(io::DevNull) = 0
-
-"""
-    eof(stream) -> Bool
-
-Test whether an I/O stream is at end-of-file. If the stream is not yet exhausted, this
-function will block to wait for more data if necessary, and then return `false`. Therefore
-it is always safe to read one byte after seeing `eof` return `false`. `eof` will return
-`false` as long as buffered data is still available, even if the remote end of a connection
-is closed.
-"""
-eof(io::AbstractPipe) = eof(pipe_reader(io)::IO)::Bool
-reseteof(io::AbstractPipe) = reseteof(pipe_reader(io)::IO)
-
 
 # Exception-safe wrappers (io = open(); try f(io) finally close(io))
 
@@ -885,8 +910,9 @@ end
 
 function readuntil(io::IO, target::AbstractString; keep::Bool=false)
     # small-string target optimizations
-    isempty(target) && return ""
-    c, rest = Iterators.peel(target)
+    x = Iterators.peel(target)
+    isnothing(x) && return ""
+    c, rest = x
     if isempty(rest) && c <= '\x7f'
         return readuntil_string(io, c % UInt8, keep)
     end
@@ -1034,7 +1060,7 @@ end
 
 Return an iterable object yielding [`read(io, T)`](@ref).
 
-See also: [`skipchars`](@ref), [`eachline`](@ref), [`readuntil`](@ref)
+See also [`skipchars`](@ref), [`eachline`](@ref), [`readuntil`](@ref).
 
 !!! compat "Julia 1.6"
     `readeach` requires Julia 1.6 or later.
@@ -1118,11 +1144,6 @@ ismarked(io::IO) = io.mark >= 0
 # Make sure all IO streams support flush, even if only as a no-op,
 # to make it easier to write generic I/O code.
 
-"""
-    flush(stream)
-
-Commit all currently buffered writes to the given stream.
-"""
 flush(io::IO) = nothing
 
 """
diff --git a/base/iobuffer.jl b/base/iobuffer.jl
index 8df6af0087137d..e08a019d84a2ca 100644
--- a/base/iobuffer.jl
+++ b/base/iobuffer.jl
@@ -334,6 +334,12 @@ end
 
 eof(io::GenericIOBuffer) = (io.ptr-1 == io.size)
 
+function closewrite(io::GenericIOBuffer)
+    io.writable = false  # only the writable flag is cleared; `io.readable` is not touched here
+    # OR throw(_UVError("closewrite", UV_ENOTSOCK))
+    nothing
+end
+
 @noinline function close(io::GenericIOBuffer{T}) where T
     io.readable = false
     io.writable = false
@@ -353,8 +359,7 @@ isopen(io::GenericIOBuffer) = io.readable || io.writable || io.seekable || bytes
 """
     take!(b::IOBuffer)
 
-Obtain the contents of an `IOBuffer` as an array, without copying. Afterwards, the
-`IOBuffer` is reset to its initial state.
+Obtain the contents of an `IOBuffer` as an array. Afterwards, the `IOBuffer` is reset to its initial state.
 
 # Examples
 ```jldoctest
diff --git a/base/iostream.jl b/base/iostream.jl
index 35255752bd1b50..98f15fd8a7db75 100644
--- a/base/iostream.jl
+++ b/base/iostream.jl
@@ -13,7 +13,7 @@ Mostly used to represent files returned by [`open`](@ref).
 mutable struct IOStream <: IO
     handle::Ptr{Cvoid}
     ios::Array{UInt8,1}
-    name::AbstractString
+    name::String
     mark::Int64
     lock::ReentrantLock
     _dolock::Bool
diff --git a/base/irrationals.jl b/base/irrationals.jl
index 545d9091ee8402..f3a9817f1ee354 100644
--- a/base/irrationals.jl
+++ b/base/irrationals.jl
@@ -22,7 +22,9 @@ abstract type AbstractIrrational <: Real end
     Irrational{sym} <: AbstractIrrational
 
 Number type representing an exact irrational value denoted by the
-symbol `sym`.
+symbol `sym`, such as [`π`](@ref pi), [`ℯ`](@ref) and [`γ`](@ref Base.MathConstants.eulergamma).
+
+See also `@irrational`, [`AbstractIrrational`](@ref).
 """
 struct Irrational{sym} <: AbstractIrrational end
 
@@ -201,7 +203,7 @@ big(::Type{<:AbstractIrrational}) = BigFloat
 function alignment(io::IO, x::AbstractIrrational)
     m = match(r"^(.*?)(=.*)$", sprint(show, x, context=io, sizehint=0))
     m === nothing ? (length(sprint(show, x, context=io, sizehint=0)), 0) :
-    (length(m.captures[1]), length(m.captures[2]))
+    (length(something(m.captures[1])), length(something(m.captures[2])))
 end
 
 # inv
diff --git a/base/iterators.jl b/base/iterators.jl
index eebffed16dcfbe..c0a68a36d836e2 100644
--- a/base/iterators.jl
+++ b/base/iterators.jl
@@ -221,7 +221,7 @@ CartesianIndex(1, 2) d
 CartesianIndex(2, 2) e
 ```
 
-See also: [`IndexStyle`](@ref), [`axes`](@ref).
+See also [`IndexStyle`](@ref), [`axes`](@ref).
 """
 pairs(::IndexLinear,    A::AbstractArray) = Pairs(A, LinearIndices(A))
 pairs(::IndexCartesian, A::AbstractArray) = Pairs(A, CartesianIndices(axes(A)))
@@ -235,30 +235,30 @@ pairs(A::AbstractArray)  = pairs(IndexCartesian(), A)
 pairs(A::AbstractVector) = pairs(IndexLinear(), A)
 # pairs(v::Pairs) = v # listed for reference, but already defined from being an AbstractDict
 
-length(v::Pairs) = length(v.itr)
-axes(v::Pairs) = axes(v.itr)
-size(v::Pairs) = size(v.itr)
+length(v::Pairs) = length(getfield(v, :itr))
+axes(v::Pairs) = axes(getfield(v, :itr))
+size(v::Pairs) = size(getfield(v, :itr))
 @propagate_inbounds function iterate(v::Pairs{K, V}, state...) where {K, V}
-    x = iterate(v.itr, state...)
+    x = iterate(getfield(v, :itr), state...)
     x === nothing && return x
     indx, n = x
-    item = v.data[indx]
+    item = getfield(v, :data)[indx]
     return (Pair{K, V}(indx, item), n)
 end
-@inline isdone(v::Pairs, state...) = isdone(v.itr, state...)
+@inline isdone(v::Pairs, state...) = isdone(getfield(v, :itr), state...)
 
 IteratorSize(::Type{<:Pairs{<:Any, <:Any, I}}) where {I} = IteratorSize(I)
 IteratorSize(::Type{<:Pairs{<:Any, <:Any, <:Base.AbstractUnitRange, <:Tuple}}) = HasLength()
 
-reverse(v::Pairs) = Pairs(v.data, reverse(v.itr))
+reverse(v::Pairs) = Pairs(getfield(v, :data), reverse(getfield(v, :itr)))
 
-haskey(v::Pairs, key) = (key in v.itr)
-keys(v::Pairs) = v.itr
-values(v::Pairs) = v.data # TODO: this should be a view of data subset by itr
-getindex(v::Pairs, key) = v.data[key]
-setindex!(v::Pairs, value, key) = (v.data[key] = value; v)
-get(v::Pairs, key, default) = get(v.data, key, default)
-get(f::Base.Callable, v::Pairs, key) = get(f, v.data, key)
+haskey(v::Pairs, key) = (key in getfield(v, :itr))
+keys(v::Pairs) = getfield(v, :itr)
+values(v::Pairs) = getfield(v, :data) # TODO: this should be a view of data subset by itr
+getindex(v::Pairs, key) = getfield(v, :data)[key]
+setindex!(v::Pairs, value, key) = (getfield(v, :data)[key] = value; v)
+get(v::Pairs, key, default) = get(getfield(v, :data), key, default)
+get(f::Base.Callable, v::Pairs, key) = get(f, getfield(v, :data), key)
 
 # zip
 
@@ -276,6 +276,8 @@ the `zip` iterator is a tuple of values of its subiterators.
     `zip` orders the calls to its subiterators in such a way that stateful iterators will
     not advance when another iterator finishes in the current iteration.
 
+See also: [`enumerate`](@ref), [`splat`](@ref Base.splat).
+
 # Examples
 ```jldoctest
 julia> a = 1:5
@@ -429,6 +431,12 @@ julia> foreach(println, f)
 1
 3
 5
+
+julia> [x for x in [1, 2, 3, 4, 5] if isodd(x)]  # collects a generator over Iterators.filter
+3-element Vector{Int64}:
+ 1
+ 3
+ 5
 ```
 """
 filter(flt, itr) = Filter(flt, itr)
@@ -526,6 +534,8 @@ end
 
 An iterator that yields the same elements as `iter`, but starting at the given `state`.
 
+See also: [`Iterators.drop`](@ref), [`Iterators.peel`](@ref), [`Base.rest`](@ref).
+
 # Examples
 ```jldoctest
 julia> collect(Iterators.rest([1,2,3,4], 2))
@@ -544,6 +554,13 @@ rest(itr) = itr
 
 Returns the first element and an iterator over the remaining elements.
 
+If the iterator is empty, return `nothing` (like `iterate`).
+
+!!! compat "Julia 1.7"
+    Prior versions throw a `BoundsError` if the iterator is empty.
+
+See also: [`Iterators.drop`](@ref), [`Iterators.take`](@ref).
+
 # Examples
 ```jldoctest
 julia> (a, rest) = Iterators.peel("abc");
@@ -559,7 +576,7 @@ julia> collect(rest)
 """
 function peel(itr)
     y = iterate(itr)
-    y === nothing && throw(BoundsError())
+    y === nothing && return y
     val, s = y
     val, rest(itr, s)
 end
@@ -622,6 +639,8 @@ end
 
 An iterator that generates at most the first `n` elements of `iter`.
 
+See also: [`drop`](@ref Iterators.drop), [`peel`](@ref Iterators.peel), [`first`](@ref), [`take!`](@ref).
+
 # Examples
 ```jldoctest
 julia> a = 1:2:11
@@ -833,6 +852,8 @@ end
 An iterator that cycles through `iter` forever.
 If `iter` is empty, so is `cycle(iter)`.
 
+See also: [`Iterators.repeated`](@ref), [`repeat`](@ref).
+
 # Examples
 ```jldoctest
 julia> for (i, v) in enumerate(Iterators.cycle("hello"))
@@ -872,6 +893,8 @@ repeated(x) = Repeated(x)
 An iterator that generates the value `x` forever. If `n` is specified, generates `x` that
 many times (equivalent to `take(repeated(x), n)`).
 
+See also: [`Iterators.cycle`](@ref), [`repeat`](@ref).
+
 # Examples
 ```jldoctest
 julia> a = Iterators.repeated([1 2], 4);
@@ -907,12 +930,17 @@ Return an iterator over the product of several iterators. Each generated element
 a tuple whose `i`th element comes from the `i`th argument iterator. The first iterator
 changes the fastest.
 
+See also: [`zip`](@ref), [`Iterators.flatten`](@ref).
+
 # Examples
 ```jldoctest
 julia> collect(Iterators.product(1:2, 3:5))
 2×3 Matrix{Tuple{Int64, Int64}}:
  (1, 3)  (1, 4)  (1, 5)
  (2, 3)  (2, 4)  (2, 5)
+
+julia> ans == [(x,y) for x in 1:2, y in 3:5]  # collects a generator involving Iterators.product
+true
 ```
 """
 product(iters...) = ProductIterator(iters)
@@ -1042,6 +1070,15 @@ julia> collect(Iterators.flatten((1:2, 8:9)))
  2
  8
  9
+
+julia> [(x,y) for x in 0:1 for y in 'a':'c']  # collects generators involving Iterators.flatten
+6-element Vector{Tuple{Int64, Char}}:
+ (0, 'a')
+ (0, 'b')
+ (0, 'c')
+ (1, 'a')
+ (1, 'b')
+ (1, 'c')
 ```
 """
 flatten(itr) = Flatten(itr)
@@ -1139,26 +1176,26 @@ function length(itr::PartitionIterator)
     return cld(l, itr.n)
 end
 
-function iterate(itr::PartitionIterator{<:AbstractRange}, state=1)
-    state > length(itr.c) && return nothing
-    r = min(state + itr.n - 1, length(itr.c))
+function iterate(itr::PartitionIterator{<:AbstractRange}, state = firstindex(itr.c))
+    state > lastindex(itr.c) && return nothing
+    r = min(state + itr.n - 1, lastindex(itr.c))
     return @inbounds itr.c[state:r], r + 1
 end
 
-function iterate(itr::PartitionIterator{<:AbstractArray}, state=1)
-    state > length(itr.c) && return nothing
-    r = min(state + itr.n - 1, length(itr.c))
+function iterate(itr::PartitionIterator{<:AbstractArray}, state = firstindex(itr.c))
+    state > lastindex(itr.c) && return nothing
+    r = min(state + itr.n - 1, lastindex(itr.c))
     return @inbounds view(itr.c, state:r), r + 1
 end
 
 struct IterationCutShort; end
 
 function iterate(itr::PartitionIterator, state...)
-    v = Vector{eltype(itr.c)}(undef, itr.n)
     # This is necessary to remember whether we cut the
     # last element short. In such cases, we do return that
     # element, but not the next one
     state === (IterationCutShort(),) && return nothing
+    v = Vector{eltype(itr.c)}(undef, itr.n)
     i = 0
     y = iterate(itr.c, state...)
     while y !== nothing
@@ -1211,6 +1248,12 @@ julia> collect(a)
 2-element Vector{Char}:
  'e': ASCII/Unicode U+0065 (category Ll: Letter, lowercase)
  'f': ASCII/Unicode U+0066 (category Ll: Letter, lowercase)
+
+julia> Iterators.reset!(a); popfirst!(a)
+'a': ASCII/Unicode U+0061 (category Ll: Letter, lowercase)
+
+julia> Iterators.reset!(a, "hello"); popfirst!(a)
+'h': ASCII/Unicode U+0068 (category Ll: Letter, lowercase)
 ```
 
 ```jldoctest
@@ -1239,7 +1282,7 @@ mutable struct Stateful{T, VS}
     end
 end
 
-function reset!(s::Stateful{T,VS}, itr::T) where {T,VS}
+function reset!(s::Stateful{T,VS}, itr::T=s.itr) where {T,VS}
     s.itr = itr
     setfield!(s, :nextvalstate, iterate(itr))
     s.taken = 0
@@ -1300,7 +1343,7 @@ length(s::Stateful) = length(s.itr) - s.taken
 Returns the one and only element of collection `x`, and throws an `ArgumentError` if the
 collection has zero or multiple elements.
 
-See also: [`first`](@ref), [`last`](@ref).
+See also [`first`](@ref), [`last`](@ref).
 
 !!! compat "Julia 1.4"
     This method requires at least Julia 1.4.
@@ -1310,7 +1353,7 @@ See also: [`first`](@ref), [`last`](@ref).
     @boundscheck if i === nothing
         throw(ArgumentError("Collection is empty, must contain exactly 1 element"))
     end
-    (ret, state) = i
+    (ret, state) = i::NTuple{2,Any}
     @boundscheck if iterate(x, state) !== nothing
         throw(ArgumentError("Collection has multiple elements, must contain exactly 1 element"))
     end
diff --git a/base/libc.jl b/base/libc.jl
index 547561ac964bac..98d2910917ee48 100644
--- a/base/libc.jl
+++ b/base/libc.jl
@@ -131,7 +131,7 @@ Suspends execution for `s` seconds.
 This function does not yield to Julia's scheduler and therefore blocks
 the Julia thread that it is running on for the duration of the sleep time.
 
-See also: [`sleep`](@ref)
+See also [`sleep`](@ref).
 """
 systemsleep
 
@@ -402,6 +402,79 @@ Interface to the C `srand(seed)` function.
 """
 srand(seed=floor(Int, time()) % Cuint) = ccall(:srand, Cvoid, (Cuint,), seed)
 
+struct Cpasswd
+    username::Cstring
+    uid::Clong
+    gid::Clong
+    shell::Cstring
+    homedir::Cstring
+    gecos::Cstring
+    Cpasswd() = new(C_NULL, -1, -1, C_NULL, C_NULL, C_NULL) # empty record; see `getpwuid`
+end
+mutable struct Cgroup
+    groupname::Cstring     # group name
+    gid::Clong        # group ID
+    mem::Ptr{Cstring} # group members
+    Cgroup() = new(C_NULL, -1, C_NULL)
+end
+struct Passwd
+    username::String
+    uid::Int
+    gid::Int
+    shell::String
+    homedir::String
+    gecos::String
+end
+struct Group
+    groupname::String
+    gid::Int
+    mem::Vector{String}
+end
+
+function getpwuid(uid::Unsigned, throw_error::Bool=true)
+    ref_pd = Ref(Cpasswd())
+    ret = ccall(:jl_os_get_passwd, Cint, (Ref{Cpasswd}, UInt), ref_pd, uid)
+    if ret != 0
+        throw_error && Base.uv_error("getpwuid", ret)
+        return
+    end
+    pd = ref_pd[]
+    pd = Passwd(
+        pd.username == C_NULL ? "" : unsafe_string(pd.username),
+        pd.uid,
+        pd.gid,
+        pd.shell == C_NULL ? "" : unsafe_string(pd.shell),
+        pd.homedir == C_NULL ? "" : unsafe_string(pd.homedir),
+        pd.gecos == C_NULL ? "" : unsafe_string(pd.gecos),
+    )
+    ccall(:uv_os_free_passwd, Cvoid, (Ref{Cpasswd},), ref_pd)
+    return pd
+end
+function getgrgid(gid::Unsigned, throw_error::Bool=true)
+    ref_gp = Ref(Cgroup())
+    ret = ccall(:jl_os_get_group, Cint, (Ref{Cgroup}, UInt), ref_gp, gid)
+    if ret != 0
+        throw_error && Base.uv_error("getgrgid", ret)
+        return
+    end
+    gp = ref_gp[]
+    members = String[]
+    if gp.mem != C_NULL
+        while true
+            mem = unsafe_load(gp.mem, length(members) + 1)
+            mem == C_NULL && break
+            push!(members, unsafe_string(mem))
+        end
+    end
+    gp = Group(
+         gp.groupname == C_NULL ? "" : unsafe_string(gp.groupname),
+         gp.gid,
+         members,
+    )
+    ccall(:jl_os_free_group, Cvoid, (Ref{Cgroup},), ref_gp)
+    return gp
+end
+
 # Include dlopen()/dlpath() code
 include("libdl.jl")
 using .Libdl
diff --git a/base/libdl.jl b/base/libdl.jl
index 9f37eb2034007c..4f29260bb24f82 100644
--- a/base/libdl.jl
+++ b/base/libdl.jl
@@ -237,9 +237,10 @@ julia> dlpath("libjulia")
 ```
 """
 function dlpath(libname::Union{AbstractString, Symbol})
-    dlopen(libname, RTLD_NOLOAD) do handle
+    # Use the do-block form so the handle is closed even if `dlpath(handle)` throws.
+    dlopen(libname) do handle
         return dlpath(handle)
     end
 end
 
 if Sys.isapple()
diff --git a/base/libuv.jl b/base/libuv.jl
index 82298516f4a1b9..c64cbff564b66d 100644
--- a/base/libuv.jl
+++ b/base/libuv.jl
@@ -74,7 +74,7 @@ end
 ## Libuv error handling ##
 
 struct IOError <: Exception
-    msg::AbstractString
+    msg::String
     code::Int32
     IOError(msg::AbstractString, code::Integer) = new(msg, code)
 end
@@ -107,6 +107,7 @@ end
 function uv_alloc_buf end
 function uv_readcb end
 function uv_writecb_task end
+function uv_shutdowncb_task end
 function uv_return_spawn end
 function uv_asynccb end
 function uv_timercb end
diff --git a/base/loading.jl b/base/loading.jl
index 1f317b1b161d99..a87a6ed7423c52 100644
--- a/base/loading.jl
+++ b/base/loading.jl
@@ -129,12 +129,21 @@ end
 const ns_dummy_uuid = UUID("fe0723d6-3a44-4c41-8065-ee0f42c8ceab")
 
 function dummy_uuid(project_file::String)
+    cache = LOADING_CACHE[]
+    if cache !== nothing
+        uuid = get(cache.dummy_uuid, project_file, nothing)
+        uuid === nothing || return uuid
+    end
     project_path = try
         realpath(project_file)
     catch
         project_file
     end
-    return uuid5(ns_dummy_uuid, project_path)
+    uuid = uuid5(ns_dummy_uuid, project_path)
+    if cache !== nothing
+        cache.dummy_uuid[project_file] = uuid
+    end
+    return uuid
 end
 
 ## package path slugs: turning UUID + SHA1 into a pair of 4-byte "slugs" ##
@@ -203,7 +212,6 @@ function get_updated_dict(p::TOML.Parser, f::CachedTOMLDict)
             f.mtime = s.mtime
             f.size = s.size
             f.hash = new_hash
-            @debug "Cache of TOML file $(repr(f.path)) invalid, reparsing..."
             TOML.reinit!(p, String(content); filepath=f.path)
             return f.d = TOML.parse(p)
         end
@@ -211,6 +219,17 @@ function get_updated_dict(p::TOML.Parser, f::CachedTOMLDict)
     return f.d
 end
 
+struct LoadingCache
+    load_path::Vector{String}
+    dummy_uuid::Dict{String, UUID}
+    env_project_file::Dict{String, Union{Bool, String}}
+    project_file_manifest_path::Dict{String, Union{Nothing, String}}
+    require_parsed::Set{String}
+end
+const LOADING_CACHE = Ref{Union{LoadingCache, Nothing}}(nothing)
+LoadingCache() = LoadingCache(load_path(), Dict(), Dict(), Dict(), Set())
+
+
 struct TOMLCache
     p::TOML.Parser
     d::Dict{String, CachedTOMLDict}
@@ -221,15 +240,25 @@ const TOML_LOCK = ReentrantLock()
 parsed_toml(project_file::AbstractString) = parsed_toml(project_file, TOML_CACHE, TOML_LOCK)
 function parsed_toml(project_file::AbstractString, toml_cache::TOMLCache, toml_lock::ReentrantLock)
     lock(toml_lock) do
-        if !haskey(toml_cache.d, project_file)
-            @debug "Creating new cache for $(repr(project_file))"
+        cache = LOADING_CACHE[]
+        dd = if !haskey(toml_cache.d, project_file)
             d = CachedTOMLDict(toml_cache.p, project_file)
             toml_cache.d[project_file] = d
-            return d.d
+            d.d
         else
             d = toml_cache.d[project_file]
-            return get_updated_dict(toml_cache.p, d)
+            # We are in a require call and have already parsed this TOML file
+            # assume that it is unchanged to avoid hitting disk
+            if cache !== nothing && project_file in cache.require_parsed
+                d.d
+            else
+                get_updated_dict(toml_cache.p, d)
+            end
         end
+        if cache !== nothing
+            push!(cache.require_parsed, project_file)
+        end
+        return dd
     end
 end
 
@@ -312,21 +341,35 @@ function pathof(m::Module)
     pkgid === nothing && return nothing
     origin = get(Base.pkgorigins, pkgid, nothing)
     origin === nothing && return nothing
-    origin.path === nothing && return nothing
-    return fixup_stdlib_path(origin.path)
+    path = origin.path
+    path === nothing && return nothing
+    return fixup_stdlib_path(path)
 end
 
 """
-    pkgdir(m::Module)
+    pkgdir(m::Module[, paths::String...])
+
+Return the root directory of the package that imported module `m`,
+or `nothing` if `m` was not imported from a package. Optionally further
+path component strings can be provided to construct a path within the
+package root.
 
- Return the root directory of the package that imported module `m`,
- or `nothing` if `m` was not imported from a package.
- """
-function pkgdir(m::Module)
+```julia
+julia> pkgdir(Foo)
+"/path/to/Foo.jl"
+
+julia> pkgdir(Foo, "src", "file.jl")
+"/path/to/Foo.jl/src/file.jl"
+```
+
+!!! compat "Julia 1.7"
+    The optional argument `paths` requires at least Julia 1.7.
+"""
+function pkgdir(m::Module, paths::String...)
     rootmodule = Base.moduleroot(m)
     path = pathof(rootmodule)
     path === nothing && return nothing
-    return dirname(dirname(path))
+    return joinpath(dirname(dirname(path)), paths...)
 end
 
 ## generic project & manifest API ##
@@ -340,16 +383,29 @@ const preferences_names = ("JuliaLocalPreferences.toml", "LocalPreferences.toml"
 #  - `true`: `env` is an implicit environment
 #  - `path`: the path of an explicit project file
 function env_project_file(env::String)::Union{Bool,String}
+    cache = LOADING_CACHE[]
+    if cache !== nothing
+        project_file = get(cache.env_project_file, env, nothing)
+        project_file === nothing || return project_file
+    end
     if isdir(env)
+        project_file = true # implicit environment unless a project file is found below
         for proj in project_names
-            project_file = joinpath(env, proj)
-            isfile_casesensitive(project_file) && return project_file
+            maybe_project_file = joinpath(env, proj)
+            if isfile_casesensitive(maybe_project_file)
+                project_file = maybe_project_file
+                break
+            end
         end
-        return true
     elseif basename(env) in project_names && isfile_casesensitive(env)
-        return env
+        project_file = env
+    else
+        project_file = false
+    end
+    if cache !== nothing
+        cache.env_project_file[env] = project_file
     end
-    return false
+    return project_file
 end
 
 function project_deps_get(env::String, name::String)::Union{Nothing,PkgId}
@@ -403,10 +459,9 @@ end
 
 # find project file's top-level UUID entry (or nothing)
 function project_file_name_uuid(project_file::String, name::String)::PkgId
-    uuid = dummy_uuid(project_file)
     d = parsed_toml(project_file)
     uuid′ = get(d, "uuid", nothing)::Union{String, Nothing}
-    uuid′ === nothing || (uuid = UUID(uuid′))
+    uuid = uuid′ === nothing ? dummy_uuid(project_file) : UUID(uuid′)
     name = get(d, "name", name)::String
     return PkgId(uuid, name)
 end
@@ -418,18 +473,34 @@ end
 
 # find project file's corresponding manifest file
 function project_file_manifest_path(project_file::String)::Union{Nothing,String}
+    cache = LOADING_CACHE[]
+    if cache !== nothing
+        manifest_path = get(cache.project_file_manifest_path, project_file, missing)
+        manifest_path === missing || return manifest_path
+    end
     dir = abspath(dirname(project_file))
     d = parsed_toml(project_file)
     explicit_manifest = get(d, "manifest", nothing)::Union{String, Nothing}
+    manifest_path = nothing
     if explicit_manifest !== nothing
         manifest_file = normpath(joinpath(dir, explicit_manifest))
-        isfile_casesensitive(manifest_file) && return manifest_file
+        if isfile_casesensitive(manifest_file)
+            manifest_path = manifest_file
+        end
+    end
+    if manifest_path === nothing
+        for mfst in manifest_names
+            manifest_file = joinpath(dir, mfst)
+            if isfile_casesensitive(manifest_file)
+                manifest_path = manifest_file
+                break
+            end
+        end
     end
-    for mfst in manifest_names
-        manifest_file = joinpath(dir, mfst)
-        isfile_casesensitive(manifest_file) && return manifest_file
+    if cache !== nothing
+        cache.project_file_manifest_path[project_file] = manifest_path
     end
-    return nothing
+    return manifest_path
 end
 
 # given a directory (implicit env from LOAD_PATH) and a name,
@@ -486,12 +557,34 @@ function explicit_project_deps_get(project_file::String, name::String)::Union{No
     return nothing
 end
 
+function is_v1_format_manifest(raw_manifest::Dict)
+    if haskey(raw_manifest, "manifest_format")
+        if raw_manifest["manifest_format"] isa Dict && haskey(raw_manifest["manifest_format"], "uuid")
+            # the off-chance where an old format manifest has a dep called "manifest_format"
+            return true
+        end
+        return false
+    else
+        return true
+    end
+end
+
+# returns a deps list for both old and new manifest formats
+function get_deps(raw_manifest::Dict)
+    if is_v1_format_manifest(raw_manifest)
+        return raw_manifest
+    else
+        # if the manifest has no deps, there won't be a `deps` field
+        return get(Dict{String, Any}, raw_manifest, "deps")
+    end
+end
+
 # find `where` stanza and return the PkgId for `name`
 # return `nothing` if it did not find `where` (indicating caller should continue searching)
 function explicit_manifest_deps_get(project_file::String, where::UUID, name::String)::Union{Nothing,PkgId}
     manifest_file = project_file_manifest_path(project_file)
     manifest_file === nothing && return nothing # manifest not found--keep searching LOAD_PATH
-    d = parsed_toml(manifest_file)
+    d = get_deps(parsed_toml(manifest_file))
     found_where = false
     found_name = false
     for (dep_name, entries) in d
@@ -539,7 +632,7 @@ function explicit_manifest_uuid_path(project_file::String, pkg::PkgId)::Union{No
     manifest_file = project_file_manifest_path(project_file)
     manifest_file === nothing && return nothing # no manifest, skip env
 
-    d = parsed_toml(manifest_file)
+    d = get_deps(parsed_toml(manifest_file))
     entries = get(d, pkg.name, nothing)::Union{Nothing, Vector{Any}}
     entries === nothing && return nothing # TODO: allow name to mismatch?
     for entry in entries
@@ -563,10 +656,11 @@ function explicit_manifest_entry_path(manifest_file::String, pkg::PkgId, entry::
     hash === nothing && return nothing
     hash = SHA1(hash)
     # Keep the 4 since it used to be the default
-    for slug in (version_slug(pkg.uuid, hash, 4), version_slug(pkg.uuid, hash))
+    uuid = pkg.uuid::UUID # checked within `explicit_manifest_uuid_path`
+    for slug in (version_slug(uuid, hash), version_slug(uuid, hash, 4))
         for depot in DEPOT_PATH
-            path = abspath(depot, "packages", pkg.name, slug)
-            ispath(path) && return path
+            path = joinpath(depot, "packages", pkg.name, slug)
+            ispath(path) && return abspath(path)
         end
     end
     return nothing
@@ -675,7 +769,7 @@ function _include_from_serialized(path::String, depmods::Vector{Any})
     return restored
 end
 
-function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt64, modpath::Union{Nothing, String})
+function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt64, modpath::Union{Nothing, String}, depth::Int = 0)
     if root_module_exists(modkey)
         M = root_module(modkey)
         if PkgId(M) == modkey && module_build_id(M) === build_id
@@ -686,7 +780,7 @@ function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt64, modpath::U
             modpath = locate_package(modkey)
             modpath === nothing && return nothing
         end
-        mod = _require_search_from_serialized(modkey, String(modpath))
+        mod = _require_search_from_serialized(modkey, String(modpath), depth)
         get!(PkgOrigin, pkgorigins, modkey).path = modpath
         if !isa(mod, Bool)
             for callback in package_callbacks
@@ -727,10 +821,14 @@ function _require_from_serialized(path::String)
     return _include_from_serialized(path, depmods)
 end
 
+# use an Int counter so that nested @time_imports calls all remain open
+const TIMING_IMPORTS = Threads.Atomic{Int}(0)
+
 # returns `true` if require found a precompile cache for this sourcepath, but couldn't load it
 # returns `false` if the module isn't known to be precompilable
 # returns the set of modules restored if the cache load succeeded
-function _require_search_from_serialized(pkg::PkgId, sourcepath::String)
+function _require_search_from_serialized(pkg::PkgId, sourcepath::String, depth::Int = 0)
+    t_before = time_ns()
     paths = find_all_in_cache_path(pkg)
     for path_to_try in paths::Vector{String}
         staledeps = stale_cachefile(sourcepath, path_to_try)
@@ -746,7 +844,7 @@ function _require_search_from_serialized(pkg::PkgId, sourcepath::String)
             dep = staledeps[i]
             dep isa Module && continue
             modpath, modkey, build_id = dep::Tuple{String, PkgId, UInt64}
-            dep = _tryrequire_from_serialized(modkey, build_id, modpath)
+            dep = _tryrequire_from_serialized(modkey, build_id, modpath, depth + 1)
             if dep === nothing
                 @debug "Required dependency $modkey failed to load from cache file for $modpath."
                 staledeps = true
@@ -761,6 +859,13 @@ function _require_search_from_serialized(pkg::PkgId, sourcepath::String)
         if isa(restored, Exception)
             @debug "Deserialization checks failed while attempting to load cache from $path_to_try" exception=restored
         else
+            if TIMING_IMPORTS[] > 0
+                elapsed = round((time_ns() - t_before) / 1e6, digits = 1)
+                tree_prefix = depth == 0 ? "" : "$("  "^(depth-1))┌ "
+                print("$(lpad(elapsed, 9)) ms  ")
+                printstyled(tree_prefix, color = :light_black)
+                println(pkg.name)
+            end
             return restored
         end
     end
@@ -863,42 +968,47 @@ For more details regarding code loading, see the manual sections on [modules](@r
 [parallel computing](@ref code-availability).
 """
 function require(into::Module, mod::Symbol)
-    uuidkey = identify_package(into, String(mod))
-    # Core.println("require($(PkgId(into)), $mod) -> $uuidkey")
-    if uuidkey === nothing
-        where = PkgId(into)
-        if where.uuid === nothing
-            throw(ArgumentError("""
-                Package $mod not found in current path:
-                - Run `import Pkg; Pkg.add($(repr(String(mod))))` to install the $mod package.
-                """))
-        else
-            s = """
-            Package $(where.name) does not have $mod in its dependencies:
-            - If you have $(where.name) checked out for development and have
-              added $mod as a dependency but haven't updated your primary
-              environment's manifest file, try `Pkg.resolve()`.
-            - Otherwise you may need to report an issue with $(where.name)"""
-
-            uuidkey = identify_package(PkgId(string(into)), String(mod))
-            uuidkey === nothing && throw(ArgumentError(s))
-
-            # fall back to toplevel loading with a warning
-            if !(where in modules_warned_for)
-                @warn string(
-                    full_warning_showed[] ? "" : s, "\n",
-                    string("Loading $(mod) into $(where.name) from project dependency, ",
-                           "future warnings for $(where.name) are suppressed.")
-                ) _module = nothing _file = nothing _group = nothing
-                push!(modules_warned_for, where)
+    LOADING_CACHE[] = LoadingCache()
+    try
+        uuidkey = identify_package(into, String(mod))
+        # Core.println("require($(PkgId(into)), $mod) -> $uuidkey")
+        if uuidkey === nothing
+            where = PkgId(into)
+            if where.uuid === nothing
+                throw(ArgumentError("""
+                    Package $mod not found in current path:
+                    - Run `import Pkg; Pkg.add($(repr(String(mod))))` to install the $mod package.
+                    """))
+            else
+                s = """
+                Package $(where.name) does not have $mod in its dependencies:
+                - If you have $(where.name) checked out for development and have
+                  added $mod as a dependency but haven't updated your primary
+                  environment's manifest file, try `Pkg.resolve()`.
+                - Otherwise you may need to report an issue with $(where.name)"""
+
+                uuidkey = identify_package(PkgId(string(into)), String(mod))
+                uuidkey === nothing && throw(ArgumentError(s))
+
+                # fall back to toplevel loading with a warning
+                if !(where in modules_warned_for)
+                    @warn string(
+                        full_warning_showed[] ? "" : s, "\n",
+                        string("Loading $(mod) into $(where.name) from project dependency, ",
+                               "future warnings for $(where.name) are suppressed.")
+                    ) _module = nothing _file = nothing _group = nothing
+                    push!(modules_warned_for, where)
+                end
+                full_warning_showed[] = true
             end
-            full_warning_showed[] = true
         end
+        if _track_dependencies[]
+            push!(_require_dependencies, (into, binpack(uuidkey), 0.0))
+        end
+        return require(uuidkey)
+    finally
+        LOADING_CACHE[] = nothing
     end
-    if _track_dependencies[]
-        push!(_require_dependencies, (into, binpack(uuidkey), 0.0))
-    end
-    return require(uuidkey)
 end
 
 mutable struct PkgOrigin
@@ -1142,7 +1252,7 @@ Base.include # defined in Base.jl
 
 # Full include() implementation which is used after bootstrap
 function _include(mapexpr::Function, mod::Module, _path::AbstractString)
-    @_noinline_meta # Workaround for module availability in _simplify_include_frames
+    @noinline # Workaround for module availability in _simplify_include_frames
     path, prev = _include_dependency(mod, _path)
     for callback in include_callbacks # to preserve order, must come before eval in include_string
         invokelatest(callback, mod, path)
@@ -1303,16 +1413,20 @@ end
 
 const MAX_NUM_PRECOMPILE_FILES = Ref(10)
 
-function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, internal_stdout::IO = stdout)
+function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, internal_stdout::IO = stdout,
+                      ignore_loaded_modules::Bool = true)
+
     @nospecialize internal_stderr internal_stdout
     # decide where to put the resulting cache file
     cachepath = compilecache_dir(pkg)
 
     # build up the list of modules that we want the precompile process to preserve
     concrete_deps = copy(_concrete_dependencies)
-    for (key, mod) in loaded_modules
-        if !(mod === Main || mod === Core || mod === Base)
-            push!(concrete_deps, key => module_build_id(mod))
+    if ignore_loaded_modules
+        for (key, mod) in loaded_modules
+            if !(mod === Main || mod === Core || mod === Base)
+                push!(concrete_deps, key => module_build_id(mod))
+            end
         end
     end
     # run the expression and cache the result
@@ -1332,8 +1446,8 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in
             open(tmppath, "a+") do f
                 write(f, _crc32c(seekstart(f)))
             end
-            # inherit permission from the source file
-            chmod(tmppath, filemode(path) & 0o777)
+            # inherit permission from the source file (and make them writable)
+            chmod(tmppath, filemode(path) & 0o777 | 0o200)
 
             # Read preferences hash back from .ji file (we can't precompute because
             # we don't actually know what the list of compile-time preferences are without compiling)
@@ -1662,9 +1776,9 @@ function get_preferences_hash(uuid::Union{UUID, Nothing}, prefs_list::Vector{Str
 
     # Walk through each name that's called out as a compile-time preference
     for name in prefs_list
-        prefs_name = get(prefs, name, nothing)::Union{String, Nothing}
-        if prefs_name !== nothing
-            h = hash(prefs_name, h)
+        prefs_value = get(prefs, name, nothing)
+        if prefs_value !== nothing
+            h = hash(prefs_value, h)
         end
     end
     # We always return a `UInt64` so that our serialization format is stable
@@ -1745,7 +1859,7 @@ function stale_cachefile(modpath::String, cachefile::String)
         # now check if this file is fresh relative to its source files
         if !skip_timecheck
             if !samefile(includes[1].filename, modpath)
-                @debug "Rejecting cache file $cachefile because it is for file $(includes[1].filename)) not file $modpath"
+                @debug "Rejecting cache file $cachefile because it is for file $(includes[1].filename) not file $modpath"
                 return true # cache file was compiled from a different path
             end
             for (modkey, req_modkey) in requires
@@ -1823,11 +1937,13 @@ function precompile(@nospecialize(f), args::Tuple)
     precompile(Tuple{Core.Typeof(f), args...})
 end
 
+const ENABLE_PRECOMPILE_WARNINGS = Ref(false)
 function precompile(argt::Type)
-    if ccall(:jl_compile_hint, Int32, (Any,), argt) == 0
+    ret = ccall(:jl_compile_hint, Int32, (Any,), argt) != 0
+    if !ret && ENABLE_PRECOMPILE_WARNINGS[]
         @warn "Inactive precompile statement" maxlog=100 form=argt _module=nothing _file=nothing _line=0
     end
-    true
+    return ret
 end
 
 precompile(include_package_for_output, (PkgId, String, Vector{String}, Vector{String}, Vector{String}, typeof(_concrete_dependencies), Nothing))
diff --git a/base/lock.jl b/base/lock.jl
index b013a593cde84c..07253211984fc2 100644
--- a/base/lock.jl
+++ b/base/lock.jl
@@ -201,6 +201,22 @@ function trylock(f, l::AbstractLock)
     return false
 end
 
+"""
+    @lock l expr
+
+Macro version of `lock(f, l::AbstractLock)` but with `expr` instead of `f` function.
+Expands to:
+```julia
+lock(l)
+try
+    expr
+finally
+    unlock(l)
+end
+```
+This is similar to using [`lock`](@ref) with a `do` block, but avoids creating a closure
+and thus can improve the performance.
+"""
 macro lock(l, expr)
     quote
         temp = $(esc(l))
@@ -213,6 +229,13 @@ macro lock(l, expr)
     end
 end
 
+"""
+    @lock_nofail l expr
+
+Equivalent to `@lock l expr` for cases in which we can guarantee that `expr`
+will not throw any error. In this case, avoiding `try`/`finally` can improve performance.
+See [`@lock`](@ref).
+"""
 macro lock_nofail(l, expr)
     quote
         temp = $(esc(l))
diff --git a/base/logging.jl b/base/logging.jl
index 066523771f5fbc..712fe19c23699a 100644
--- a/base/logging.jl
+++ b/base/logging.jl
@@ -79,15 +79,15 @@ function _invoked_shouldlog(logger, level, _module, group, id)
         shouldlog,
         Tuple{typeof(logger), typeof(level), typeof(_module), typeof(group), typeof(id)},
         logger, level, _module, group, id
-    )
+    )::Bool
 end
 
 function _invoked_min_enabled_level(@nospecialize(logger))
-    return invoke(min_enabled_level, Tuple{typeof(logger)}, logger)
+    return invoke(min_enabled_level, Tuple{typeof(logger)}, logger)::LogLevel
 end
 
 function _invoked_catch_exceptions(@nospecialize(logger))
-    return invoke(catch_exceptions, Tuple{typeof(logger)}, logger)
+    return invoke(catch_exceptions, Tuple{typeof(logger)}, logger)::Bool
 end
 
 """
@@ -133,9 +133,29 @@ isless(a::LogLevel, b::LogLevel) = isless(a.level, b.level)
 convert(::Type{LogLevel}, level::Integer) = LogLevel(level)
 
 const BelowMinLevel = LogLevel(-1000001)
+"""
+    Debug
+
+Alias for [`LogLevel(-1000)`](@ref LogLevel).
+"""
 const Debug         = LogLevel(   -1000)
+"""
+    Info
+
+Alias for [`LogLevel(0)`](@ref LogLevel).
+"""
 const Info          = LogLevel(       0)
+"""
+    Warn
+
+Alias for [`LogLevel(1000)`](@ref LogLevel).
+"""
 const Warn          = LogLevel(    1000)
+"""
+    Error
+
+Alias for [`LogLevel(2000)`](@ref LogLevel).
+"""
 const Error         = LogLevel(    2000)
 const AboveMaxLevel = LogLevel( 1000001)
 
@@ -611,21 +631,25 @@ attached to the task.
 """
 current_logger() = current_logstate().logger
 
+const closed_stream = IOBuffer(UInt8[])
+close(closed_stream)
 
 #-------------------------------------------------------------------------------
 # SimpleLogger
 """
-    SimpleLogger(stream=stderr, min_level=Info)
+    SimpleLogger([stream,] min_level=Info)
 
 Simplistic logger for logging all messages with level greater than or equal to
-`min_level` to `stream`.
+`min_level` to `stream`. If `stream` is closed, messages are written to the
+current `stderr` instead, regardless of their log level.
 """
 struct SimpleLogger <: AbstractLogger
     stream::IO
     min_level::LogLevel
     message_limits::Dict{Any,Int}
 end
-SimpleLogger(stream::IO=stderr, level=Info) = SimpleLogger(stream, level, Dict{Any,Int}())
+SimpleLogger(stream::IO, level=Info) = SimpleLogger(stream, level, Dict{Any,Int}())
+SimpleLogger(level=Info) = SimpleLogger(closed_stream, level)
 
 shouldlog(logger::SimpleLogger, level, _module, group, id) =
     get(logger.message_limits, id, 1) > 0
@@ -644,7 +668,11 @@ function handle_message(logger::SimpleLogger, level::LogLevel, message, _module,
         remaining > 0 || return
     end
     buf = IOBuffer()
-    iob = IOContext(buf, logger.stream)
+    stream = logger.stream
+    if !isopen(stream)
+        stream = stderr
+    end
+    iob = IOContext(buf, stream)
     levelstr = level == Warn ? "Warning" : string(level)
     msglines = split(chomp(string(message)::String), '\n')
     println(iob, "┌ ", levelstr, ": ", msglines[1])
@@ -656,10 +684,10 @@ function handle_message(logger::SimpleLogger, level::LogLevel, message, _module,
         println(iob, "│   ", key, " = ", val)
     end
     println(iob, "└ @ ", _module, " ", filepath, ":", line)
-    write(logger.stream, take!(buf))
+    write(stream, take!(buf))
     nothing
 end
 
-_global_logstate = LogState(SimpleLogger(Core.stderr, CoreLogging.Info))
+_global_logstate = LogState(SimpleLogger())
 
 end # CoreLogging
diff --git a/base/math.jl b/base/math.jl
index 42f10760ed4fb9..3857b1b1e8c102 100644
--- a/base/math.jl
+++ b/base/math.jl
@@ -47,15 +47,17 @@ end
 Return `x` if `lo <= x <= hi`. If `x > hi`, return `hi`. If `x < lo`, return `lo`. Arguments
 are promoted to a common type.
 
+See also [`clamp!`](@ref), [`min`](@ref), [`max`](@ref).
+
 # Examples
 ```jldoctest
-julia> clamp.([pi, 1.0, big(10.)], 2., 9.)
+julia> clamp.([pi, 1.0, big(10)], 2.0, 9.0)
 3-element Vector{BigFloat}:
  3.141592653589793238462643383279502884197169399375105820974944592307816406286198
  2.0
  9.0
 
-julia> clamp.([11,8,5],10,6) # an example where lo > hi
+julia> clamp.([11, 8, 5], 10, 6)  # an example where lo > hi
 3-element Vector{Int64}:
   6
   6
@@ -73,12 +75,18 @@ clamp(x::X, lo::L, hi::H) where {X,L,H} =
 
 Clamp `x` between `typemin(T)` and `typemax(T)` and convert the result to type `T`.
 
+See also [`trunc`](@ref).
+
 # Examples
 ```jldoctest
 julia> clamp(200, Int8)
 127
+
 julia> clamp(-200, Int8)
 -128
+
+julia> trunc(Int, 4pi^2)
+39
 ```
 """
 clamp(x, ::Type{T}) where {T<:Integer} = clamp(x, typemin(T), typemax(T)) % T
@@ -89,6 +97,19 @@ clamp(x, ::Type{T}) where {T<:Integer} = clamp(x, typemin(T), typemax(T)) % T
 
 Restrict values in `array` to the specified range, in-place.
 See also [`clamp`](@ref).
+
+# Examples
+```jldoctest
+julia> row = collect(-4:4)';
+
+julia> clamp!(row, 0, Inf)
+1×9 adjoint(::Vector{Int64}) with eltype Int64:
+ 0  0  0  0  0  1  2  3  4
+
+julia> clamp.((-4:4)', 0, Inf)
+1×9 Matrix{Float64}:
+ 0.0  0.0  0.0  0.0  0.0  1.0  2.0  3.0  4.0
+```
 """
 function clamp!(x::AbstractArray, lo, hi)
     @inbounds for i in eachindex(x)
@@ -205,6 +226,8 @@ end
     @horner(x, p...)
 
 Evaluate `p[1] + x * (p[2] + x * (....))`, i.e. a polynomial via Horner's rule.
+
+See also [`@evalpoly`](@ref), [`evalpoly`](@ref).
 """
 macro horner(x, p...)
      xesc, pesc = esc(x), esc.(p)
@@ -224,6 +247,8 @@ that is, the coefficients are given in ascending order by power of `z`.  This ma
 to efficient inline code that uses either Horner's method or, for complex `z`, a more
 efficient Goertzel-like algorithm.
 
+See also [`evalpoly`](@ref).
+
 # Examples
 ```jldoctest
 julia> @evalpoly(3, 1, 0, 1)
@@ -241,6 +266,20 @@ macro evalpoly(z, p...)
     :(evalpoly($zesc, ($(pesc...),)))
 end
 
+# Polynomial evaluation (Horner's rule) using compensated summation; returns the pair `(hi, lo)`.
+# Much more accurate, especially when `lo` can be combined with other rounding errors.
+@inline function exthorner(x, p::Tuple)
+    hi, lo = p[end], zero(x)  # start from the last coefficient; `lo` accumulates the error term
+    for i in length(p)-1:-1:1
+        pi = p[i]  # NOTE: locally shadows the constant `pi`
+        prod = hi*x
+        err = fma(hi, x, -prod)  # rounding error of `hi*x`, recovered with a fused multiply-add
+        hi = pi+prod
+        lo = fma(lo, x, prod - (hi - pi) + err)  # `prod - (hi - pi)`: part of `prod` lost forming `hi`
+    end
+    return hi, lo
+end
+
 """
     rad2deg(x)
 
@@ -259,6 +298,8 @@ rad2deg(z::AbstractFloat) = z * (180 / oftype(z, pi))
 
 Convert `x` from degrees to radians.
 
+See also: [`rad2deg`](@ref), [`sind`](@ref).
+
 # Examples
 ```jldoctest
 julia> deg2rad(90)
@@ -365,23 +406,6 @@ Compute the inverse hyperbolic sine of `x`.
 """
 asinh(x::Number)
 
-"""
-    expm1(x)
-
-Accurately compute ``e^x-1``. It avoids the loss of precision involved in the direct
-evaluation of exp(x)-1 for small values of x.
-# Examples
-```jldoctest
-julia> expm1(1e-16)
-1.0e-16
-
-julia> exp(1e-16) - 1
-0.0
-```
-"""
-expm1(x)
-expm1(x::Float64) = ccall((:expm1,libm), Float64, (Float64,), x)
-expm1(x::Float32) = ccall((:expm1f,libm), Float32, (Float32,), x)
 
 # utility for converting NaN return to DomainError
 # the branch in nan_dom_err prevents its callers from inlining, so be sure to force it
@@ -393,6 +417,8 @@ expm1(x::Float32) = ccall((:expm1f,libm), Float32, (Float32,), x)
     sin(x)
 
 Compute sine of `x`, where `x` is in radians.
+
+See also [`sind`](@ref), [`sinpi`](@ref), [`sincos`](@ref), [`cis`](@ref).
 """
 sin(x::Number)
 
@@ -400,6 +426,8 @@ sin(x::Number)
     cos(x)
 
 Compute cosine of `x`, where `x` is in radians.
+
+See also [`cosd`](@ref), [`cospi`](@ref), [`sincos`](@ref), [`cis`](@ref).
 """
 cos(x::Number)
 
@@ -444,6 +472,8 @@ atanh(x::Number)
 Compute the natural logarithm of `x`. Throws [`DomainError`](@ref) for negative
 [`Real`](@ref) arguments. Use complex negative arguments to obtain complex results.
 
+See also [`log1p`](@ref), [`log2`](@ref), [`log10`](@ref).
+
 # Examples
 ```jldoctest; filter = r"Stacktrace:(\\n \\[[0-9]+\\].*)*"
 julia> log(2)
@@ -465,6 +495,8 @@ log(x::Number)
 Compute the logarithm of `x` to base 2. Throws [`DomainError`](@ref) for negative
 [`Real`](@ref) arguments.
 
+See also: [`exp2`](@ref), [`ldexp`](@ref), [`ispow2`](@ref).
+
 # Examples
 ```jldoctest; filter = r"Stacktrace:(\\n \\[[0-9]+\\].*)*"
 julia> log2(4)
@@ -542,6 +574,8 @@ end
 Return ``\\sqrt{x}``. Throws [`DomainError`](@ref) for negative [`Real`](@ref) arguments.
 Use complex negative arguments instead. The prefix operator `√` is equivalent to `sqrt`.
 
+See also: [`hypot`](@ref).
+
 # Examples
 ```jldoctest; filter = r"Stacktrace:(\\n \\[[0-9]+\\].*)*"
 julia> sqrt(big(81))
@@ -556,6 +590,13 @@ Stacktrace:
 
 julia> sqrt(big(complex(-81)))
 0.0 + 9.0im
+
+julia> .√(1:4)
+4-element Vector{Float64}:
+ 1.0
+ 1.4142135623730951
+ 1.7320508075688772
+ 2.0
 ```
 """
 sqrt(x)
@@ -771,17 +812,24 @@ end
 """
     significand(x)
 
-Extract the `significand(s)` (a.k.a. mantissa), in binary representation, of a
-floating-point number. If `x` is a non-zero finite number, then the result will be
-a number of the same type on the interval ``[1,2)``. Otherwise `x` is returned.
+Extract the significand (a.k.a. mantissa) of a floating-point number. If `x` is
+a non-zero finite number, then the result will be a number of the same type and
+sign as `x`, and whose absolute value is on the interval ``[1,2)``. Otherwise
+`x` is returned.
 
 # Examples
 ```jldoctest
-julia> significand(15.2)/15.2
-0.125
+julia> significand(15.2)
+1.9
+
+julia> significand(-15.2)
+-1.9
+
+julia> significand(-15.2) * 2^3
+-15.2
 
-julia> significand(15.2)*8
-15.2
+julia> significand(-Inf), significand(Inf), significand(NaN)
+(-Inf, Inf, NaN)
 ```
 """
 function significand(x::T) where T<:IEEEFloat
@@ -1154,7 +1202,7 @@ include("special/log.jl")
 
 # Float16 definitions
 
-for func in (:sin,:cos,:tan,:asin,:acos,:atan,:sinh,:cosh,:tanh,:asinh,:acosh,
+for func in (:sin,:cos,:tan,:asin,:acos,:atan,:cosh,:tanh,:asinh,:acosh,
              :atanh,:log,:log2,:log10,:sqrt,:lgamma,:log1p)
     @eval begin
         $func(a::Float16) = Float16($func(Float32(a)))
@@ -1162,13 +1210,12 @@ for func in (:sin,:cos,:tan,:asin,:acos,:atan,:sinh,:cosh,:tanh,:asinh,:acosh,
     end
 end
 
-for func in (:exp,:exp2,:exp10)
+for func in (:exp,:exp2,:exp10,:sinh)
      @eval $func(a::ComplexF16) = ComplexF16($func(ComplexF32(a)))
 end
 
 
 atan(a::Float16,b::Float16) = Float16(atan(Float32(a),Float32(b)))
-cbrt(a::Float16) = Float16(cbrt(Float32(a)))
 sincos(a::Float16) = Float16.(sincos(Float32(a)))
 
 for f in (:sin, :cos, :tan, :asin, :atan, :acos,
diff --git a/base/mathconstants.jl b/base/mathconstants.jl
index a3d1be99becbb5..156dc9e1ce39a2 100644
--- a/base/mathconstants.jl
+++ b/base/mathconstants.jl
@@ -23,10 +23,17 @@ Base.@irrational catalan  0.91596559417721901505  catalan
 
 The constant pi.
 
+Unicode `π` can be typed by writing `\\pi` then pressing tab in the Julia REPL, and in many editors.
+
+See also: [`sinpi`](@ref), [`sincospi`](@ref), [`deg2rad`](@ref).
+
 # Examples
 ```jldoctest
 julia> pi
 π = 3.1415926535897...
+
+julia> 1/2pi
+0.15915494309189535
 ```
 """
 π, const pi = π
@@ -37,10 +44,20 @@ julia> pi
 
 The constant ℯ.
 
+Unicode `ℯ` can be typed by writing `\\euler` and pressing tab in the Julia REPL, and in many editors.
+
+See also: [`exp`](@ref), [`cis`](@ref), [`cispi`](@ref).
+
 # Examples
 ```jldoctest
 julia> ℯ
 ℯ = 2.7182818284590...
+
+julia> log(ℯ)
+1
+
+julia> ℯ^(im)π ≈ -1
+true
 ```
 """
 ℯ, const e = ℯ
@@ -55,6 +72,11 @@ Euler's constant.
 ```jldoctest
 julia> Base.MathConstants.eulergamma
 γ = 0.5772156649015...
+
+julia> dx = 10^-6;
+
+julia> sum(-exp(-x) * log(x) for x in dx:dx:100) * dx
+0.5772078382499134
 ```
 """
 γ, const eulergamma = γ
@@ -69,6 +91,9 @@ The golden ratio.
 ```jldoctest
 julia> Base.MathConstants.golden
 φ = 1.6180339887498...
+
+julia> (2ans - 1)^2 ≈ 5
+true
 ```
 """
 φ, const golden = φ
@@ -82,6 +107,9 @@ Catalan's constant.
 ```jldoctest
 julia> Base.MathConstants.catalan
 catalan = 0.9159655941772...
+
+julia> sum(log(x)/(1+x^2) for x in 1:0.01:10^6) * 0.01
+0.9159466120554123
 ```
 """
 catalan
diff --git a/base/meta.jl b/base/meta.jl
index 98bbcc8cdd3581..649ffe9d1a19c3 100644
--- a/base/meta.jl
+++ b/base/meta.jl
@@ -19,6 +19,7 @@ export quot,
        @dump
 
 using Base: isidentifier, isoperator, isunaryoperator, isbinaryoperator, ispostfixoperator
+import Base: isexpr
 
 """
     Meta.quot(ex)::Expr
@@ -73,9 +74,7 @@ julia> Meta.isexpr(ex, :call, 2)
 true
 ```
 """
-isexpr(@nospecialize(ex), head::Symbol) = isa(ex, Expr) && ex.head === head
 isexpr(@nospecialize(ex), heads) = isa(ex, Expr) && in(ex.head, heads)
-isexpr(@nospecialize(ex), head::Symbol, n::Int) = isa(ex, Expr) && ex.head === head && length(ex.args) == n
 isexpr(@nospecialize(ex), heads, n::Int) = isa(ex, Expr) && in(ex.head, heads) && length(ex.args) == n
 
 """
@@ -188,7 +187,7 @@ The expression passed to the [`parse`](@ref) function could not be interpreted a
 expression.
 """
 struct ParseError <: Exception
-    msg::AbstractString
+    msg::String
 end
 
 function _parse_string(text::AbstractString, filename::AbstractString,
@@ -203,21 +202,32 @@ end
 """
     parse(str, start; greedy=true, raise=true, depwarn=true)
 
-Parse the expression string and return an expression (which could later be passed to eval
-for execution). `start` is the index of the first character to start parsing. If `greedy` is
-`true` (default), `parse` will try to consume as much input as it can; otherwise, it will
-stop as soon as it has parsed a valid expression. Incomplete but otherwise syntactically
-valid expressions will return `Expr(:incomplete, "(error message)")`. If `raise` is `true`
-(default), syntax errors other than incomplete expressions will raise an error. If `raise`
-is `false`, `parse` will return an expression that will raise an error upon evaluation. If
-`depwarn` is `false`, deprecation warnings will be suppressed.
+Parse the expression string and return an expression (which could later be
+passed to eval for execution). `start` is the code unit index into `str` of the
+first character to start parsing at (as with all string indexing, these are not
+character indices). If `greedy` is `true` (default), `parse` will try to consume
+as much input as it can; otherwise, it will stop as soon as it has parsed a
+valid expression. Incomplete but otherwise syntactically valid expressions will
+return `Expr(:incomplete, "(error message)")`. If `raise` is `true` (default),
+syntax errors other than incomplete expressions will raise an error. If `raise`
+is `false`, `parse` will return an expression that will raise an error upon
+evaluation. If `depwarn` is `false`, deprecation warnings will be suppressed.
 
 ```jldoctest
-julia> Meta.parse("x = 3, y = 5", 7)
-(:(y = 5), 13)
+julia> Meta.parse("(α, β) = 3, 5", 1) # start of string
+(:((α, β) = (3, 5)), 16)
 
-julia> Meta.parse("x = 3, y = 5", 5)
-(:((3, y) = 5), 13)
+julia> Meta.parse("(α, β) = 3, 5", 1, greedy=false)
+(:((α, β)), 9)
+
+julia> Meta.parse("(α, β) = 3, 5", 16) # end of string
+(nothing, 16)
+
+julia> Meta.parse("(α, β) = 3, 5", 11) # index of the space before 3
+(:((3, 5)), 16)
+
+julia> Meta.parse("(α, β) = 3, 5", 11, greedy=false)
+(3, 13)
 ```
 """
 function parse(str::AbstractString, pos::Integer; greedy::Bool=true, raise::Bool=true,
@@ -360,7 +370,10 @@ function _partially_inline!(@nospecialize(x), slot_replacements::Vector{Any},
     if isa(x, Expr)
         head = x.head
         if head === :static_parameter
-            return QuoteNode(static_param_values[x.args[1]])
+            if isassigned(static_param_values, x.args[1])
+                return QuoteNode(static_param_values[x.args[1]])
+            end
+            return x
         elseif head === :cfunction
             @assert !isa(type_signature, UnionAll) || !isempty(spvals)
             if !isa(x.args[2], QuoteNode) # very common no-op
@@ -403,7 +416,31 @@ function _partially_inline!(@nospecialize(x), slot_replacements::Vector{Any},
             x.args[2] += statement_offset
         elseif head === :enter
             x.args[1] += statement_offset
-        elseif !is_meta_expr_head(head)
+        elseif head === :isdefined
+            arg = x.args[1]
+            # inlining a QuoteNode or literal into `Expr(:isdefined, x)` is invalid, replace with true
+            if isa(arg, Core.SlotNumber)
+                id = arg.id
+                if 1 <= id <= length(slot_replacements)
+                    replacement = slot_replacements[id]
+                    if isa(replacement, Union{Core.SlotNumber, GlobalRef, Symbol})
+                        return Expr(:isdefined, replacement)
+                    else
+                        @assert !isa(replacement, Expr)
+                        return true
+                    end
+                end
+                return Expr(:isdefined, Core.SlotNumber(id + slot_offset))
+            elseif isexpr(arg, :static_parameter)
+                if isassigned(static_param_values, arg.args[1])
+                    return true
+                end
+                return x
+            else
+                @assert isa(arg, Union{GlobalRef, Symbol})
+                return x
+            end
+        elseif !Core.Compiler.is_meta_expr_head(head)
             partially_inline!(x.args, slot_replacements, type_signature, static_param_values,
                               slot_offset, statement_offset, boundscheck)
         end
@@ -413,6 +450,4 @@ end
 
 _instantiate_type_in_env(x, spsig, spvals) = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), x, spsig, spvals)
 
-is_meta_expr_head(head::Symbol) = (head === :inbounds || head === :boundscheck || head === :meta || head === :loopinfo)
-
 end # module
diff --git a/base/methodshow.jl b/base/methodshow.jl
index 961328c7c9684f..937ff02a1786da 100644
--- a/base/methodshow.jl
+++ b/base/methodshow.jl
@@ -295,6 +295,7 @@ function show_method_table(io::IO, ms::MethodList, max::Int=-1, header::Bool=tru
 end
 
 show(io::IO, ms::MethodList) = show_method_table(io, ms)
+show(io::IO, ::MIME"text/plain", ms::MethodList) = show_method_table(io, ms)
 show(io::IO, mt::Core.MethodTable) = show_method_table(io, MethodList(mt))
 
 function inbase(m::Module)
diff --git a/base/missing.jl b/base/missing.jl
index 343e8014bbb0ed..3176c567726027 100644
--- a/base/missing.jl
+++ b/base/missing.jl
@@ -12,7 +12,7 @@ where it is not supported. The error message, in the `msg` field
 may provide more specific details.
 """
 struct MissingException <: Exception
-    msg::AbstractString
+    msg::String
 end
 
 showerror(io::IO, ex::MissingException) =
@@ -179,8 +179,8 @@ xor(b::Bool, a::Missing) = missing
 xor(::Missing, ::Integer) = missing
 xor(::Integer, ::Missing) = missing
 
-*(d::Missing, x::AbstractString) = missing
-*(d::AbstractString, x::Missing) = missing
+*(d::Missing, x::Union{AbstractString,AbstractChar}) = missing
+*(d::Union{AbstractString,AbstractChar}, x::Missing) = missing
 
 function float(A::AbstractArray{Union{T, Missing}}) where {T}
     U = typeof(float(zero(T)))
@@ -201,6 +201,8 @@ Use [`collect`](@ref) to obtain an `Array` containing the non-`missing` values i
 be a `Vector` since it is not possible to remove missings while preserving dimensions
 of the input.
 
+See also [`coalesce`](@ref), [`ismissing`](@ref), [`something`](@ref).
+
 # Examples
 ```jldoctest
 julia> x = skipmissing([1, missing, 2])
@@ -281,24 +283,24 @@ mapreduce(f, op, itr::SkipMissing{<:AbstractArray}) =
 
 function _mapreduce(f, op, ::IndexLinear, itr::SkipMissing{<:AbstractArray})
     A = itr.x
-    local ai
+    ai = missing
     inds = LinearIndices(A)
     i = first(inds)
     ilast = last(inds)
-    while i <= ilast
+    for outer i in i:ilast
         @inbounds ai = A[i]
-        ai === missing || break
-        i += 1
+        ai !== missing && break
     end
-    i > ilast && return mapreduce_empty(f, op, eltype(itr))
+    ai === missing && return mapreduce_empty(f, op, eltype(itr))
     a1::eltype(itr) = ai
+    i == typemax(typeof(i)) && return mapreduce_first(f, op, a1)
     i += 1
-    while i <= ilast
+    ai = missing
+    for outer i in i:ilast
         @inbounds ai = A[i]
-        ai === missing || break
-        i += 1
+        ai !== missing && break
     end
-    i > ilast && return mapreduce_first(f, op, a1)
+    ai === missing && return mapreduce_first(f, op, a1)
     # We know A contains at least two non-missing entries: the result cannot be nothing
     something(mapreduce_impl(f, op, itr, first(inds), last(inds)))
 end
@@ -312,32 +314,35 @@ mapreduce_impl(f, op, A::SkipMissing, ifirst::Integer, ilast::Integer) =
 @noinline function mapreduce_impl(f, op, itr::SkipMissing{<:AbstractArray},
                                   ifirst::Integer, ilast::Integer, blksize::Int)
     A = itr.x
-    if ifirst == ilast
+    if ifirst > ilast
+        return nothing
+    elseif ifirst == ilast
         @inbounds a1 = A[ifirst]
         if a1 === missing
             return nothing
         else
             return Some(mapreduce_first(f, op, a1))
         end
-    elseif ifirst + blksize > ilast
+    elseif ilast - ifirst < blksize
         # sequential portion
-        local ai
+        ai = missing
         i = ifirst
-        while i <= ilast
+        for outer i in i:ilast
             @inbounds ai = A[i]
-            ai === missing || break
-            i += 1
+            ai !== missing && break
         end
-        i > ilast && return nothing
+        ai === missing && return nothing
         a1 = ai::eltype(itr)
+        i == typemax(typeof(i)) && return Some(mapreduce_first(f, op, a1))
         i += 1
-        while i <= ilast
+        ai = missing
+        for outer i in i:ilast
             @inbounds ai = A[i]
-            ai === missing || break
-            i += 1
+            ai !== missing && break
         end
-        i > ilast && return Some(mapreduce_first(f, op, a1))
+        ai === missing && return Some(mapreduce_first(f, op, a1))
         a2 = ai::eltype(itr)
+        i == typemax(typeof(i)) && return Some(op(f(a1), f(a2)))
         i += 1
         v = op(f(a1), f(a2))
         @simd for i = i:ilast
@@ -349,7 +354,7 @@ mapreduce_impl(f, op, A::SkipMissing, ifirst::Integer, ilast::Integer) =
         return Some(v)
     else
         # pairwise portion
-        imid = (ifirst + ilast) >> 1
+        imid = ifirst + (ilast - ifirst) >> 1
         v1 = mapreduce_impl(f, op, itr, ifirst, imid, blksize)
         v2 = mapreduce_impl(f, op, itr, imid+1, ilast, blksize)
         if v1 === nothing && v2 === nothing
@@ -396,12 +401,12 @@ function filter(f, itr::SkipMissing{<:AbstractArray})
 end
 
 """
-    coalesce(x, y...)
+    coalesce(x...)
 
 Return the first value in the arguments which is not equal to [`missing`](@ref),
 if any. Otherwise return `missing`.
 
-See also [`something`](@ref).
+See also [`skipmissing`](@ref), [`something`](@ref), [`@coalesce`](@ref).
 
 # Examples
 
@@ -423,3 +428,39 @@ function coalesce end
 coalesce() = missing
 coalesce(x::Missing, y...) = coalesce(y...)
 coalesce(x::Any, y...) = x
+
+
+"""
+    @coalesce(x...)
+
+Short-circuiting version of [`coalesce`](@ref).
+
+# Examples
+```jldoctest
+julia> f(x) = (println("f(\$x)"); missing);
+
+julia> a = 1;
+
+julia> a = @coalesce a f(2) f(3) error("`a` is still missing")
+1
+
+julia> b = missing;
+
+julia> b = @coalesce b f(2) f(3) error("`b` is still missing")
+f(2)
+f(3)
+ERROR: `b` is still missing
+[...]
+```
+
+!!! compat "Julia 1.7"
+    This macro is available as of Julia 1.7.
+"""
+macro coalesce(args...)
+    expr = :(missing)  # result when every argument evaluates to `missing`
+    for arg in reverse(args)  # nest from the last argument outwards so evaluation runs left to right
+        expr = :((val = $(esc(arg))) !== missing ? val : $expr)  # escape only the caller's code
+    end
+    return :(let val; $expr; end)  # `val` stays hygienic, so it cannot capture a caller's `val`
+end
+
diff --git a/base/mpfr.jl b/base/mpfr.jl
index c85531856f5c9a..0ffb0f50b1034c 100644
--- a/base/mpfr.jl
+++ b/base/mpfr.jl
@@ -151,6 +151,11 @@ global precision; `convert` will always return `x`.
 convenience since decimal literals are converted to `Float64` when parsed, so
 `BigFloat(2.1)` may not yield what you expect.
 
+See also:
+- [`@big_str`](@ref)
+- [`rounding`](@ref) and [`setrounding`](@ref)
+- [`precision`](@ref) and [`setprecision`](@ref)
+
 !!! compat "Julia 1.1"
     `precision` as a keyword argument requires at least Julia 1.1.
     In Julia 1.0 `precision` is the second positional argument (`BigFloat(x, precision)`).
@@ -169,11 +174,6 @@ julia> BigFloat("2.1", RoundUp)
 julia> BigFloat("2.1", RoundUp, precision=128)
 2.100000000000000000000000000000000000007
 ```
-
-# See also
-- [`@big_str`](@ref)
-- [`rounding`](@ref) and [`setrounding`](@ref)
-- [`precision`](@ref) and [`setprecision`](@ref)
 """
 BigFloat(x, r::RoundingMode)
 
@@ -339,7 +339,7 @@ Float32(x::BigFloat, r::MPFRRoundingMode=ROUNDING_MODE[]) =
 Float32(x::BigFloat, r::RoundingMode) = Float32(x, convert(MPFRRoundingMode, r))
 
 # TODO: avoid double rounding
-Float16(x::BigFloat) = Float16(Float32(x))
+Float16(x::BigFloat) = Float16(Float64(x))
 
 promote_rule(::Type{BigFloat}, ::Type{<:Real}) = BigFloat
 promote_rule(::Type{BigInt}, ::Type{<:AbstractFloat}) = BigFloat
@@ -815,8 +815,8 @@ Set the precision (in bits) to be used for `T` arithmetic.
     setting.
 """
 function setprecision(::Type{BigFloat}, precision::Integer)
-    if precision < 2
-        throw(DomainError(precision, "`precision` cannot be less than 2."))
+    if precision < 1
+        throw(DomainError(precision, "`precision` cannot be less than 1."))
     end
     DEFAULT_PRECISION[] = precision
     return precision
@@ -982,7 +982,7 @@ function _prettify_bigfloat(s::String)::String
             neg = startswith(int, '-')
             neg == true && (int = lstrip(int, '-'))
             @assert length(int) == 1
-            string(neg ? '-' : "", '0', '.', '0'^(-expo-1), int, frac)
+            string(neg ? '-' : "", '0', '.', '0'^(-expo-1), int, frac == "0" ? "" : frac)
         end
     else
         string(mantissa, 'e', exponent)
@@ -1021,14 +1021,14 @@ set_emax!(x) = check_exponent_err(ccall((:mpfr_set_emax, :libmpfr), Cint, (Clong
 set_emin!(x) = check_exponent_err(ccall((:mpfr_set_emin, :libmpfr), Cint, (Clong,), x))
 
 function Base.deepcopy_internal(x::BigFloat, stackdict::IdDict)
-    haskey(stackdict, x) && return stackdict[x]
-    # d = copy(x._d)
-    d = x._d
-    d′ = GC.@preserve d unsafe_string(pointer(d), sizeof(d)) # creates a definitely-new String
-    y = _BigFloat(x.prec, x.sign, x.exp, d′)
-    #ccall((:mpfr_custom_move,:libmpfr), Cvoid, (Ref{BigFloat}, Ptr{Limb}), y, d) # unnecessary
-    stackdict[x] = y
-    return y
+    get!(stackdict, x) do
+        # d = copy(x._d)
+        d = x._d
+        d′ = GC.@preserve d unsafe_string(pointer(d), sizeof(d)) # creates a definitely-new String
+        y = _BigFloat(x.prec, x.sign, x.exp, d′)
+        #ccall((:mpfr_custom_move,:libmpfr), Cvoid, (Ref{BigFloat}, Ptr{Limb}), y, d) # unnecessary
+        return y
+    end
 end
 
 function decompose(x::BigFloat)::Tuple{BigInt, Int, Int}
diff --git a/base/multidimensional.jl b/base/multidimensional.jl
index 9acb97a486fbb1..c8f06309b9a1a8 100644
--- a/base/multidimensional.jl
+++ b/base/multidimensional.jl
@@ -104,9 +104,9 @@ module IteratorsMD
 
     # zeros and ones
     zero(::CartesianIndex{N}) where {N} = zero(CartesianIndex{N})
-    zero(::Type{CartesianIndex{N}}) where {N} = CartesianIndex(ntuple(x -> 0, Val(N)))
+    zero(::Type{CartesianIndex{N}}) where {N} = CartesianIndex(ntuple(Returns(0), Val(N)))
     oneunit(::CartesianIndex{N}) where {N} = oneunit(CartesianIndex{N})
-    oneunit(::Type{CartesianIndex{N}}) where {N} = CartesianIndex(ntuple(x -> 1, Val(N)))
+    oneunit(::Type{CartesianIndex{N}}) where {N} = CartesianIndex(ntuple(Returns(1), Val(N)))
 
     # arithmetic, min/max
     @inline (-)(index::CartesianIndex{N}) where {N} =
@@ -125,9 +125,11 @@ module IteratorsMD
 
     # comparison
     @inline isless(I1::CartesianIndex{N}, I2::CartesianIndex{N}) where {N} = _isless(0, I1.I, I2.I)
-    @inline function _isless(ret, I1::NTuple{N,Int}, I2::NTuple{N,Int}) where N
-        newret = ifelse(ret==0, icmp(I1[N], I2[N]), ret)
-        _isless(newret, Base.front(I1), Base.front(I2))
+    @inline function _isless(ret, I1::Tuple{Int,Vararg{Int,N}}, I2::Tuple{Int,Vararg{Int,N}}) where {N}
+        newret = ifelse(ret==0, icmp(last(I1), last(I2)), ret)
+        t1, t2 = Base.front(I1), Base.front(I2)
+        # avoid dynamic dispatch by telling the compiler relational invariants
+        return isa(t1, Tuple{}) ? _isless(newret, (), ()) : _isless(newret, t1, t2::Tuple{Int,Vararg{Int}})
     end
     _isless(ret, ::Tuple{}, ::Tuple{}) = ifelse(ret==1, true, false)
     icmp(a, b) = ifelse(isless(a,b), 1, ifelse(a==b, 0, -1))
@@ -168,6 +170,7 @@ module IteratorsMD
         error("iteration is deliberately unsupported for CartesianIndex. Use `I` rather than `I...`, or use `Tuple(I)...`")
 
     # Iteration
+    const OrdinalRangeInt = OrdinalRange{Int, Int}
     """
         CartesianIndices(sz::Dims) -> R
         CartesianIndices((istart:[istep:]istop, jstart:[jstep:]jstop, ...)) -> R
@@ -262,13 +265,13 @@ module IteratorsMD
 
     For cartesian to linear index conversion, see [`LinearIndices`](@ref).
     """
-    struct CartesianIndices{N,R<:NTuple{N,OrdinalRange{Int, Int}}} <: AbstractArray{CartesianIndex{N},N}
+    struct CartesianIndices{N,R<:NTuple{N,OrdinalRangeInt}} <: AbstractArray{CartesianIndex{N},N}
         indices::R
     end
 
     CartesianIndices(::Tuple{}) = CartesianIndices{0,typeof(())}(())
     function CartesianIndices(inds::NTuple{N,OrdinalRange{<:Integer, <:Integer}}) where {N}
-        indices = map(r->convert(OrdinalRange{Int, Int}, r), inds)
+        indices = map(r->convert(OrdinalRangeInt, r), inds)
         CartesianIndices{N, typeof(indices)}(indices)
     end
 
@@ -278,7 +281,7 @@ module IteratorsMD
 
     CartesianIndices(A::AbstractArray) = CartesianIndices(axes(A))
 
-    _convert2ind(sz::Bool) = Base.OneTo(Int(sz))
+    _convert2ind(sz::Bool) = Base.OneTo(Int8(sz))
     _convert2ind(sz::Integer) = Base.OneTo(sz)
     _convert2ind(sz::AbstractUnitRange) = first(sz):last(sz)
     _convert2ind(sz::OrdinalRange) = first(sz):step(sz):last(sz)
@@ -350,10 +353,34 @@ module IteratorsMD
     # AbstractArray implementation
     Base.axes(iter::CartesianIndices{N,R}) where {N,R} = map(Base.axes1, iter.indices)
     Base.IndexStyle(::Type{CartesianIndices{N,R}}) where {N,R} = IndexCartesian()
+    # getindex for a 0D CartesianIndices is necessary for disambiguation
+    @propagate_inbounds function Base.getindex(iter::CartesianIndices{0,R}) where {R}
+        CartesianIndex()
+    end
     @propagate_inbounds function Base.getindex(iter::CartesianIndices{N,R}, I::Vararg{Int, N}) where {N,R}
         CartesianIndex(getindex.(iter.indices, I))
     end
 
+    # CartesianIndices act as a multidimensional range, so cartesian indexing of CartesianIndices
+    # with compatible dimensions may be seen as indexing into the component ranges.
+    # This may use the special indexing behavior implemented for ranges to return another CartesianIndices
+    @propagate_inbounds function Base.getindex(iter::CartesianIndices{N,R},
+        I::Vararg{Union{OrdinalRange{<:Integer, <:Integer}, Colon}, N}) where {N,R}
+        CartesianIndices(getindex.(iter.indices, I))
+    end
+    @propagate_inbounds function Base.getindex(iter::CartesianIndices{N},
+        C::CartesianIndices{N}) where {N}
+        CartesianIndices(getindex.(iter.indices, C.indices))
+    end
+
+    # If dimensions permit, we may index into a CartesianIndices directly instead of constructing a SubArray wrapper
+    @propagate_inbounds function Base.view(c::CartesianIndices{N}, r::Vararg{Union{OrdinalRange{<:Integer, <:Integer}, Colon},N}) where {N}
+        getindex(c, r...)
+    end
+    @propagate_inbounds function Base.view(c::CartesianIndices{N}, C::CartesianIndices{N}) where {N}
+        getindex(c, C)
+    end
+
     ndims(R::CartesianIndices) = ndims(typeof(R))
     ndims(::Type{CartesianIndices{N}}) where {N} = N
     ndims(::Type{CartesianIndices{N,TT}}) where {N,TT} = N
@@ -394,19 +421,21 @@ module IteratorsMD
     # `iterate` returns `Union{Nothing, Tuple}`, we explicitly pass a `valid` flag to eliminate
     # the type instability inside the core `__inc` logic, and this gives better runtime performance.
     __inc(::Tuple{}, ::Tuple{}) = false, ()
-    @inline function __inc(state::Tuple{Int}, indices::Tuple{<:OrdinalRange})
+    @inline function __inc(state::Tuple{Int}, indices::Tuple{OrdinalRangeInt})
         rng = indices[1]
         I = state[1] + step(rng)
         valid = __is_valid_range(I, rng) && state[1] != last(rng)
         return valid, (I, )
     end
-    @inline function __inc(state, indices)
+    @inline function __inc(state::Tuple{Int,Int,Vararg{Int,N}}, indices::Tuple{OrdinalRangeInt,OrdinalRangeInt,Vararg{OrdinalRangeInt,N}}) where {N}
         rng = indices[1]
         I = state[1] + step(rng)
         if __is_valid_range(I, rng) && state[1] != last(rng)
             return true, (I, tail(state)...)
         end
-        valid, I = __inc(tail(state), tail(indices))
+        t1, t2 = tail(state), tail(indices)
+        # avoid dynamic dispatch by telling the compiler relational invariants
+        valid, I = isa(t1, Tuple{Int}) ? __inc(t1, t2::Tuple{OrdinalRangeInt}) : __inc(t1, t2::Tuple{OrdinalRangeInt,OrdinalRangeInt,Vararg{OrdinalRangeInt}})
         return valid, (first(rng), I...)
     end
 
@@ -458,7 +487,7 @@ module IteratorsMD
 
     # Split out the first N elements of a tuple
     @inline function split(t, V::Val)
-        ref = ntuple(d->true, V)  # create a reference tuple of length N
+        ref = ntuple(Returns(true), V)  # create a reference tuple of length N
         _split1(t, ref), _splitrest(t, ref)
     end
     @inline _split1(t, ref) = (t[1], _split1(tail(t), tail(ref))...)
@@ -505,20 +534,21 @@ module IteratorsMD
 
     # decrement post check to avoid integer overflow
     @inline __dec(::Tuple{}, ::Tuple{}) = false, ()
-    @inline function __dec(state::Tuple{Int}, indices::Tuple{<:OrdinalRange})
+    @inline function __dec(state::Tuple{Int}, indices::Tuple{OrdinalRangeInt})
         rng = indices[1]
         I = state[1] - step(rng)
         valid = __is_valid_range(I, rng) && state[1] != first(rng)
         return valid, (I,)
     end
-
-    @inline function __dec(state, indices)
+    @inline function __dec(state::Tuple{Int,Int,Vararg{Int,N}}, indices::Tuple{OrdinalRangeInt,OrdinalRangeInt,Vararg{OrdinalRangeInt,N}}) where {N}
         rng = indices[1]
         I = state[1] - step(rng)
         if __is_valid_range(I, rng) && state[1] != first(rng)
             return true, (I, tail(state)...)
         end
-        valid, I = __dec(tail(state), tail(indices))
+        t1, t2 = tail(state), tail(indices)
+        # avoid dynamic dispatch by telling the compiler relational invariants
+        valid, I = isa(t1, Tuple{Int}) ? __dec(t1, t2::Tuple{OrdinalRangeInt}) : __dec(t1, t2::Tuple{OrdinalRangeInt,OrdinalRangeInt,Vararg{OrdinalRangeInt}})
         return valid, (last(rng), I...)
     end
 
@@ -621,7 +651,7 @@ using .IteratorsMD
 ## Bounds-checking with CartesianIndex
 # Disallow linear indexing with CartesianIndex
 function checkbounds(::Type{Bool}, A::AbstractArray, i::Union{CartesianIndex, AbstractArray{<:CartesianIndex}})
-    @_inline_meta
+    @inline
     checkbounds_indices(Bool, axes(A), (i,))
 end
 
@@ -685,10 +715,10 @@ checkindex(::Type{Bool}, inds::Tuple, I::CartesianIndices) = all(checkindex.(Boo
 # rather than returning N, it returns an NTuple{N,Bool} so the result is inferrable
 @inline index_ndims(i1, I...) = (true, index_ndims(I...)...)
 @inline function index_ndims(i1::CartesianIndex, I...)
-    (map(x->true, i1.I)..., index_ndims(I...)...)
+    (map(Returns(true), i1.I)..., index_ndims(I...)...)
 end
 @inline function index_ndims(i1::AbstractArray{CartesianIndex{N}}, I...) where N
-    (ntuple(x->true, Val(N))..., index_ndims(I...)...)
+    (ntuple(Returns(true), Val(N))..., index_ndims(I...)...)
 end
 index_ndims() = ()
 
@@ -698,7 +728,7 @@ index_ndims() = ()
 @inline index_dimsum(::Colon, I...) = (true, index_dimsum(I...)...)
 @inline index_dimsum(::AbstractArray{Bool}, I...) = (true, index_dimsum(I...)...)
 @inline function index_dimsum(::AbstractArray{<:Any,N}, I...) where N
-    (ntuple(x->true, Val(N))..., index_dimsum(I...)...)
+    (ntuple(Returns(true), Val(N))..., index_dimsum(I...)...)
 end
 index_dimsum() = ()
 
@@ -822,7 +852,7 @@ uncolon(inds::Tuple{},    I::Tuple{Colon, Vararg{Any}}) = Slice(OneTo(1))
 uncolon(inds::Tuple,      I::Tuple{Colon, Vararg{Any}}) = Slice(inds[1])
 
 ### From abstractarray.jl: Internal multidimensional indexing definitions ###
-getindex(x::Number, i::CartesianIndex{0}) = x
+getindex(x::Union{Number,AbstractChar}, ::CartesianIndex{0}) = x
 getindex(t::Tuple,  i::CartesianIndex{1}) = getindex(t, i.I[1])
 
 # These are not defined on directly on getindex to avoid
@@ -843,14 +873,14 @@ function _unsafe_getindex(::IndexStyle, A::AbstractArray, I::Vararg{Union{Real,
     # This is specifically not inlined to prevent excessive allocations in type unstable code
     shape = index_shape(I...)
     dest = similar(A, shape)
-    map(unsafe_length, axes(dest)) == map(unsafe_length, shape) || throw_checksize_error(dest, shape)
+    map(length, axes(dest)) == map(length, shape) || throw_checksize_error(dest, shape)
     _unsafe_getindex!(dest, A, I...) # usually a generated function, don't allow it to impact inference result
     return dest
 end
 
 function _generate_unsafe_getindex!_body(N::Int)
     quote
-        @_inline_meta
+        @inline
         D = eachindex(dest)
         Dy = iterate(D)
         @inbounds @nloops $N j d->I[d] begin
@@ -883,7 +913,7 @@ end
 
 ## setindex! ##
 function _setindex!(l::IndexStyle, A::AbstractArray, x, I::Union{Real, AbstractArray}...)
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(A, I...)
     _unsafe_setindex!(l, _maybe_reshape(l, A, I...), x, I...)
     A
@@ -964,7 +994,7 @@ function diff(a::AbstractArray{T,N}; dims::Integer) where {T,N}
 end
 function diff(r::AbstractRange{T}; dims::Integer=1) where {T}
     dims == 1 || throw(ArgumentError("dimension $dims out of range (1:1)"))
-    return T[@inbounds r[i+1] - r[i] for i in firstindex(r):lastindex(r)-1]
+    return [@inbounds r[i+1] - r[i] for i in firstindex(r):lastindex(r)-1]
 end
 
 ### from abstractarray.jl
@@ -1160,6 +1190,8 @@ their indices; any offset results in a (circular) wraparound. If the
 arrays have overlapping indices, then on the domain of the overlap
 `dest` agrees with `src`.
 
+See also: [`circshift`](@ref).
+
 # Examples
 ```julia-repl
 julia> src = reshape(Vector(1:16), (4,4))
@@ -1219,14 +1251,14 @@ end
 
 # contiguous multidimensional indexing: if the first dimension is a range,
 # we can get some performance from using copy_chunks!
-@inline function _unsafe_getindex!(X::BitArray, B::BitArray, I0::Union{UnitRange{Int},Slice})
+@inline function _unsafe_getindex!(X::BitArray, B::BitArray, I0::Union{AbstractUnitRange{Int},Slice})
     copy_chunks!(X.chunks, 1, B.chunks, indexoffset(I0)+1, length(I0))
     return X
 end
 
 # Optimization where the inner dimension is contiguous improves perf dramatically
 @generated function _unsafe_getindex!(X::BitArray, B::BitArray,
-        I0::Union{Slice,UnitRange{Int}}, I::Union{Int,UnitRange{Int},Slice}...)
+        I0::Union{Slice,UnitRange{Int}}, I::Union{Int,AbstractUnitRange{Int},Slice}...)
     N = length(I)
     quote
         $(Expr(:meta, :inline))
@@ -1361,7 +1393,7 @@ end
 # contiguous multidimensional indexing: if the first dimension is a range,
 # we can get some performance from using copy_chunks!
 
-@inline function setindex!(B::BitArray, X::Union{StridedArray,BitArray}, J0::Union{Colon,UnitRange{Int}})
+@inline function setindex!(B::BitArray, X::Union{StridedArray,BitArray}, J0::Union{Colon,AbstractUnitRange{Int}})
     I0 = to_indices(B, (J0,))[1]
     @boundscheck checkbounds(B, I0)
     l0 = length(I0)
@@ -1373,13 +1405,13 @@ end
 end
 
 @inline function setindex!(B::BitArray, X::Union{StridedArray,BitArray},
-        I0::Union{Colon,UnitRange{Int}}, I::Union{Int,UnitRange{Int},Colon}...)
+        I0::Union{Colon,AbstractUnitRange{Int}}, I::Union{Int,AbstractUnitRange{Int},Colon}...)
     J = to_indices(B, (I0, I...))
     @boundscheck checkbounds(B, J...)
     _unsafe_setindex!(B, X, J...)
 end
 @generated function _unsafe_setindex!(B::BitArray, X::Union{StridedArray,BitArray},
-        I0::Union{Slice,UnitRange{Int}}, I::Union{Int,UnitRange{Int},Slice}...)
+        I0::Union{Slice,AbstractUnitRange{Int}}, I::Union{Int,AbstractUnitRange{Int},Slice}...)
     N = length(I)
     quote
         idxlens = @ncall $N index_lengths I0 d->I[d]
@@ -1414,7 +1446,7 @@ end
 end
 
 @propagate_inbounds function setindex!(B::BitArray, X::AbstractArray,
-        I0::Union{Colon,UnitRange{Int}}, I::Union{Int,UnitRange{Int},Colon}...)
+        I0::Union{Colon,AbstractUnitRange{Int}}, I::Union{Int,AbstractUnitRange{Int},Colon}...)
     _setindex!(IndexStyle(B), B, X, to_indices(B, (I0, I...))...)
 end
 
diff --git a/base/multimedia.jl b/base/multimedia.jl
index 45e6b9532e9fae..5d9e3e807d159e 100644
--- a/base/multimedia.jl
+++ b/base/multimedia.jl
@@ -69,7 +69,7 @@ methods; for example, if the available MIME formats depend on the *value* of `x`
 julia> showable(MIME("text/plain"), rand(5))
 true
 
-julia> showable("img/png", rand(5))
+julia> showable("image/png", rand(5))
 false
 ```
 """
@@ -176,7 +176,7 @@ data except for a set of types known to be text data (possibly Unicode).
 julia> istextmime(MIME("text/plain"))
 true
 
-julia> istextmime(MIME("img/png"))
+julia> istextmime(MIME("image/png"))
 false
 ```
 """
diff --git a/base/namedtuple.jl b/base/namedtuple.jl
index ff554cae393364..9361fde52619bb 100644
--- a/base/namedtuple.jl
+++ b/base/namedtuple.jl
@@ -267,7 +267,7 @@ merge(a::NamedTuple,     b::NamedTuple{()}) = a
 merge(a::NamedTuple{()}, b::NamedTuple{()}) = a
 merge(a::NamedTuple{()}, b::NamedTuple)     = b
 
-merge(a::NamedTuple, b::Iterators.Pairs{<:Any,<:Any,<:Any,<:NamedTuple}) = merge(a, b.data)
+merge(a::NamedTuple, b::Iterators.Pairs{<:Any,<:Any,<:Any,<:NamedTuple}) = merge(a, getfield(b, :data))
 
 merge(a::NamedTuple, b::Iterators.Zip{<:Tuple{Any,Any}}) = merge(a, NamedTuple{Tuple(b.is[1])}(b.is[2]))
 
diff --git a/base/ntuple.jl b/base/ntuple.jl
index a5608dfa927c33..6f70b494812230 100644
--- a/base/ntuple.jl
+++ b/base/ntuple.jl
@@ -32,22 +32,22 @@ julia> ntuple(i -> 2*i, 4)
 end
 
 function _ntuple(f::F, n) where F
-    @_noinline_meta
+    @noinline
     (n >= 0) || throw(ArgumentError(string("tuple length should be ≥ 0, got ", n)))
     ([f(i) for i = 1:n]...,)
 end
 
 function ntupleany(f, n)
-    @_noinline_meta
+    @noinline
     (n >= 0) || throw(ArgumentError(string("tuple length should be ≥ 0, got ", n)))
     (Any[f(i) for i = 1:n]...,)
 end
 
 # inferrable ntuple (enough for bootstrapping)
 ntuple(f, ::Val{0}) = ()
-ntuple(f, ::Val{1}) = (@_inline_meta; (f(1),))
-ntuple(f, ::Val{2}) = (@_inline_meta; (f(1), f(2)))
-ntuple(f, ::Val{3}) = (@_inline_meta; (f(1), f(2), f(3)))
+ntuple(f, ::Val{1}) = (@inline; (f(1),))
+ntuple(f, ::Val{2}) = (@inline; (f(1), f(2)))
+ntuple(f, ::Val{3}) = (@inline; (f(1), f(2), f(3)))
 
 """
     ntuple(f, ::Val{N})
diff --git a/base/number.jl b/base/number.jl
index 142796d3903ac5..d3bf14d566250f 100644
--- a/base/number.jl
+++ b/base/number.jl
@@ -25,6 +25,8 @@ isinteger(x::Integer) = true
 Return `true` if `x == zero(x)`; if `x` is an array, this checks whether
 all of the elements of `x` are zero.
 
+See also: [`isone`](@ref), [`isinteger`](@ref), [`isfinite`](@ref), [`isnan`](@ref).
+
 # Examples
 ```jldoctest
 julia> iszero(0.0)
@@ -92,15 +94,20 @@ keys(::Number) = OneTo(1)
 
 getindex(x::Number) = x
 function getindex(x::Number, i::Integer)
-    @_inline_meta
+    @inline
     @boundscheck i == 1 || throw(BoundsError())
     x
 end
 function getindex(x::Number, I::Integer...)
-    @_inline_meta
+    @inline
     @boundscheck all(isone, I) || throw(BoundsError())
     x
 end
+get(x::Number, i::Integer, default) = isone(i) ? x : default  # index one yields `x`; any other index yields `default`
+get(x::Number, ind::Tuple, default) = all(isone, ind) ? x : default  # tuple index: every component must be one
+get(f::Callable, x::Number, i::Integer) = isone(i) ? x : f()  # `f` is only called when the index is not one
+get(f::Callable, x::Number, ind::Tuple) = all(isone, ind) ? x : f()  # tuple-index form of the method above
+
 first(x::Number) = x
 last(x::Number) = x
 copy(x::Number) = x # some code treats numbers as collection-like
@@ -110,6 +117,8 @@ copy(x::Number) = x # some code treats numbers as collection-like
 
 Returns `true` if the value of the sign of `x` is negative, otherwise `false`.
 
+See also [`sign`](@ref) and [`copysign`](@ref).
+
 # Examples
 ```jldoctest
 julia> signbit(-4)
@@ -131,6 +140,23 @@ signbit(x::Real) = x < 0
     sign(x)
 
 Return zero if `x==0` and ``x/|x|`` otherwise (i.e., ±1 for real `x`).
+
+See also [`signbit`](@ref), [`zero`](@ref), [`copysign`](@ref), [`flipsign`](@ref).
+
+# Examples
+```jldoctest
+julia> sign(-4.0)
+-1.0
+
+julia> sign(99)
+1
+
+julia> sign(-0.0)
+-0.0
+
+julia> sign(0 + im)
+0.0 + 1.0im
+```
 """
 sign(x::Number) = iszero(x) ? x/abs(oneunit(x)) : x/abs(x)
 sign(x::Real) = ifelse(x < zero(x), oftype(one(x),-1), ifelse(x > zero(x), one(x), typeof(one(x))(x)))
@@ -222,10 +248,18 @@ inv(x::Number) = one(x)/x
 
 Multiply `x` and `y`, giving the result as a larger type.
 
+See also [`promote`](@ref), [`Base.add_sum`](@ref).
+
 # Examples
 ```jldoctest
-julia> widemul(Float32(3.), 4.)
-12.0
+julia> widemul(Float32(3.0), 4.0) isa BigFloat
+true
+
+julia> typemax(Int8) * typemax(Int8)
+1
+
+julia> widemul(typemax(Int8), typemax(Int8))  # == 127^2
+16129
 ```
 """
 widemul(x::Number, y::Number) = widen(x)*widen(y)
@@ -243,6 +277,8 @@ map(f, x::Number, ys::Number...) = f(x, ys...)
 
 Get the additive identity element for the type of `x` (`x` can also specify the type itself).
 
+See also [`iszero`](@ref), [`one`](@ref), [`oneunit`](@ref), [`oftype`](@ref).
+
 # Examples
 ```jldoctest
 julia> zero(1)
@@ -280,6 +316,9 @@ should return an identity value of the same precision
 If you want a quantity that is of the same type as `x`, or of type `T`,
 even if `x` is dimensionful, use [`oneunit`](@ref) instead.
 
+See also the [`identity`](@ref) function,
+and `I` in [`LinearAlgebra`](@ref man-linalg) for the identity matrix.
+
 # Examples
 ```jldoctest
 julia> one(3.7)
diff --git a/base/operators.jl b/base/operators.jl
index 8b91657fda201c..74cf3e95145a6a 100644
--- a/base/operators.jl
+++ b/base/operators.jl
@@ -93,6 +93,10 @@ and of missing values. `isequal` treats all floating-point `NaN` values as equal
 to each other, treats `-0.0` as unequal to `0.0`, and [`missing`](@ref) as equal
 to `missing`. Always returns a `Bool` value.
 
+`isequal` is an equivalence relation: it is reflexive (`===` implies `isequal`), symmetric
+(`isequal(a, b)` implies `isequal(b, a)`), and transitive (`isequal(a, b)` and
+`isequal(b, c)` together imply `isequal(a, c)`).
+
 # Implementation
 The default implementation of `isequal` calls `==`, so a type that does not involve
 floating-point values generally only needs to define `==`.
@@ -101,8 +105,12 @@ floating-point values generally only needs to define `==`.
 that `hash(x) == hash(y)`.
 
 This typically means that types for which a custom `==` or `isequal` method exists must
-implement a corresponding `hash` method (and vice versa). Collections typically implement
-`isequal` by calling `isequal` recursively on all contents.
+implement a corresponding [`hash`](@ref) method (and vice versa). Collections typically
+implement `isequal` by calling `isequal` recursively on all contents.
+
+Furthermore, `isequal` is linked with [`isless`](@ref), and they work together to
+define a fixed total ordering, where exactly one of `isequal(x, y)`, `isless(x, y)`, or
+`isless(y, x)` must be `true` (and the other two `false`).
 
 Scalar types generally do not need to implement `isequal` separate from `==`, unless they
 represent floating-point numbers amenable to a more efficient implementation than that
@@ -121,6 +129,12 @@ true
 
 julia> isequal(0.0, -0.0)
 false
+
+julia> missing == missing
+missing
+
+julia> isequal(missing, missing)
+true
 ```
 """
 isequal(x, y) = x == y
@@ -135,8 +149,8 @@ isequal(x::AbstractFloat, y::Real         ) = (isnan(x) & isnan(y)) | signequal(
 """
     isless(x, y)
 
-Test whether `x` is less than `y`, according to a fixed total order.
-`isless` is not defined on all pairs of values `(x, y)`. However, if it
+Test whether `x` is less than `y`, according to a fixed total order (defined together with
+[`isequal`](@ref)). `isless` is not defined on all pairs of values `(x, y)`. However, if it
 is defined, it is expected to satisfy the following:
 - If `isless(x, y)` is defined, then so is `isless(y, x)` and `isequal(x, y)`,
   and exactly one of those three yields `true`.
@@ -188,7 +202,7 @@ largest values and `isgreater` defines a descending total order with `NaN` and
 !!! note
 
     Like `min`, `isgreater` orders containers (tuples, vectors, etc)
-    lexigraphically with `isless(y, x)` rather than recursively with itself:
+    lexicographically with `isless(y, x)` rather than recursively with itself:
 
     ```jldoctest
     julia> Base.isgreater(1, NaN) # 1 is greater than NaN
@@ -466,7 +480,7 @@ cmp(x::Integer, y::Integer) = ifelse(isless(x, y), -1, ifelse(isless(y, x), 1, 0
 """
     max(x, y, ...)
 
-Return the maximum of the arguments. See also the [`maximum`](@ref) function
+Return the maximum of the arguments (with respect to [`isless`](@ref)). See also the [`maximum`](@ref) function
 to take the maximum element from a collection.
 
 # Examples
@@ -480,7 +494,7 @@ max(x, y) = ifelse(isless(y, x), x, y)
 """
     min(x, y, ...)
 
-Return the minimum of the arguments. See also the [`minimum`](@ref) function
+Return the minimum of the arguments (with respect to [`isless`](@ref)). See also the [`minimum`](@ref) function
 to take the minimum element from a collection.
 
 # Examples
@@ -494,7 +508,9 @@ min(x,y) = ifelse(isless(y, x), y, x)
 """
     minmax(x, y)
 
-Return `(min(x,y), max(x,y))`. See also: [`extrema`](@ref) that returns `(minimum(x), maximum(x))`.
+Return `(min(x,y), max(x,y))`.
+
+See also [`extrema`](@ref) that returns `(minimum(x), maximum(x))`.
 
 # Examples
 ```jldoctest
@@ -563,6 +579,8 @@ extrema(f, x::Real) = (y = f(x); (y, y))
 
 The identity function. Returns its argument.
 
+See also: [`one`](@ref), [`oneunit`](@ref), and [`LinearAlgebra`](@ref man-linalg)'s `I`.
+
 # Examples
 ```jldoctest
 julia> identity("Well, what did you expect?")
@@ -578,6 +596,8 @@ identity(x) = x
 xor(x::Integer) = x
 
 const ⊻ = xor
+const ⊼ = nand
+const ⊽ = nor
 
 # foldl for argument lists. expand fully up to a point, then
 # switch to a loop. this allows small cases like `a+b+c+d` to be managed
@@ -588,7 +608,7 @@ afoldl(op, a) = a
 function afoldl(op, a, bs...)
     l = length(bs)
     i =  0; y = a;            l == i && return y
-    #@nexprs 15 i -> (y = op(y, bs[i]); l == i && return y)
+    #@nexprs 31 i -> (y = op(y, bs[i]); l == i && return y)
     i =  1; y = op(y, bs[i]); l == i && return y
     i =  2; y = op(y, bs[i]); l == i && return y
     i =  3; y = op(y, bs[i]); l == i && return y
@@ -604,12 +624,28 @@ function afoldl(op, a, bs...)
     i = 13; y = op(y, bs[i]); l == i && return y
     i = 14; y = op(y, bs[i]); l == i && return y
     i = 15; y = op(y, bs[i]); l == i && return y
+    i = 16; y = op(y, bs[i]); l == i && return y
+    i = 17; y = op(y, bs[i]); l == i && return y
+    i = 18; y = op(y, bs[i]); l == i && return y
+    i = 19; y = op(y, bs[i]); l == i && return y
+    i = 20; y = op(y, bs[i]); l == i && return y
+    i = 21; y = op(y, bs[i]); l == i && return y
+    i = 22; y = op(y, bs[i]); l == i && return y
+    i = 23; y = op(y, bs[i]); l == i && return y
+    i = 24; y = op(y, bs[i]); l == i && return y
+    i = 25; y = op(y, bs[i]); l == i && return y
+    i = 26; y = op(y, bs[i]); l == i && return y
+    i = 27; y = op(y, bs[i]); l == i && return y
+    i = 28; y = op(y, bs[i]); l == i && return y
+    i = 29; y = op(y, bs[i]); l == i && return y
+    i = 30; y = op(y, bs[i]); l == i && return y
+    i = 31; y = op(y, bs[i]); l == i && return y
     for i in (i + 1):l
         y = op(y, bs[i])
     end
     return y
 end
-typeof(afoldl).name.mt.max_args = 18
+typeof(afoldl).name.mt.max_args = 34
 
 for op in (:+, :*, :&, :|, :xor, :min, :max, :kron)
     @eval begin
@@ -678,16 +714,16 @@ julia> bitstring(Int8(3))
 julia> bitstring(Int8(12))
 "00001100"
 ```
-See also [`>>`](@ref), [`>>>`](@ref).
+See also [`>>`](@ref), [`>>>`](@ref), [`exp2`](@ref), [`ldexp`](@ref).
 """
 function <<(x::Integer, c::Integer)
-    @_inline_meta
+    @inline
     typemin(Int) <= c <= typemax(Int) && return x << (c % Int)
     (x >= 0 || c >= 0) && return zero(x) << 0  # for type stability
     oftype(x, -1)
 end
 function <<(x::Integer, c::Unsigned)
-    @_inline_meta
+    @inline
     if c isa UInt
         throw(MethodError(<<, (x, c)))
     end
@@ -726,7 +762,7 @@ julia> bitstring(Int8(-4))
 See also [`>>>`](@ref), [`<<`](@ref).
 """
 function >>(x::Integer, c::Integer)
-    @_inline_meta
+    @inline
     if c isa UInt
         throw(MethodError(>>, (x, c)))
     end
@@ -764,11 +800,11 @@ is equivalent to [`>>`](@ref).
 See also [`>>`](@ref), [`<<`](@ref).
 """
 function >>>(x::Integer, c::Integer)
-    @_inline_meta
+    @inline
     typemin(Int) <= c <= typemax(Int) ? x >>> (c % Int) : zero(x) >>> 0
 end
 function >>>(x::Integer, c::Unsigned)
-    @_inline_meta
+    @inline
     if c isa UInt
         throw(MethodError(>>>, (x, c)))
     end
@@ -785,6 +821,8 @@ end
 Remainder from Euclidean division, returning a value of the same sign as `x`, and smaller in
 magnitude than `y`. This value is always exact.
 
+See also: [`div`](@ref), [`mod`](@ref), [`mod1`](@ref), [`divrem`](@ref).
+
 # Examples
 ```jldoctest
 julia> x = 15; y = 4;
@@ -794,6 +832,10 @@ julia> x % y
 
 julia> x == div(x, y) * y + rem(x, y)
 true
+
+julia> rem.(-5:5, 3)'
+1×11 adjoint(::Vector{Int64}) with eltype Int64:
+ -2  -1  0  -2  -1  0  1  2  0  1  2
 ```
 """
 rem
@@ -803,7 +845,10 @@ const % = rem
     div(x, y)
     ÷(x, y)
 
-The quotient from Euclidean division. Computes `x/y`, truncated to an integer.
+The quotient from Euclidean (integer) division. Generally equivalent
+to the mathematical operation `x/y` with the fractional part discarded.
+
+See also: [`cld`](@ref), [`fld`](@ref), [`rem`](@ref), [`divrem`](@ref).
 
 # Examples
 ```jldoctest
@@ -815,6 +860,10 @@ julia> -5 ÷ 3
 
 julia> 5.0 ÷ 2
 2.0
+
+julia> div.(-5:5, 3)'
+1×11 adjoint(::Vector{Int64}) with eltype Int64:
+ -1  -1  -1  0  0  0  0  0  1  1  1
 ```
 """
 div
@@ -826,15 +875,24 @@ const ÷ = div
 Modulus after flooring division, returning a value `r` such that `mod(r, y) == mod(x, y)`
 in the range ``(0, y]`` for positive `y` and in the range ``[y,0)`` for negative `y`.
 
-See also: [`fld1`](@ref), [`fldmod1`](@ref).
+With integer arguments and positive `y`, this is equal to `mod(x, 1:y)`, and hence natural
+for 1-based indexing. By comparison, `mod(x, y) == mod(x, 0:y-1)` is natural for computations with
+offsets or strides.
+
+See also [`mod`](@ref), [`fld1`](@ref), [`fldmod1`](@ref).
 
 # Examples
 ```jldoctest
 julia> mod1(4, 2)
 2
 
-julia> mod1(4, 3)
-1
+julia> mod1.(-5:5, 3)'
+1×11 adjoint(::Vector{Int64}) with eltype Int64:
+ 1  2  3  1  2  3  1  2  3  1  2
+
+julia> mod1.([-0.1, 0, 0.1, 1, 2, 2.9, 3, 3.1]', 3)
+1×8 Matrix{Float64}:
+ 2.9  3.0  0.1  1.0  2.0  2.9  3.0  0.1
 ```
 """
 mod1(x::T, y::T) where {T<:Real} = (m = mod(x, y); ifelse(m == 0, y, m))
@@ -845,7 +903,7 @@ mod1(x::T, y::T) where {T<:Real} = (m = mod(x, y); ifelse(m == 0, y, m))
 
 Flooring division, returning a value consistent with `mod1(x,y)`
 
-See also: [`mod1`](@ref), [`fldmod1`](@ref).
+See also [`mod1`](@ref), [`fldmod1`](@ref).
 
 # Examples
 ```jldoctest
@@ -872,7 +930,7 @@ end
 
 Return `(fld1(x,y), mod1(x,y))`.
 
-See also: [`fld1`](@ref), [`mod1`](@ref).
+See also [`fld1`](@ref), [`mod1`](@ref).
 """
 fldmod1(x, y) = (fld1(x, y), mod1(x, y))
 
@@ -916,6 +974,42 @@ julia> [1:5;] |> x->x.^2 |> sum |> inv
 """
 |>(x, f) = f(x)
 
+"""
+    f = Returns(value)
+
+Create a callable `f` such that `f(args...; kw...) === value` holds.
+
+# Examples
+
+```jldoctest
+julia> f = Returns(42);
+
+julia> f(1)
+42
+
+julia> f("hello", x=32)
+42
+
+julia> f.value
+42
+```
+
+!!! compat "Julia 1.7"
+    `Returns` requires at least Julia 1.7.
+"""
+struct Returns{V} <: Function
+    value::V  # the constant handed back by every call
+    Returns{V}(value) where {V} = new{V}(value)  # caller chooses the type parameter explicitly
+    Returns(value) = new{Core.Typeof(value)}(value)  # type parameter taken from Core.Typeof(value)
+end
+
+(obj::Returns)(args...; kw...) = obj.value  # all positional and keyword arguments are ignored
+function show(io::IO, obj::Returns)  # prints as `<type>(<value>)`
+    show(io, typeof(obj))
+    print(io, "(")
+    show(io, obj.value)
+    print(io, ")")
+end
 # function composition
 
 """
@@ -955,7 +1049,7 @@ julia> fs = [
 julia> ∘(fs...)(3)
 3.0
 ```
-See also [`ComposedFunction`](@ref).
+See also [`ComposedFunction`](@ref), [`!f::Function`](@ref).
 """
 function ∘ end
 
@@ -1015,6 +1109,8 @@ end
 Predicate function negation: when the argument of `!` is a function, it returns a
 function which computes the boolean negation of `f`.
 
+See also [`∘`](@ref).
+
 # Examples
 ```jldoctest
 julia> str = "∀ ε > 0, ∃ δ > 0: |x-y| < δ ⇒ |f(x)-f(y)| < ε"
@@ -1035,6 +1131,8 @@ julia> filter(!isletter, str)
 A type representing a partially-applied version of the two-argument function
 `f`, with the first argument fixed to the value "x". In other words,
 `Fix1(f, x)` behaves similarly to `y->f(x, y)`.
+
+See also [`Fix2`](@ref Base.Fix2).
 """
 struct Fix1{F,T} <: Function
     f::F
@@ -1295,6 +1393,8 @@ julia> [1, 2] .∈ ([2, 3],)
  0
  1
 ```
+
+See also: [`insorted`](@ref), [`contains`](@ref), [`occursin`](@ref), [`issubset`](@ref).
 """
 in
 
diff --git a/base/options.jl b/base/options.jl
index 9ef4f78e908247..16dc884e8651ec 100644
--- a/base/options.jl
+++ b/base/options.jl
@@ -21,6 +21,7 @@ struct JLOptions
     code_coverage::Int8
     malloc_log::Int8
     opt_level::Int8
+    opt_level_min::Int8
     debug_level::Int8
     check_bounds::Int8
     depwarn::Int8
diff --git a/base/pair.jl b/base/pair.jl
index 30fd91892ce4b5..b5dffbb4e7e866 100644
--- a/base/pair.jl
+++ b/base/pair.jl
@@ -1,18 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-struct Pair{A, B}
-    first::A
-    second::B
-    function Pair{A, B}(@nospecialize(a), @nospecialize(b)) where {A, B}
-        @_inline_meta
-        # if we didn't inline this, it's probably because the callsite was actually dynamic
-        # to avoid potentially compiling many copies of this, we mark the arguments with `@nospecialize`
-        # but also mark the whole function with `@inline` to ensure we will inline it whenever possible
-        # (even if `convert(::Type{A}, a::A)` for some reason was expensive)
-        return new(a, b)
-    end
-end
-Pair(a, b) = Pair{typeof(a), typeof(b)}(a, b)
 const => = Pair
 
 """
@@ -23,7 +10,7 @@ Construct a `Pair` object with type `Pair{typeof(x), typeof(y)}`. The elements
 are stored in the fields `first` and `second`. They can also be accessed via
 iteration (but a `Pair` is treated as a single "scalar" for broadcasting operations).
 
-See also: [`Dict`](@ref)
+See also [`Dict`](@ref).
 
 # Examples
 ```jldoctest
diff --git a/base/path.jl b/base/path.jl
index 449085c00f26dd..6f74bff25ba26f 100644
--- a/base/path.jl
+++ b/base/path.jl
@@ -36,7 +36,7 @@ elseif Sys.iswindows()
 
     function splitdrive(path::String)
         m = match(r"^([^\\]+:|\\\\[^\\]+\\[^\\]+|\\\\\?\\UNC\\[^\\]+\\[^\\]+|\\\\\?\\[^\\]+:|)(.*)$"s, path)
-        String(m.captures[1]), String(m.captures[2])
+        String(something(m.captures[1])), String(something(m.captures[2]))
     end
 else
     error("path primitives for this OS need to be defined")
@@ -159,7 +159,7 @@ julia> dirname("/home/myuser/")
 "/home/myuser"
 ```
 
-See also: [`basename`](@ref)
+See also [`basename`](@ref).
 """
  dirname(path::AbstractString) = splitdir(path)[1]
 
@@ -181,15 +181,15 @@ julia> basename("/home/myuser/")
 ""
 ```
 
-See also: [`dirname`](@ref)
+See also [`dirname`](@ref).
 """
 basename(path::AbstractString) = splitdir(path)[2]
 
 """
     splitext(path::AbstractString) -> (AbstractString, AbstractString)
 
-If the last component of a path contains a dot, split the path into everything before the
-dot and everything including and after the dot. Otherwise, return a tuple of the argument
+If the last component of a path contains one or more dots, split the path into everything before the
+last dot and everything including and after the dot. Otherwise, return a tuple of the argument
 unmodified and the empty string. "splitext" is short for "split extension".
 
 # Examples
@@ -197,15 +197,18 @@ unmodified and the empty string. "splitext" is short for "split extension".
 julia> splitext("/home/myuser/example.jl")
 ("/home/myuser/example", ".jl")
 
-julia> splitext("/home/myuser/example")
-("/home/myuser/example", "")
+julia> splitext("/home/myuser/example.tar.gz")
+("/home/myuser/example.tar", ".gz")
+
+julia> splitext("/home/my.user/example")
+("/home/my.user/example", "")
 ```
 """
 function splitext(path::String)
     a, b = splitdrive(path)
     m = match(path_ext_splitter, b)
     m === nothing && return (path,"")
-    a*m.captures[1], String(m.captures[2])
+    (a*something(m.captures[1])), String(something(m.captures[2]))
 end
 
 # NOTE: deprecated in 1.4
@@ -251,16 +254,19 @@ function splitpath(p::String)
     return out
 end
 
-joinpath(path::AbstractString)::String = path
-
 if Sys.iswindows()
 
-function joinpath(path::AbstractString, paths::AbstractString...)::String
-    result_drive, result_path = splitdrive(path)
+function joinpath(paths::Union{Tuple, AbstractVector})::String
+    assertstring(x) = x isa AbstractString || throw(ArgumentError("path component is not a string: $(repr(x))"))
+
+    isempty(paths) && throw(ArgumentError("collection of path components must be non-empty"))
+    assertstring(first(paths))  # first(), not paths[1]: the collection need not be 1-based
+    result_drive, result_path = splitdrive(first(paths))  # head component seeds the drive and path
 
-    local p_drive, p_path
-    for p in paths
-        p_drive, p_path = splitdrive(p)
+    p_path = ""
+    for i in firstindex(paths)+1:lastindex(paths)
+        assertstring(paths[i])
+        p_drive, p_path = splitdrive(paths[i])
 
         if startswith(p_path, ('\\', '/'))
             # second path is absolute
@@ -296,8 +302,15 @@ end
 
 else
 
-function joinpath(path::AbstractString, paths::AbstractString...)::String
-    for p in paths
+function joinpath(paths::Union{Tuple, AbstractVector})::String
+    assertstring(x) = x isa AbstractString || throw(ArgumentError("path component is not a string: $(repr(x))"))
+
+    isempty(paths) && throw(ArgumentError("collection of path components must be non-empty"))
+    assertstring(first(paths))  # first(), not paths[1]: the collection need not be 1-based
+    path = first(paths)  # head component; the loop appends (or restarts from) the remaining ones
+    for i in firstindex(paths)+1:lastindex(paths)
+        p = paths[i]
+        assertstring(p)
         if isabspath(p)
             path = p
         elseif isempty(path) || path[end] == '/'
@@ -311,8 +324,12 @@ end
 
 end # os-test
 
+joinpath(paths::AbstractString...)::String = joinpath(paths)
+
 """
     joinpath(parts::AbstractString...) -> String
+    joinpath(parts::AbstractVector{<:AbstractString}) -> String
+    joinpath(parts::Tuple{Vararg{AbstractString}}) -> String
 
 Join path components into a full path. If some argument is an absolute path or
 (on Windows) has a drive specification that doesn't match the drive computed for
@@ -328,6 +345,11 @@ letter casing, hence `joinpath("C:\\A","c:b") = "C:\\A\\b"`.
 julia> joinpath("/home/myuser", "example.jl")
 "/home/myuser/example.jl"
 ```
+
+```jldoctest
+julia> joinpath(["/home/myuser", "example.jl"])
+"/home/myuser/example.jl"
+```
 """
 joinpath
 
@@ -517,12 +539,16 @@ function relpath(path::String, startpath::String = ".")
     curdir = "."
     pardir = ".."
     path == startpath && return curdir
-    path_drive, path_without_drive = splitdrive(path)
-    startpath_drive, startpath_without_drive = splitdrive(startpath)
-    path_arr  = split(abspath(path_without_drive),      path_separator_re)
-    start_arr = split(abspath(startpath_without_drive), path_separator_re)
     if Sys.iswindows()
-        lowercase(path_drive) != lowercase(startpath_drive) && return abspath(path)
+        path_drive, path_without_drive = splitdrive(path)
+        startpath_drive, startpath_without_drive = splitdrive(startpath)
+        isempty(startpath_drive) && (startpath_drive = path_drive) # by default assume same as path drive
+        uppercase(path_drive) == uppercase(startpath_drive) || return abspath(path) # if drives differ return first path
+        path_arr  = split(abspath(path_drive * path_without_drive),      path_separator_re)
+        start_arr = split(abspath(path_drive * startpath_without_drive), path_separator_re)
+    else
+        path_arr  = split(abspath(path),      path_separator_re)
+        start_arr = split(abspath(startpath), path_separator_re)
     end
     i = 0
     while i < min(length(path_arr), length(start_arr))
diff --git a/base/pcre.jl b/base/pcre.jl
index e52bea2869eb79..a8edaaa089c317 100644
--- a/base/pcre.jl
+++ b/base/pcre.jl
@@ -228,7 +228,10 @@ function substring_length_bynumber(match_data, number)
     s = RefValue{Csize_t}()
     rc = ccall((:pcre2_substring_length_bynumber_8, PCRE_LIB), Cint,
                (Ptr{Cvoid}, Cint, Ref{Csize_t}), match_data, number, s)
-    rc < 0 && error("PCRE error: $(err_message(rc))")
+    if rc < 0
+        rc == ERROR_UNSET && return 0
+        error("PCRE error: $(err_message(rc))")
+    end
     return Int(s[])
 end
 
diff --git a/base/permuteddimsarray.jl b/base/permuteddimsarray.jl
index 429fa67b2a3aba..ea966c44efc38b 100644
--- a/base/permuteddimsarray.jl
+++ b/base/permuteddimsarray.jl
@@ -24,7 +24,7 @@ Given an AbstractArray `A`, create a view `B` such that the
 dimensions appear to be permuted. Similar to `permutedims`, except
 that no copying occurs (`B` shares storage with `A`).
 
-See also: [`permutedims`](@ref).
+See also [`permutedims`](@ref), [`invperm`](@ref).
 
 # Examples
 ```jldoctest
@@ -83,10 +83,10 @@ end
 """
     permutedims(A::AbstractArray, perm)
 
-Permute the dimensions of array `A`. `perm` is a vector specifying a permutation of length
-`ndims(A)`.
+Permute the dimensions of array `A`. `perm` is a vector or a tuple of length `ndims(A)`
+specifying the permutation.
 
-See also: [`PermutedDimsArray`](@ref).
+See also [`permutedims!`](@ref), [`PermutedDimsArray`](@ref), [`transpose`](@ref), [`invperm`](@ref).
 
 # Examples
 ```jldoctest
@@ -100,7 +100,7 @@ julia> A = reshape(Vector(1:8), (2,2,2))
  5  7
  6  8
 
-julia> permutedims(A, [3, 2, 1])
+julia> permutedims(A, (3, 2, 1))
 2×2×2 Array{Int64, 3}:
 [:, :, 1] =
  1  3
@@ -109,6 +109,16 @@ julia> permutedims(A, [3, 2, 1])
 [:, :, 2] =
  2  4
  6  8
+
+julia> B = randn(5, 7, 11, 13);
+
+julia> perm = [4,1,3,2];
+
+julia> size(permutedims(B, perm))
+(13, 5, 11, 7)
+
+julia> size(B)[perm] == ans
+true
 ```
 """
 function permutedims(A::AbstractArray, perm)
@@ -253,6 +263,16 @@ end
     P
 end
 
+const CommutativeOps = Union{typeof(+),typeof(Base.add_sum),typeof(min),typeof(max),typeof(|),typeof(&)} # order-insensitive ops only
+function Base._mapreduce_dim(f, op::CommutativeOps, init::Base._InitialValue, A::PermutedDimsArray, dims::Colon)
+    Base._mapreduce_dim(f, op, init, parent(A), dims) # the parent is visited in a different element order than A
+end
+function Base.mapreducedim!(f, op::CommutativeOps, B::AbstractArray{T,N}, A::PermutedDimsArray{T,N,perm,iperm}) where {T,N,perm,iperm}
+    C = PermutedDimsArray{T,N,iperm,perm,typeof(B)}(B) # make the inverse permutation for the output
+    Base.mapreducedim!(f, op, C, parent(A)) # C shares storage with B, so this fills B
+    B
+end
+
 function Base.showarg(io::IO, A::PermutedDimsArray{T,N,perm}, toplevel) where {T,N,perm}
     print(io, "PermutedDimsArray(")
     Base.showarg(io, parent(A), false)
diff --git a/base/pointer.jl b/base/pointer.jl
index 0813d0a0c97350..b9475724f76374 100644
--- a/base/pointer.jl
+++ b/base/pointer.jl
@@ -125,7 +125,7 @@ Convert a `Ptr` to an object reference. Assumes the pointer refers to a valid he
 Julia object. If this is not the case, undefined behavior results, hence this function is
 considered "unsafe" and should be used with care.
 
-See also: [`pointer_from_objref`](@ref).
+See also [`pointer_from_objref`](@ref).
 """
 unsafe_pointer_to_objref(x::Ptr) = ccall(:jl_value_ptr, Any, (Ptr{Cvoid},), x)
 
@@ -139,11 +139,11 @@ remains referenced for the whole time that the `Ptr` will be used.
 This function may not be called on immutable objects, since they do not have
 stable memory addresses.
 
-See also: [`unsafe_pointer_to_objref`](@ref).
+See also [`unsafe_pointer_to_objref`](@ref).
 """
 function pointer_from_objref(@nospecialize(x))
-    @_inline_meta
-    typeof(x).mutable || error("pointer_from_objref cannot be used on immutable objects")
+    @inline
+    ismutable(x) || error("pointer_from_objref cannot be used on immutable objects")
     ccall(:jl_value_ptr, Ptr{Cvoid}, (Any,), x)
 end
 
diff --git a/base/process.jl b/base/process.jl
index 0c7db8a405d202..10c173e82b34c9 100644
--- a/base/process.jl
+++ b/base/process.jl
@@ -74,26 +74,29 @@ const SpawnIOs = Vector{Any} # convenience name for readability
 # handle marshalling of `Cmd` arguments from Julia to C
 @noinline function _spawn_primitive(file, cmd::Cmd, stdio::SpawnIOs)
     loop = eventloop()
-    iohandles = Tuple{Cint, UInt}[ # assuming little-endian layout
-        let h = rawhandle(io)
-            h === C_NULL     ? (0x00, UInt(0)) :
-            h isa OS_HANDLE  ? (0x02, UInt(cconvert(@static(Sys.iswindows() ? Ptr{Cvoid} : Cint), h))) :
-            h isa Ptr{Cvoid} ? (0x04, UInt(h)) :
-            error("invalid spawn handle $h from $io")
-        end
-        for io in stdio]
-    handle = Libc.malloc(_sizeof_uv_process)
-    disassociate_julia_struct(handle) # ensure that data field is set to C_NULL
-    err = ccall(:jl_spawn, Int32,
-              (Cstring, Ptr{Cstring}, Ptr{Cvoid}, Ptr{Cvoid},
-               Ptr{Tuple{Cint, UInt}}, Int,
-               UInt32, Ptr{Cstring}, Cstring, Ptr{Cvoid}),
-        file, cmd.exec, loop, handle,
-        iohandles, length(iohandles),
-        cmd.flags,
-        cmd.env === nothing ? C_NULL : cmd.env,
-        isempty(cmd.dir) ? C_NULL : cmd.dir,
-        @cfunction(uv_return_spawn, Cvoid, (Ptr{Cvoid}, Int64, Int32)))
+    GC.@preserve stdio begin
+        iohandles = Tuple{Cint, UInt}[ # assuming little-endian layout
+            let h = rawhandle(io)
+                h === C_NULL     ? (0x00, UInt(0)) :
+                h isa OS_HANDLE  ? (0x02, UInt(cconvert(@static(Sys.iswindows() ? Ptr{Cvoid} : Cint), h))) :
+                h isa Ptr{Cvoid} ? (0x04, UInt(h)) :
+                error("invalid spawn handle $h from $io")
+            end
+            for io in stdio]
+        handle = Libc.malloc(_sizeof_uv_process)
+        disassociate_julia_struct(handle) # ensure that data field is set to C_NULL
+        (; exec, flags, env, dir) = cmd
+        err = ccall(:jl_spawn, Int32,
+                  (Cstring, Ptr{Cstring}, Ptr{Cvoid}, Ptr{Cvoid},
+                   Ptr{Tuple{Cint, UInt}}, Int,
+                   UInt32, Ptr{Cstring}, Cstring, Ptr{Cvoid}),
+            file, exec, loop, handle,
+            iohandles, length(iohandles),
+            flags,
+            env === nothing ? C_NULL : env,
+            isempty(dir) ? C_NULL : dir,
+            @cfunction(uv_return_spawn, Cvoid, (Ptr{Cvoid}, Int64, Int32)))
+    end
     if err != 0
         ccall(:jl_forceclose_uv, Cvoid, (Ptr{Cvoid},), handle) # will call free on handle eventually
         throw(_UVError("could not spawn " * repr(cmd), err))
@@ -209,10 +212,10 @@ function setup_stdio(stdio::PipeEndpoint, child_readable::Bool)
         rd, wr = link_pipe(!child_readable, child_readable)
         try
             open_pipe!(stdio, child_readable ? wr : rd)
-        catch ex
+        catch
             close_pipe_sync(rd)
             close_pipe_sync(wr)
-            rethrow(ex)
+            rethrow()
         end
         child = child_readable ? rd : wr
         return (child, true)
@@ -251,18 +254,19 @@ function setup_stdio(stdio::FileRedirect, child_readable::Bool)
     return (io, true)
 end
 
-# incrementally move data between an IOBuffer and a system Pipe
+# incrementally move data between an arbitrary IO and a system Pipe,
+# including copying the EOF (shutdown) when finished
 # TODO: probably more efficient (when valid) to use `stdio` directly as the
 #       PipeEndpoint buffer field in some cases
-function setup_stdio(stdio::Union{IOBuffer, BufferStream}, child_readable::Bool)
+function setup_stdio(stdio::IO, child_readable::Bool)
     parent = PipeEndpoint()
     rd, wr = link_pipe(!child_readable, child_readable)
     try
         open_pipe!(parent, child_readable ? wr : rd)
-    catch ex
+    catch
         close_pipe_sync(rd)
         close_pipe_sync(wr)
-        rethrow(ex)
+        rethrow()
     end
     child = child_readable ? rd : wr
     try
@@ -271,24 +275,19 @@ function setup_stdio(stdio::Union{IOBuffer, BufferStream}, child_readable::Bool)
             @async try
                 write(in, out)
             catch ex
-                @warn "Process error" exception=(ex, catch_backtrace())
+                @warn "Process I/O error" exception=(ex, catch_backtrace())
             finally
                 close(parent)
+                child_readable || closewrite(stdio)
             end
         end
-    catch ex
+    catch
         close_pipe_sync(child)
-        rethrow(ex)
+        rethrow()
     end
     return (child, true)
 end
 
-function setup_stdio(io, child_readable::Bool)
-    # if there is no specialization,
-    # assume that rawhandle is defined for it
-    return (io, false)
-end
-
 close_stdio(stdio::OS_HANDLE) = close_pipe_sync(stdio)
 close_stdio(stdio) = close(stdio)
 
@@ -383,9 +382,10 @@ end
 """
     open(f::Function, command, args...; kwargs...)
 
-Similar to `open(command, args...; kwargs...)`, but calls `f(stream)` on the resulting process
-stream, then closes the input stream and waits for the process to complete.
-Returns the value returned by `f`.
+Similar to `open(command, args...; kwargs...)`, but calls `f(stream)` on the
+resulting process stream, then closes the input stream and waits for the process
+to complete. Return the value returned by `f` on success. Throw an error if the
+process failed, or if the process attempts to print anything to stdout.
 """
 function open(f::Function, cmds::AbstractCmd, args...; kwargs...)
     P = open(cmds, args...; kwargs...)
@@ -393,9 +393,13 @@ function open(f::Function, cmds::AbstractCmd, args...; kwargs...)
         f(P)
     catch
         kill(P)
+        close(P)
         rethrow()
-    finally
-        close(P.in)
+    end
+    close(P.in)
+    if !eof(P.out)
+        close(P.out)
+        throw(_UVError("open(do)", UV_EPIPE))
     end
     success(P) || pipeline_error(P)
     return ret
@@ -476,7 +480,7 @@ function test_success(proc::Process)
         #TODO: this codepath is not currently tested
         throw(_UVError("could not start process " * repr(proc.cmd), proc.exitcode))
     end
-    return proc.exitcode == 0 && (proc.termsignal == 0 || proc.termsignal == SIGPIPE)
+    return proc.exitcode == 0 && proc.termsignal == 0
 end
 
 function success(x::Process)
diff --git a/base/promotion.jl b/base/promotion.jl
index a6e6d8fdcac4e8..21245f0e05c70b 100644
--- a/base/promotion.jl
+++ b/base/promotion.jl
@@ -5,7 +5,6 @@
 """
     typejoin(T, S)
 
-
 Return the closest common ancestor of `T` and `S`, i.e. the narrowest type from which
 they both inherit.
 """
@@ -144,6 +143,17 @@ end
 Compute a type that contains both `T` and `S`, which could be
 either a parent of both types, or a `Union` if appropriate.
 Falls back to [`typejoin`](@ref).
+
+See instead [`promote`](@ref), [`promote_type`](@ref).
+
+# Examples
+```jldoctest
+julia> Base.promote_typejoin(Int, Float64)
+Real
+
+julia> Base.promote_type(Int, Float64)
+Float64
+```
 """
 function promote_typejoin(@nospecialize(a), @nospecialize(b))
     c = typejoin(_promote_typesubtract(a), _promote_typesubtract(b))
@@ -151,6 +161,50 @@ function promote_typejoin(@nospecialize(a), @nospecialize(b))
 end
 _promote_typesubtract(@nospecialize(a)) = typesplit(a, Union{Nothing, Missing})
 
+function promote_typejoin_union(::Type{T}) where T
+    if T === Union{}
+        return Union{}
+    elseif T isa UnionAll
+        return Any # TODO: compute more precise bounds
+    elseif T isa Union
+        return promote_typejoin(promote_typejoin_union(T.a), promote_typejoin_union(T.b))
+    elseif T <: Tuple
+        return typejoin_union_tuple(T)
+    else
+        return T
+    end
+end
+
+function typejoin_union_tuple(T::Type)
+    @_pure_meta
+    u = Base.unwrap_unionall(T)
+    u isa Union && return typejoin(
+            typejoin_union_tuple(Base.rewrap_unionall(u.a, T)),
+            typejoin_union_tuple(Base.rewrap_unionall(u.b, T)))
+    p = (u::DataType).parameters
+    lr = length(p)::Int
+    if lr == 0
+        return Tuple{}
+    end
+    c = Vector{Any}(undef, lr)
+    for i = 1:lr
+        pi = p[i]
+        U = Core.Compiler.unwrapva(pi)
+        if U === Union{}
+            ci = Union{}
+        elseif U isa Union
+            ci = typejoin(U.a, U.b)
+        else
+            ci = U
+        end
+        if i == lr && Core.Compiler.isvarargtype(pi)
+            c[i] = isdefined(pi, :N) ? Vararg{ci, pi.N} : Vararg{ci}
+        else
+            c[i] = ci
+        end
+    end
+    return Base.rewrap_unionall(Tuple{c...}, T)
+end
 
 # Returns length, isfixed
 function full_va_len(p)
@@ -191,6 +245,9 @@ tolerated; for example, `promote_type(Int64, Float64)` returns
 [`Float64`](@ref) even though strictly, not all [`Int64`](@ref) values can be
 represented exactly as `Float64` values.
 
+See also: [`promote`](@ref), [`promote_typejoin`](@ref), [`promote_rule`](@ref).
+
+# Examples
 ```jldoctest
 julia> promote_type(Int64, Float64)
 Float64
@@ -210,12 +267,17 @@ Float16
 julia> promote_type(Int8, UInt16)
 UInt16
 ```
+
+!!! warning "Don't overload this directly"
+    To overload promotion for your own types you should overload [`promote_rule`](@ref).
+    `promote_type` calls `promote_rule` internally to determine the type.
+    Overloading `promote_type` directly can cause ambiguity errors.
 """
 function promote_type end
 
 promote_type()  = Bottom
 promote_type(T) = T
-promote_type(T, S, U, V...) = (@_inline_meta; promote_type(T, promote_type(S, U, V...)))
+promote_type(T, S, U, V...) = (@inline; promote_type(T, promote_type(S, U, V...)))
 
 promote_type(::Type{Bottom}, ::Type{Bottom}) = Bottom
 promote_type(::Type{T}, ::Type{T}) where {T} = T
@@ -223,7 +285,7 @@ promote_type(::Type{T}, ::Type{Bottom}) where {T} = T
 promote_type(::Type{Bottom}, ::Type{T}) where {T} = T
 
 function promote_type(::Type{T}, ::Type{S}) where {T,S}
-    @_inline_meta
+    @inline
     # Try promote_rule in both orders. Typically only one is defined,
     # and there is a fallback returning Bottom below, so the common case is
     #   promote_type(T, S) =>
@@ -243,10 +305,10 @@ function promote_rule end
 
 promote_rule(::Type{<:Any}, ::Type{<:Any}) = Bottom
 
-promote_result(::Type{<:Any},::Type{<:Any},::Type{T},::Type{S}) where {T,S} = (@_inline_meta; promote_type(T,S))
+promote_result(::Type{<:Any},::Type{<:Any},::Type{T},::Type{S}) where {T,S} = (@inline; promote_type(T,S))
 # If no promote_rule is defined, both directions give Bottom. In that
 # case use typejoin on the original types instead.
-promote_result(::Type{T},::Type{S},::Type{Bottom},::Type{Bottom}) where {T,S} = (@_inline_meta; typejoin(T, S))
+promote_result(::Type{T},::Type{S},::Type{Bottom},::Type{Bottom}) where {T,S} = (@inline; typejoin(T, S))
 
 """
     promote(xs...)
@@ -254,6 +316,8 @@ promote_result(::Type{T},::Type{S},::Type{Bottom},::Type{Bottom}) where {T,S} =
 Convert all arguments to a common type, and return them all (as a tuple).
 If no arguments can be converted, an error is raised.
 
+See also: [`promote_type`](@ref), [`promote_rule`](@ref).
+
 # Examples
 ```jldoctest
 julia> promote(Int8(1), Float16(4.5), Float32(4.1))
@@ -263,19 +327,19 @@ julia> promote(Int8(1), Float16(4.5), Float32(4.1))
 function promote end
 
 function _promote(x::T, y::S) where {T,S}
-    @_inline_meta
+    @inline
     R = promote_type(T, S)
     return (convert(R, x), convert(R, y))
 end
 promote_typeof(x) = typeof(x)
-promote_typeof(x, xs...) = (@_inline_meta; promote_type(typeof(x), promote_typeof(xs...)))
+promote_typeof(x, xs...) = (@inline; promote_type(typeof(x), promote_typeof(xs...)))
 function _promote(x, y, z)
-    @_inline_meta
+    @inline
     R = promote_typeof(x, y, z)
     return (convert(R, x), convert(R, y), convert(R, z))
 end
 function _promote(x, y, zs...)
-    @_inline_meta
+    @inline
     R = promote_typeof(x, y, zs...)
     return (convert(R, x), convert(R, y), convert(Tuple{Vararg{R}}, zs)...)
 end
@@ -287,13 +351,13 @@ promote() = ()
 promote(x) = (x,)
 
 function promote(x, y)
-    @_inline_meta
+    @inline
     px, py = _promote(x, y)
     not_sametype((x,y), (px,py))
     px, py
 end
 function promote(x, y, z)
-    @_inline_meta
+    @inline
     px, py, pz = _promote(x, y, z)
     not_sametype((x,y,z), (px,py,pz))
     px, py, pz
@@ -311,7 +375,7 @@ not_sametype(x::T, y::T) where {T} = sametype_error(x)
 not_sametype(x, y) = nothing
 
 function sametype_error(input)
-    @_noinline_meta
+    @noinline
     error("promotion of types ",
           join(map(x->string(typeof(x)), input), ", ", " and "),
           " failed to change any arguments")
@@ -335,7 +399,7 @@ where usually `^ == Base.^` unless `^` has been defined in the calling
 namespace.) If `y` is a negative integer literal, then `Base.literal_pow`
 transforms the operation to `inv(x)^-y` by default, where `-y` is positive.
 
-
+# Examples
 ```jldoctest
 julia> 3^5
 243
diff --git a/base/range.jl b/base/range.jl
index 54aeef84cae191..5d4421689c27aa 100644
--- a/base/range.jl
+++ b/base/range.jl
@@ -24,9 +24,9 @@
 _colon(::Ordered, ::Any, start::T, step, stop::T) where {T} = StepRange(start, step, stop)
 # for T<:Union{Float16,Float32,Float64} see twiceprecision.jl
 _colon(::Ordered, ::ArithmeticRounds, start::T, step, stop::T) where {T} =
-    StepRangeLen(start, step, floor(Int, (stop-start)/step)+1)
+    StepRangeLen(start, step, floor(Integer, (stop-start)/step)+1)
 _colon(::Any, ::Any, start::T, step, stop::T) where {T} =
-    StepRangeLen(start, step, floor(Int, (stop-start)/step)+1)
+    StepRangeLen(start, step, floor(Integer, (stop-start)/step)+1)
 
 """
     (:)(start, [step], stop)
@@ -57,7 +57,10 @@ Mathematically a range is uniquely determined by any three of `start`, `step`, `
 Valid invocations of range are:
 * Call `range` with any three of `start`, `step`, `stop`, `length`.
 * Call `range` with two of `start`, `stop`, `length`. In this case `step` will be assumed
-to be one. If both arguments are Integers, a [`UnitRange`](@ref) will be returned.
+  to be one. If both arguments are Integers, a [`UnitRange`](@ref) will be returned.
+* Call `range` with one of `stop` or `length`. `start` and `step` will be assumed to be one.
+
+See Extended Help for additional details on the returned type.
 
 # Examples
 ```jldoctest
@@ -87,6 +90,15 @@ julia> range(stop=10, step=1, length=5)
 
 julia> range(start=1, step=1, stop=10)
 1:1:10
+
+julia> range(; length = 10)
+Base.OneTo(10)
+
+julia> range(; stop = 6)
+Base.OneTo(6)
+
+julia> range(; stop = 6.5)
+1.0:1.0:6.0
 ```
 If `length` is not specified and `stop - start` is not an integer multiple of `step`, a range that ends before `stop` will be produced.
 ```jldoctest
@@ -103,6 +115,23 @@ To avoid this induced overhead, see the [`LinRange`](@ref) constructor.
 !!! compat "Julia 1.7"
     The versions without keyword arguments and `start` as a keyword argument
     require at least Julia 1.7.
+
+!!! compat "Julia 1.8"
+    The versions with `stop` as a sole keyword argument,
+    or `length` as a sole keyword argument require at least Julia 1.8.
+
+
+# Extended Help
+
+`range` will produce a `Base.OneTo` when the arguments are Integers and
+* Only `length` is provided
+* Only `stop` is provided
+
+`range` will produce a `UnitRange` when the arguments are Integers and
+* Only `start` and `stop` are provided
+* Only `length` and `stop` are provided
+
+A `UnitRange` is not produced if `step` is provided even if specified as one.
 """
 function range end
 
@@ -115,8 +144,8 @@ range(;start=nothing, stop=nothing, length::Union{Integer, Nothing}=nothing, ste
     _range(start, step, stop, length)
 
 _range(start::Nothing, step::Nothing, stop::Nothing, len::Nothing) = range_error(start, step, stop, len)
-_range(start::Nothing, step::Nothing, stop::Nothing, len::Any    ) = range_error(start, step, stop, len)
-_range(start::Nothing, step::Nothing, stop::Any    , len::Nothing) = range_error(start, step, stop, len)
+_range(start::Nothing, step::Nothing, stop::Nothing, len::Any    ) = range_length(len)
+_range(start::Nothing, step::Nothing, stop::Any    , len::Nothing) = range_stop(stop)
 _range(start::Nothing, step::Nothing, stop::Any    , len::Any    ) = range_stop_length(stop, len)
 _range(start::Nothing, step::Any    , stop::Nothing, len::Nothing) = range_error(start, step, stop, len)
 _range(start::Nothing, step::Any    , stop::Nothing, len::Any    ) = range_error(start, step, stop, len)
@@ -131,6 +160,14 @@ _range(start::Any    , step::Any    , stop::Nothing, len::Any    ) = range_start
 _range(start::Any    , step::Any    , stop::Any    , len::Nothing) = range_start_step_stop(start, step, stop)
 _range(start::Any    , step::Any    , stop::Any    , len::Any    ) = range_error(start, step, stop, len)
 
+# Length as the only argument
+range_length(len::Integer) = OneTo(len)
+
+# Stop as the only argument
+range_stop(stop) = range_start_stop(oneunit(stop), stop)
+range_stop(stop::Integer) = range_length(stop)
+
+# Stop and length as the only arguments
 range_stop_length(a::Real,          len::Integer) = UnitRange{typeof(a)}(oftype(a, a-len+1), a)
 range_stop_length(a::AbstractFloat, len::Integer) = range_step_stop_length(oftype(a, 1), a, len)
 range_stop_length(a,                len::Integer) = range_step_stop_length(oftype(a-a, 1), a, len)
@@ -367,7 +404,7 @@ unitrange(x) = UnitRange(x)
 if isdefined(Main, :Base)
     # Constant-fold-able indexing into tuples to functionally expose Base.tail and Base.front
     function getindex(@nospecialize(t::Tuple), r::AbstractUnitRange)
-        @_inline_meta
+        @inline
         require_one_based_indexing(r)
         if length(r) <= 10
             return ntuple(i -> t[i + first(r) - 1], length(r))
@@ -393,15 +430,15 @@ be 1.
 struct OneTo{T<:Integer} <: AbstractUnitRange{T}
     stop::T
     function OneTo{T}(stop) where {T<:Integer}
-        throwbool(r)  = (@_noinline_meta; throw(ArgumentError("invalid index: $r of type Bool")))
+        throwbool(r)  = (@noinline; throw(ArgumentError("invalid index: $r of type Bool")))
         T === Bool && throwbool(stop)
         return new(max(zero(T), stop))
     end
 
     function OneTo{T}(r::AbstractRange) where {T<:Integer}
-        throwstart(r) = (@_noinline_meta; throw(ArgumentError("first element must be 1, got $(first(r))")))
-        throwstep(r)  = (@_noinline_meta; throw(ArgumentError("step must be 1, got $(step(r))")))
-        throwbool(r)  = (@_noinline_meta; throw(ArgumentError("invalid index: $r of type Bool")))
+        throwstart(r) = (@noinline; throw(ArgumentError("first element must be 1, got $(first(r))")))
+        throwstep(r)  = (@noinline; throw(ArgumentError("step must be 1, got $(step(r))")))
+        throwbool(r)  = (@noinline; throw(ArgumentError("invalid index: $r of type Bool")))
         first(r) == 1 || throwstart(r)
         step(r)  == 1 || throwstep(r)
         T === Bool && throwbool(r)
@@ -415,10 +452,11 @@ oneto(r) = OneTo(r)
 ## Step ranges parameterized by length
 
 """
-    StepRangeLen{T,R,S}(ref::R, step::S, len, [offset=1]) where {T,R,S}
-    StepRangeLen(       ref::R, step::S, len, [offset=1]) where {  R,S}
+    StepRangeLen(         ref::R, step::S, len, [offset=1]) where {  R,S}
+    StepRangeLen{T,R,S}(  ref::R, step::S, len, [offset=1]) where {T,R,S}
+    StepRangeLen{T,R,S,L}(ref::R, step::S, len, [offset=1]) where {T,R,S,L}
 
-A range `r` where `r[i]` produces values of type `T` (in the second
+A range `r` where `r[i]` produces values of type `T` (in the first
 form, `T` is deduced automatically), parameterized by a `ref`erence
 value, a `step`, and the `len`gth. By default `ref` is the starting
 value `r[1]`, but alternatively you can supply it as the value of
@@ -426,48 +464,53 @@ value `r[1]`, but alternatively you can supply it as the value of
 with `TwicePrecision` this can be used to implement ranges that are
 free of roundoff error.
 """
-struct StepRangeLen{T,R,S} <: AbstractRange{T}
+struct StepRangeLen{T,R,S,L<:Integer} <: AbstractRange{T}
     ref::R       # reference value (might be smallest-magnitude value in the range)
     step::S      # step value
-    len::Int     # length of the range
-    offset::Int  # the index of ref
+    len::L       # length of the range
+    offset::L    # the index of ref
 
-    function StepRangeLen{T,R,S}(ref::R, step::S, len::Integer, offset::Integer = 1) where {T,R,S}
+    function StepRangeLen{T,R,S,L}(ref::R, step::S, len::Integer, offset::Integer = 1) where {T,R,S,L}
         if T <: Integer && !isinteger(ref + step)
             throw(ArgumentError("StepRangeLen{<:Integer} cannot have non-integer step"))
         end
-        len >= 0 || throw(ArgumentError("length cannot be negative, got $len"))
-        1 <= offset <= max(1,len) || throw(ArgumentError("StepRangeLen: offset must be in [1,$len], got $offset"))
-        new(ref, step, len, offset)
+        len = convert(L, len)
+        len >= zero(len) || throw(ArgumentError("length cannot be negative, got $len"))
+        offset = convert(L, offset)
+        L1 = oneunit(typeof(len))
+        L1 <= offset <= max(L1, len) || throw(ArgumentError("StepRangeLen: offset must be in [1,$len], got $offset"))
+        return new(ref, step, len, offset)
     end
 end
 
+StepRangeLen{T,R,S}(ref::R, step::S, len::Integer, offset::Integer = 1) where {T,R,S} =
+    StepRangeLen{T,R,S,promote_type(Int,typeof(len))}(ref, step, len, offset)
 StepRangeLen(ref::R, step::S, len::Integer, offset::Integer = 1) where {R,S} =
-    StepRangeLen{typeof(ref+zero(step)),R,S}(ref, step, len, offset)
+    StepRangeLen{typeof(ref+zero(step)),R,S,promote_type(Int,typeof(len))}(ref, step, len, offset)
 StepRangeLen{T}(ref::R, step::S, len::Integer, offset::Integer = 1) where {T,R,S} =
-    StepRangeLen{T,R,S}(ref, step, len, offset)
+    StepRangeLen{T,R,S,promote_type(Int,typeof(len))}(ref, step, len, offset)
 
 ## range with computed step
 
 """
-    LinRange{T}
+    LinRange{T,L}
 
 A range with `len` linearly spaced elements between its `start` and `stop`.
 The size of the spacing is controlled by `len`, which must
-be an `Int`.
+be an `Integer`.
 
 # Examples
 ```jldoctest
 julia> LinRange(1.5, 5.5, 9)
-9-element LinRange{Float64}:
+9-element LinRange{Float64, Int64}:
  1.5,2.0,2.5,3.0,3.5,4.0,4.5,5.0,5.5
 ```
 
 Compared to using [`range`](@ref), directly constructing a `LinRange` should
 have less overhead but won't try to correct for floating point errors:
-```julia
+```jldoctest
 julia> collect(range(-0.1, 0.3, length=5))
-5-element Array{Float64,1}:
+5-element Vector{Float64}:
  -0.1
   0.0
   0.1
@@ -475,7 +518,7 @@ julia> collect(range(-0.1, 0.3, length=5))
   0.3
 
 julia> collect(LinRange(-0.1, 0.3, 5))
-5-element Array{Float64,1}:
+5-element Vector{Float64}:
  -0.1
  -1.3877787807814457e-17
   0.09999999999999999
@@ -483,26 +526,35 @@ julia> collect(LinRange(-0.1, 0.3, 5))
   0.3
 ```
 """
-struct LinRange{T} <: AbstractRange{T}
+struct LinRange{T,L<:Integer} <: AbstractRange{T}
     start::T
     stop::T
-    len::Int
-    lendiv::Int
+    len::L
+    lendiv::L
 
-    function LinRange{T}(start,stop,len) where T
+    function LinRange{T,L}(start::T, stop::T, len::L) where {T,L<:Integer}
         len >= 0 || throw(ArgumentError("range($start, stop=$stop, length=$len): negative length"))
-        if len == 1
+        onelen = oneunit(typeof(len))
+        if len == onelen
             start == stop || throw(ArgumentError("range($start, stop=$stop, length=$len): endpoints differ"))
-            return new(start, stop, 1, 1)
+            return new(start, stop, len, len)
         end
-        lendiv = max(len-1, 1)
+        lendiv = max(len - onelen, onelen)
         if T <: Integer && !iszero(mod(stop-start, lendiv))
             throw(ArgumentError("LinRange{<:Integer} cannot have non-integer step"))
         end
-        new(start,stop,len,lendiv)
+        return new(start, stop, len, lendiv)
     end
 end
 
+function LinRange{T,L}(start, stop, len::Integer) where {T,L}
+    LinRange{T,L}(convert(T, start), convert(T, stop), convert(L, len))
+end
+
+function LinRange{T}(start, stop, len::Integer) where T
+    LinRange{T,promote_type(Int,typeof(len))}(start, stop, len)
+end
+
 function LinRange(start, stop, len::Integer)
     T = typeof((stop-start)/len)
     LinRange{T}(start, stop, len)
@@ -510,7 +562,7 @@ end
 
 range_start_stop_length(start, stop, len::Integer) =
     range_start_stop_length(promote(start, stop)..., len)
-range_start_stop_length(start::T, stop::T, len::Integer) where {T} = LinRange{T}(start, stop, len)
+range_start_stop_length(start::T, stop::T, len::Integer) where {T} = LinRange(start, stop, len)
 range_start_stop_length(start::T, stop::T, len::Integer) where {T<:Integer} =
     _linspace(float(T), start, stop, len)
 ## for Float16, Float32, and Float64 we hit twiceprecision.jl to lift to higher precision StepRangeLen
@@ -563,31 +615,34 @@ function print_range(io::IO, r::AbstractRange,
     maxpossiblecols = div(screenwidth, 1+sepsize) # assume each element is at least 1 char + 1 separator
     colsr = n <= maxpossiblecols ? (1:n) : [1:div(maxpossiblecols,2)+1; (n-div(maxpossiblecols,2)):n]
     rowmatrix = reshape(r[colsr], 1, length(colsr)) # treat the range as a one-row matrix for print_matrix_row
-    A = alignment(io, rowmatrix, 1:m, 1:length(rowmatrix), screenwidth, screenwidth, sepsize) # how much space range takes
+    nrow, idxlast = size(rowmatrix, 2), last(axes(rowmatrix, 2))
+    A = alignment(io, rowmatrix, 1:m, 1:length(rowmatrix), screenwidth, screenwidth, sepsize, nrow) # how much space range takes
     if n <= length(A) # cols fit screen, so print out all elements
         print(io, pre) # put in pre chars
-        print_matrix_row(io,rowmatrix,A,1,1:n,sep) # the entire range
+        print_matrix_row(io,rowmatrix,A,1,1:n,sep,idxlast) # the entire range
         print(io, post) # add the post characters
     else # cols don't fit so put horiz ellipsis in the middle
         # how many chars left after dividing width of screen in half
         # and accounting for the horiz ellipsis
         c = div(screenwidth-length(hdots)+1,2)+1 # chars remaining for each side of rowmatrix
-        alignR = reverse(alignment(io, rowmatrix, 1:m, length(rowmatrix):-1:1, c, c, sepsize)) # which cols of rowmatrix to put on the right
+        alignR = reverse(alignment(io, rowmatrix, 1:m, length(rowmatrix):-1:1, c, c, sepsize, nrow)) # which cols of rowmatrix to put on the right
         c = screenwidth - sum(map(sum,alignR)) - (length(alignR)-1)*sepsize - length(hdots)
-        alignL = alignment(io, rowmatrix, 1:m, 1:length(rowmatrix), c, c, sepsize) # which cols of rowmatrix to put on the left
+        alignL = alignment(io, rowmatrix, 1:m, 1:length(rowmatrix), c, c, sepsize, nrow) # which cols of rowmatrix to put on the left
         print(io, pre)   # put in pre chars
-        print_matrix_row(io, rowmatrix,alignL,1,1:length(alignL),sep) # left part of range
+        print_matrix_row(io, rowmatrix,alignL,1,1:length(alignL),sep,idxlast) # left part of range
         print(io, hdots) # horizontal ellipsis
-        print_matrix_row(io, rowmatrix,alignR,1,length(rowmatrix)-length(alignR)+1:length(rowmatrix),sep) # right part of range
+        print_matrix_row(io, rowmatrix,alignR,1,length(rowmatrix)-length(alignR)+1:length(rowmatrix),sep,idxlast) # right part of range
         print(io, post)  # post chars
     end
 end
 
 ## interface implementations
 
+length(r::AbstractRange) = error("length implementation missing") # catch mistakes
 size(r::AbstractRange) = (length(r),)
 
 isempty(r::StepRange) =
+    # steprange_last_empty(r.start, r.step, r.stop) == r.stop
     (r.start != r.stop) & ((r.step > zero(r.step)) != (r.stop > r.start))
 isempty(r::AbstractUnitRange) = first(r) > last(r)
 isempty(r::StepRangeLen) = length(r) == 0
@@ -614,68 +669,135 @@ julia> step(range(2.5, stop=10.9, length=85))
 ```
 """
 step(r::StepRange) = r.step
-step(r::AbstractUnitRange{T}) where{T} = oneunit(T) - zero(T)
+step(r::AbstractUnitRange{T}) where {T} = oneunit(T) - zero(T)
 step(r::StepRangeLen) = r.step
 step(r::StepRangeLen{T}) where {T<:AbstractFloat} = T(r.step)
 step(r::LinRange) = (last(r)-first(r))/r.lendiv
 
+# high-precision step
 step_hp(r::StepRangeLen) = r.step
 step_hp(r::AbstractRange) = step(r)
 
-unsafe_length(r::AbstractRange) = length(r)  # generic fallback
-
-function unsafe_length(r::StepRange)
-    n = Integer(div((r.stop - r.start) + r.step, r.step))
-    isempty(r) ? zero(n) : n
-end
-length(r::StepRange) = unsafe_length(r)
-unsafe_length(r::AbstractUnitRange) = Integer(last(r) - first(r) + step(r))
-unsafe_length(r::OneTo) = Integer(r.stop - zero(r.stop))
-length(r::AbstractUnitRange) = unsafe_length(r)
-length(r::OneTo) = unsafe_length(r)
-length(r::StepRangeLen) = r.len
-length(r::LinRange) = r.len
+axes(r::AbstractRange) = (oneto(length(r)),)
 
 # Needed to fold the `firstindex` call in SimdLoop.simd_index
 firstindex(::UnitRange) = 1
 firstindex(::StepRange) = 1
 firstindex(::LinRange) = 1
 
-function length(r::StepRange{T}) where T<:Union{Int,UInt,Int64,UInt64,Int128,UInt128}
-    isempty(r) && return zero(T)
-    if r.step > 1
-        return checked_add(convert(T, div(unsigned(r.stop - r.start), r.step)), one(T))
-    elseif r.step < -1
-        return checked_add(convert(T, div(unsigned(r.start - r.stop), -r.step)), one(T))
-    elseif r.step > 0
-        return checked_add(div(checked_sub(r.stop, r.start), r.step), one(T))
+# n.b. checked_length for these is defined iff checked_add and checked_sub are
+# defined between the relevant types
+function checked_length(r::OrdinalRange{T}) where T
+    s = step(r)
+    # s != 0 by construction, but checking for it below avoids a division error later
+    start = first(r)
+    if s == zero(s) || isempty(r)
+        return Integer(start - start + zero(s))
+    end
+    stop = last(r)
+    if isless(s, zero(s))
+        diff = checked_sub(start, stop)
+        s = -s
+    else
+        diff = checked_sub(stop, start)
+    end
+    a = div(diff, s)
+    return Integer(checked_add(a, oneunit(a)))
+end
+
+function checked_length(r::AbstractUnitRange{T}) where T
+    # compiler optimization: remove dead cases from above
+    if isempty(r)
+        return Integer(first(r) - first(r))
+    end
+    a = checked_sub(last(r), first(r))
+    return Integer(checked_add(a, oneunit(a)))
+end
+
+function length(r::OrdinalRange{T}) where T
+    s = step(r)
+    # s != 0 by construction, but checking for it below avoids a division error later
+    start = first(r)
+    if s == zero(s) || isempty(r)
+        return Integer(div(start-start, oneunit(s)))
+    end
+    stop = last(r)
+    if isless(s, zero(s))
+        diff = start - stop
+        s = -s
     else
-        return checked_add(div(checked_sub(r.start, r.stop), -r.step), one(T))
+        diff = stop - start
     end
+    a = div(diff, s)
+    return Integer(a + oneunit(a))
 end
 
-function length(r::AbstractUnitRange{T}) where T<:Union{Int,Int64,Int128}
-    @_inline_meta
-    checked_add(checked_sub(last(r), first(r)), one(T))
+function length(r::AbstractUnitRange{T}) where T
+    @inline
+    a = last(r) - first(r) # even when isempty, by construction (with overflow)
+    return Integer(a + oneunit(a))
 end
-length(r::OneTo{T}) where {T<:Union{Int,Int64}} = T(r.stop)
 
-length(r::AbstractUnitRange{T}) where {T<:Union{UInt,UInt64,UInt128}} =
-    r.stop < r.start ? zero(T) : checked_add(last(r) - first(r), one(T))
+length(r::OneTo) = Integer(r.stop - zero(r.stop))
+length(r::StepRangeLen) = r.len
+length(r::LinRange) = r.len
 
-# some special cases to favor default Int type
-let smallint = (Int === Int64 ?
-                Union{Int8,UInt8,Int16,UInt16,Int32,UInt32} :
-                Union{Int8,UInt8,Int16,UInt16})
-    global length
-
-    function length(r::StepRange{<:smallint})
-        isempty(r) && return Int(0)
-        div(Int(r.stop)+Int(r.step) - Int(r.start), Int(r.step))
+let bigints = Union{Int, UInt, Int64, UInt64, Int128, UInt128}
+    global length, checked_length
+    # compiler optimization for types where promote_type(T, Int) == T
+    length(r::OneTo{T}) where {T<:bigints} = r.stop
+    # slightly more accurate length and checked_length in extreme cases
+    # (near typemax) for types with known `unsigned` functions
+    function length(r::OrdinalRange{T}) where T<:bigints
+        s = step(r)
+        s == zero(s) && return zero(T) # unreachable, by construction, but avoids the error case here later
+        isempty(r) && return zero(T)
+        diff = last(r) - first(r)
+        # if |s| > 1, diff might have overflowed, but unsigned(diff)÷s should
+        # therefore still be valid (if the result is representable at all)
+        # n.b. !(s isa T)
+        if s isa Unsigned || -1 <= s <= 1 || s == -s
+            a = div(diff, s)
+        elseif s < 0
+            a = div(unsigned(-diff), -s) % typeof(diff)
+        else
+            a = div(unsigned(diff), s) % typeof(diff)
+        end
+        return Integer(a) + oneunit(a)
     end
+    function checked_length(r::OrdinalRange{T}) where T<:bigints
+        s = step(r)
+        s == zero(s) && return zero(T) # unreachable, by construction, but avoids the error case here later
+        isempty(r) && return zero(T)
+        stop, start = last(r), first(r)
+        # n.b. !(s isa T)
+        if s > 1
+            diff = stop - start
+            a = convert(T, div(unsigned(diff), s))
+        elseif s < -1
+            diff = start - stop
+            a = convert(T, div(unsigned(diff), -s))
+        elseif s > 0
+            a = div(checked_sub(stop, start), s)
+        else
+            a = div(checked_sub(start, stop), -s)
+        end
+        return checked_add(a, oneunit(a))
+    end
+end
 
-    length(r::AbstractUnitRange{<:smallint}) = Int(last(r)) - Int(first(r)) + 1
-    length(r::OneTo{<:smallint}) = Int(r.stop)
+# some special cases to favor default Int type
+let smallints = (Int === Int64 ?
+                Union{Int8, UInt8, Int16, UInt16, Int32, UInt32} :
+                Union{Int8, UInt8, Int16, UInt16})
+    global length, checked_length
+    # n.b. !(step isa T)
+    length(r::OrdinalRange{<:smallints}) = div(Int(last(r)) - Int(first(r)), step(r)) + 1
+    length(r::AbstractUnitRange{<:smallints}) = Int(last(r)) - Int(first(r)) + 1
+    length(r::OneTo{<:smallints}) = Int(r.stop)
+    checked_length(r::OrdinalRange{<:smallints}) = length(r)
+    checked_length(r::AbstractUnitRange{<:smallints}) = length(r)
+    checked_length(r::OneTo{<:smallints}) = length(r)
 end
 
 first(r::OrdinalRange{T}) where {T} = convert(T, r.start)
@@ -705,7 +827,7 @@ function argmin(r::AbstractRange)
     elseif step(r) > 0
         firstindex(r)
     else
-        first(searchsorted(r, last(r)))
+        lastindex(r)
     end
 end
 
@@ -720,7 +842,7 @@ function argmax(r::AbstractRange)
     if isempty(r)
         throw(ArgumentError("range must be non-empty"))
     elseif step(r) > 0
-        first(searchsorted(r, last(r)))
+        lastindex(r)
     else
         firstindex(r)
     end
@@ -734,16 +856,17 @@ copy(r::AbstractRange) = r
 
 ## iteration
 
-function iterate(r::Union{LinRange,StepRangeLen}, i::Int=1)
-    @_inline_meta
+function iterate(r::Union{StepRangeLen,LinRange}, i::Integer=zero(length(r)))
+    @inline
+    i += oneunit(i)
     length(r) < i && return nothing
-    unsafe_getindex(r, i), i + 1
+    unsafe_getindex(r, i), i
 end
 
 iterate(r::OrdinalRange) = isempty(r) ? nothing : (first(r), first(r))
 
 function iterate(r::OrdinalRange{T}, i) where {T}
-    @_inline_meta
+    @inline
     i == last(r) && return nothing
     next = convert(T, i + step(r))
     (next, next)
@@ -754,7 +877,7 @@ end
 _in_unit_range(v::UnitRange, val, i::Integer) = i > 0 && val <= v.stop && val >= v.start
 
 function getindex(v::UnitRange{T}, i::Integer) where T
-    @_inline_meta
+    @inline
     i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
     val = convert(T, v.start + (i - 1))
     @boundscheck _in_unit_range(v, val, i) || throw_boundserror(v, i)
@@ -765,7 +888,7 @@ const OverflowSafe = Union{Bool,Int8,Int16,Int32,Int64,Int128,
                            UInt8,UInt16,UInt32,UInt64,UInt128}
 
 function getindex(v::UnitRange{T}, i::Integer) where {T<:OverflowSafe}
-    @_inline_meta
+    @inline
     i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
     val = v.start + (i - 1)
     @boundscheck _in_unit_range(v, val, i) || throw_boundserror(v, i)
@@ -773,14 +896,14 @@ function getindex(v::UnitRange{T}, i::Integer) where {T<:OverflowSafe}
 end
 
 function getindex(v::OneTo{T}, i::Integer) where T
-    @_inline_meta
+    @inline
     i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
     @boundscheck ((i > 0) & (i <= v.stop)) || throw_boundserror(v, i)
     convert(T, i)
 end
 
 function getindex(v::AbstractRange{T}, i::Integer) where T
-    @_inline_meta
+    @inline
     i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
     ret = convert(T, first(v) + (i - 1)*step_hp(v))
     ok = ifelse(step(v) > zero(step(v)),
@@ -791,7 +914,7 @@ function getindex(v::AbstractRange{T}, i::Integer) where T
 end
 
 function getindex(r::Union{StepRangeLen,LinRange}, i::Integer)
-    @_inline_meta
+    @inline
     i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
     @boundscheck checkbounds(r, i)
     unsafe_getindex(r, i)
@@ -816,7 +939,7 @@ function unsafe_getindex(r::LinRange, i::Integer)
 end
 
 function lerpi(j::Integer, d::Integer, a::T, b::T) where T
-    @_inline_meta
+    @inline
     t = j/d
     T((1-t)*a + t*b)
 end
@@ -824,58 +947,38 @@ end
 getindex(r::AbstractRange, ::Colon) = copy(r)
 
 function getindex(r::AbstractUnitRange, s::AbstractUnitRange{T}) where {T<:Integer}
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(r, s)
 
     if T === Bool
-        if length(s) == 0
-            return r
-        elseif length(s) == 1
-            if first(s)
-                return r
-            else
-                return range(r[1], length=0)
-            end
-        else # length(s) == 2
-            return range(r[2], length=1)
-        end
+        range(first(s) ? first(r) : last(r), length = Integer(last(s)))
     else
         f = first(r)
-        st = oftype(f, f + first(s)-1)
+        st = oftype(f, f + first(s)-firstindex(r))
         return range(st, length=length(s))
     end
 end
 
 function getindex(r::OneTo{T}, s::OneTo) where T
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(r, s)
     OneTo(T(s.stop))
 end
 
 function getindex(r::AbstractUnitRange, s::StepRange{T}) where {T<:Integer}
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(r, s)
 
     if T === Bool
-        if length(s) == 0
-            return range(first(r), step=one(eltype(r)), length=0)
-        elseif length(s) == 1
-            if first(s)
-                return range(first(r), step=one(eltype(r)), length=1)
-            else
-                return range(first(r), step=one(eltype(r)), length=0)
-            end
-        else # length(s) == 2
-            return range(r[2], step=one(eltype(r)), length=1)
-        end
+        range(first(s) ? first(r) : last(r), step=oneunit(eltype(r)), length = Integer(last(s)))
     else
-        st = oftype(first(r), first(r) + s.start-1)
+        st = oftype(first(r), first(r) + s.start-firstindex(r))
         return range(st, step=step(s), length=length(s))
     end
 end
 
 function getindex(r::StepRange, s::AbstractRange{T}) where {T<:Integer}
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(r, s)
 
     if T === Bool
@@ -888,7 +991,7 @@ function getindex(r::StepRange, s::AbstractRange{T}) where {T<:Integer}
                 return range(first(r), step=step(r), length=0)
             end
         else # length(s) == 2
-            return range(r[2], step=step(r), length=1)
+            return range(last(r), step=step(r), length=1)
         end
     else
         st = oftype(r.start, r.start + (first(s)-1)*step(r))
@@ -897,56 +1000,71 @@ function getindex(r::StepRange, s::AbstractRange{T}) where {T<:Integer}
 end
 
 function getindex(r::StepRangeLen{T}, s::OrdinalRange{S}) where {T, S<:Integer}
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(r, s)
 
+    len = length(s)
+    sstep = step_hp(s)
+    rstep = step_hp(r)
+    L = typeof(len)
     if S === Bool
-        if length(s) == 0
-            return StepRangeLen{T}(first(r), step(r), 0, 1)
-        elseif length(s) == 1
+        rstep *= one(sstep)
+        if len == 0
+            return StepRangeLen{T}(first(r), rstep, zero(L), oneunit(L))
+        elseif len == 1
             if first(s)
-                return StepRangeLen{T}(first(r), step(r), 1, 1)
+                return StepRangeLen{T}(first(r), rstep, oneunit(L), oneunit(L))
             else
-                return StepRangeLen{T}(first(r), step(r), 0, 1)
+                return StepRangeLen{T}(first(r), rstep, zero(L), oneunit(L))
             end
-        else # length(s) == 2
-            return StepRangeLen{T}(r[2], step(r), 1, 1)
+        else # len == 2
+            return StepRangeLen{T}(last(r), rstep, oneunit(L), oneunit(L))
         end
     else
         # Find closest approach to offset by s
         ind = LinearIndices(s)
-        offset = max(min(1 + round(Int, (r.offset - first(s))/step(s)), last(ind)), first(ind))
-        ref = _getindex_hiprec(r, first(s) + (offset-1)*step(s))
-        return StepRangeLen{T}(ref, r.step*step(s), length(s), offset)
+        offset = L(max(min(1 + round(L, (r.offset - first(s))/sstep), last(ind)), first(ind)))
+        ref = _getindex_hiprec(r, first(s) + (offset-1)*sstep)
+        return StepRangeLen{T}(ref, rstep*sstep, len, offset)
     end
 end
 
 function getindex(r::LinRange{T}, s::OrdinalRange{S}) where {T, S<:Integer}
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(r, s)
 
+    len = length(s)
+    L = typeof(len)
     if S === Bool
-        if length(s) == 0
-            return LinRange(first(r), first(r), 0)
-        elseif length(s) == 1
+        if len == 0
+            return LinRange{T}(first(r), first(r), len)
+        elseif len == 1
             if first(s)
-                return LinRange(first(r), first(r), 1)
+                return LinRange{T}(first(r), first(r), len)
             else
-                return LinRange(first(r), first(r), 0)
+                return LinRange{T}(first(r), first(r), zero(L))
             end
         else # length(s) == 2
-            return LinRange(r[2], r[2], 1)
+            return LinRange{T}(last(r), last(r), oneunit(L))
         end
     else
         vfirst = unsafe_getindex(r, first(s))
         vlast  = unsafe_getindex(r, last(s))
-        return LinRange{T}(vfirst, vlast, length(s))
+        return LinRange{T}(vfirst, vlast, len)
     end
 end
 
 show(io::IO, r::AbstractRange) = print(io, repr(first(r)), ':', repr(step(r)), ':', repr(last(r)))
 show(io::IO, r::UnitRange) = print(io, repr(first(r)), ':', repr(last(r)))
 show(io::IO, r::OneTo) = print(io, "Base.OneTo(", r.stop, ")")
+function show(io::IO, r::StepRangeLen)
+    if step(r) != 0
+        print(io, repr(first(r)), ':', repr(step(r)), ':', repr(last(r)))
+    else
+        # ugly temporary printing, to avoid 0:0:0 etc.
+        print(io, "StepRangeLen(", repr(first(r)), ", ", repr(step(r)), ", ", repr(length(r)), ")")
+    end
+end
 
 function ==(r::T, s::T) where {T<:AbstractRange}
     isempty(r) && return isempty(s)
@@ -960,6 +1078,11 @@ function ==(r::OrdinalRange, s::OrdinalRange)
     (first(r) == first(s)) & (step(r) == step(s)) & (last(r) == last(s))
 end
 
+==(r::AbstractUnitRange, s::AbstractUnitRange) =
+    (isempty(r) & isempty(s)) | ((first(r) == first(s)) & (last(r) == last(s)))
+
+==(r::OneTo, s::OneTo) = last(r) == last(s)
+
 ==(r::T, s::T) where {T<:Union{StepRangeLen,LinRange}} =
     (isempty(r) & isempty(s)) | ((first(r) == first(s)) & (length(r) == length(s)) & (last(r) == last(s)))
 
@@ -1058,6 +1181,16 @@ function intersect(r::StepRange, s::StepRange)
     step(r) < zero(step(r)) ? StepRange{T,S}(n, -a, m) : StepRange{T,S}(m, a, n)
 end
 
+function intersect(r1::AbstractRange, r2::AbstractRange)
+    # To iterate over the shorter range
+    length(r1) > length(r2) && return intersect(r2, r1)
+
+    r1 = unique(r1)
+    T = promote_eltype(r1, r2)
+
+    return T[x for x in r1 if x ∈ r2]
+end
+
 function intersect(r1::AbstractRange, r2::AbstractRange, r3::AbstractRange, r::AbstractRange...)
     i = intersect(intersect(r1, r2), r3)
     for t in r
@@ -1096,8 +1229,8 @@ issubset(r::AbstractUnitRange{<:Integer}, s::AbstractUnitRange{<:Integer}) =
 ## linear operations on ranges ##
 
 -(r::OrdinalRange) = range(-first(r), step=-step(r), length=length(r))
--(r::StepRangeLen{T,R,S}) where {T,R,S} =
-    StepRangeLen{T,R,S}(-r.ref, -r.step, length(r), r.offset)
+-(r::StepRangeLen{T,R,S,L}) where {T,R,S,L} =
+    StepRangeLen{T,R,S,L}(-r.ref, -r.step, r.len, r.offset)
 function -(r::LinRange)
     start = -r.start
     LinRange{typeof(start)}(start, -r.stop, length(r))
@@ -1111,12 +1244,12 @@ el_same(::Type{T}, a::Type{<:AbstractArray{S,n}}, b::Type{<:AbstractArray{T,n}})
 el_same(::Type, a, b) = promote_typejoin(a, b)
 
 promote_rule(a::Type{UnitRange{T1}}, b::Type{UnitRange{T2}}) where {T1,T2} =
-    el_same(promote_type(T1,T2), a, b)
+    el_same(promote_type(T1, T2), a, b)
 UnitRange{T}(r::UnitRange{T}) where {T<:Real} = r
 UnitRange{T}(r::UnitRange) where {T<:Real} = UnitRange{T}(r.start, r.stop)
 
 promote_rule(a::Type{OneTo{T1}}, b::Type{OneTo{T2}}) where {T1,T2} =
-    el_same(promote_type(T1,T2), a, b)
+    el_same(promote_type(T1, T2), a, b)
 OneTo{T}(r::OneTo{T}) where {T<:Integer} = r
 OneTo{T}(r::OneTo) where {T<:Integer} = OneTo{T}(r.stop)
 
@@ -1134,11 +1267,11 @@ OrdinalRange{T1, T2}(r::AbstractUnitRange{T1}) where {T1, T2<:Integer} = r
 OrdinalRange{T1, T2}(r::UnitRange) where {T1, T2<:Integer} = UnitRange{T1}(r)
 OrdinalRange{T1, T2}(r::OneTo) where {T1, T2<:Integer} = OneTo{T1}(r)
 
-promote_rule(::Type{StepRange{T1a,T1b}}, ::Type{StepRange{T2a,T2b}}) where {T1a,T1b,T2a,T2b} =
-    el_same(promote_type(T1a,T2a),
-            # el_same only operates on array element type, so just promote second type parameter
-            StepRange{T1a, promote_type(T1b,T2b)},
-            StepRange{T2a, promote_type(T1b,T2b)})
+function promote_rule(::Type{StepRange{T1a,T1b}}, ::Type{StepRange{T2a,T2b}}) where {T1a,T1b,T2a,T2b}
+    Tb = promote_type(T1b, T2b)
+    # el_same only operates on array element type, so just promote second type parameter
+    el_same(promote_type(T1a, T2a), StepRange{T1a,Tb}, StepRange{T2a,Tb})
+end
 StepRange{T1,T2}(r::StepRange{T1,T2}) where {T1,T2} = r
 
 promote_rule(a::Type{StepRange{T1a,T1b}}, ::Type{UR}) where {T1a,T1b,UR<:AbstractUnitRange} =
@@ -1149,35 +1282,38 @@ StepRange(r::AbstractUnitRange{T}) where {T} =
     StepRange{T,T}(first(r), step(r), last(r))
 (StepRange{T1,T2} where T1)(r::AbstractRange) where {T2} = StepRange{eltype(r),T2}(r)
 
-promote_rule(::Type{StepRangeLen{T1,R1,S1}},::Type{StepRangeLen{T2,R2,S2}}) where {T1,T2,R1,R2,S1,S2} =
-    el_same(promote_type(T1,T2),
-            StepRangeLen{T1,promote_type(R1,R2),promote_type(S1,S2)},
-            StepRangeLen{T2,promote_type(R1,R2),promote_type(S1,S2)})
-StepRangeLen{T,R,S}(r::StepRangeLen{T,R,S}) where {T,R,S} = r
-StepRangeLen{T,R,S}(r::StepRangeLen) where {T,R,S} =
-    StepRangeLen{T,R,S}(convert(R, r.ref), convert(S, r.step), length(r), r.offset)
+function promote_rule(::Type{StepRangeLen{T1,R1,S1,L1}},::Type{StepRangeLen{T2,R2,S2,L2}}) where {T1,T2,R1,R2,S1,S2,L1,L2}
+    R, S, L = promote_type(R1, R2), promote_type(S1, S2), promote_type(L1, L2)
+    el_same(promote_type(T1, T2), StepRangeLen{T1,R,S,L}, StepRangeLen{T2,R,S,L})
+end
+StepRangeLen{T,R,S,L}(r::StepRangeLen{T,R,S,L}) where {T,R,S,L} = r
+StepRangeLen{T,R,S,L}(r::StepRangeLen) where {T,R,S,L} =
+    StepRangeLen{T,R,S,L}(convert(R, r.ref), convert(S, r.step), convert(L, r.len), convert(L, r.offset))
 StepRangeLen{T}(r::StepRangeLen) where {T} =
-    StepRangeLen(convert(T, r.ref), convert(T, r.step), length(r), r.offset)
+    StepRangeLen(convert(T, r.ref), convert(T, r.step), r.len, r.offset)
 
-promote_rule(a::Type{StepRangeLen{T,R,S}}, ::Type{OR}) where {T,R,S,OR<:AbstractRange} =
-    promote_rule(a, StepRangeLen{eltype(OR), eltype(OR), eltype(OR)})
-StepRangeLen{T,R,S}(r::AbstractRange) where {T,R,S} =
-    StepRangeLen{T,R,S}(R(first(r)), S(step(r)), length(r))
+promote_rule(a::Type{StepRangeLen{T,R,S,L}}, ::Type{OR}) where {T,R,S,L,OR<:AbstractRange} =
+    promote_rule(a, StepRangeLen{eltype(OR), eltype(OR), eltype(OR), Int})
+StepRangeLen{T,R,S,L}(r::AbstractRange) where {T,R,S,L} =
+    StepRangeLen{T,R,S,L}(R(first(r)), S(step(r)), length(r))
 StepRangeLen{T}(r::AbstractRange) where {T} =
     StepRangeLen(T(first(r)), T(step(r)), length(r))
 StepRangeLen(r::AbstractRange) = StepRangeLen{eltype(r)}(r)
 
-promote_rule(a::Type{LinRange{T1}}, b::Type{LinRange{T2}}) where {T1,T2} =
-    el_same(promote_type(T1,T2), a, b)
-LinRange{T}(r::LinRange{T}) where {T} = r
-LinRange{T}(r::AbstractRange) where {T} = LinRange{T}(first(r), last(r), length(r))
+function promote_rule(a::Type{LinRange{T1,L1}}, b::Type{LinRange{T2,L2}}) where {T1,T2,L1,L2}
+    L = promote_type(L1, L2)
+    el_same(promote_type(T1, T2), LinRange{T1,L}, LinRange{T2,L})
+end
+LinRange{T,L}(r::LinRange{T,L}) where {T,L} = r
+LinRange{T,L}(r::AbstractRange) where {T,L} = LinRange{T,L}(first(r), last(r), length(r))
+LinRange{T}(r::AbstractRange) where {T} = LinRange{T,typeof(length(r))}(first(r), last(r), length(r))
 LinRange(r::AbstractRange{T}) where {T} = LinRange{T}(r)
 
-promote_rule(a::Type{LinRange{T}}, ::Type{OR}) where {T,OR<:OrdinalRange} =
-    promote_rule(a, LinRange{eltype(OR)})
+promote_rule(a::Type{LinRange{T,L}}, ::Type{OR}) where {T,L,OR<:OrdinalRange} =
+    promote_rule(a, LinRange{eltype(OR),L})
 
-promote_rule(::Type{LinRange{L}}, b::Type{StepRangeLen{T,R,S}}) where {L,T,R,S} =
-    promote_rule(StepRangeLen{L,L,L}, b)
+promote_rule(::Type{LinRange{A,L}}, b::Type{StepRangeLen{T2,R2,S2,L2}}) where {A,L,T2,R2,S2,L2} =
+    promote_rule(StepRangeLen{A,A,A,L}, b)
 
 ## concatenation ##
 
@@ -1204,9 +1340,9 @@ function _reverse(r::StepRangeLen, ::Colon)
     # invalid. As `reverse(r)` is also empty, any offset would work so we keep
     # `r.offset`
     offset = isempty(r) ? r.offset : length(r)-r.offset+1
-    StepRangeLen(r.ref, -r.step, length(r), offset)
+    return typeof(r)(r.ref, -r.step, length(r), offset)
 end
-_reverse(r::LinRange{T}, ::Colon) where {T} = LinRange{T}(r.stop, r.start, length(r))
+_reverse(r::LinRange{T}, ::Colon) where {T} = typeof(r)(r.stop, r.start, length(r))
 
 ## sorting ##
 
@@ -1229,7 +1365,9 @@ function sum(r::AbstractRange{<:Real})
 end
 
 function _in_range(x, r::AbstractRange)
-    if step(r) == 0
+    if !isfinite(x)
+        return false
+    elseif iszero(step(r))
         return !isempty(r) && first(r) == x
     else
         n = round(Integer, (x - first(r)) / step(r)) + 1
@@ -1244,11 +1382,13 @@ in(x::T, r::AbstractRange{T}) where {T} = _in_range(x, r)
 in(x::Integer, r::AbstractUnitRange{<:Integer}) = (first(r) <= x) & (x <= last(r))
 
 in(x::Real, r::AbstractRange{T}) where {T<:Integer} =
-    isinteger(x) && !isempty(r) && x >= minimum(r) && x <= maximum(r) &&
-        (mod(convert(T,x),step(r))-mod(first(r),step(r)) == 0)
+    isinteger(x) && !isempty(r) &&
+    (iszero(step(r)) ? x == first(r) : (x >= minimum(r) && x <= maximum(r) &&
+        (mod(convert(T,x),step(r))-mod(first(r),step(r)) == 0)))
 in(x::AbstractChar, r::AbstractRange{<:AbstractChar}) =
-    !isempty(r) && x >= minimum(r) && x <= maximum(r) &&
-        (mod(Int(x) - Int(first(r)), step(r)) == 0)
+    !isempty(r) &&
+    (iszero(step(r)) ? x == first(r) : (x >= minimum(r) && x <= maximum(r) &&
+        (mod(Int(x) - Int(first(r)), step(r)) == 0)))
 
 # Addition/subtraction of ranges
 
@@ -1258,7 +1398,7 @@ function _define_range_op(@nospecialize f)
             r1l = length(r1)
             (r1l == length(r2) ||
              throw(DimensionMismatch("argument dimensions must match: length of r1 is $r1l, length of r2 is $(length(r2))")))
-            range($f(first(r1), first(r2)), step=$f(step(r1), step(r2)), length=r1l)
+            StepRangeLen($f(first(r1), first(r2)), $f(step(r1), step(r2)), r1l)
         end
 
         function $f(r1::LinRange{T}, r2::LinRange{T}) where T
@@ -1294,14 +1434,14 @@ end
 Find `y` in the range `r` such that ``x ≡ y (mod n)``, where `n = length(r)`,
 i.e. `y = mod(x - first(r), n) + first(r)`.
 
-See also: [`mod1`](@ref).
+See also [`mod1`](@ref).
 
 # Examples
 ```jldoctest
-julia> mod(0, Base.OneTo(3))
+julia> mod(0, Base.OneTo(3))  # mod1(0, 3)
 3
 
-julia> mod(3, 0:2)
+julia> mod(3, 0:2)  # mod(3, 3)
 0
 ```
 
diff --git a/base/rational.jl b/base/rational.jl
index a998787f69685b..0276e23bb5af5c 100644
--- a/base/rational.jl
+++ b/base/rational.jl
@@ -18,21 +18,22 @@ unsafe_rational(num::T, den::T) where {T<:Integer} = unsafe_rational(T, num, den
 unsafe_rational(num::Integer, den::Integer) = unsafe_rational(promote(num, den)...)
 
 @noinline __throw_rational_argerror_typemin(T) = throw(ArgumentError("invalid rational: denominator can't be typemin($T)"))
-function checked_den(num::T, den::T) where T<:Integer
+function checked_den(::Type{T}, num::T, den::T) where T<:Integer
     if signbit(den)
         den = -den
-        signbit(den) && __throw_rational_argerror_typemin(T)
+        signbit(den) && __throw_rational_argerror_typemin(typeof(den))
         num = -num
     end
     return unsafe_rational(T, num, den)
 end
+checked_den(num::T, den::T) where T<:Integer = checked_den(T, num, den)
 checked_den(num::Integer, den::Integer) = checked_den(promote(num, den)...)
 
 @noinline __throw_rational_argerror_zero(T) = throw(ArgumentError("invalid rational: zero($T)//zero($T)"))
 function Rational{T}(num::Integer, den::Integer) where T<:Integer
     iszero(den) && iszero(num) && __throw_rational_argerror_zero(T)
     num, den = divgcd(num, den)
-    return checked_den(T(num), T(den))
+    return checked_den(T, T(num), T(den))
 end
 
 Rational(n::T, d::T) where {T<:Integer} = Rational{T}(n, d)
@@ -279,7 +280,7 @@ function -(x::Rational{T}) where T<:Unsigned
 end
 
 function +(x::Rational, y::Rational)
-    xp, yp = promote(x, y)
+    xp, yp = promote(x, y)::NTuple{2,Rational}
     if isinf(x) && x == y
         return xp
     end
@@ -288,7 +289,7 @@ function +(x::Rational, y::Rational)
 end
 
 function -(x::Rational, y::Rational)
-    xp, yp = promote(x, y)
+    xp, yp = promote(x, y)::NTuple{2,Rational}
     if isinf(x) && x == -y
         return xp
     end
@@ -532,3 +533,21 @@ function hash(x::Rational{<:BitInteger64}, h::UInt)
     h = hash_integer(num, h)
     return h
 end
+
+# These methods are only needed for performance. Since `first(r)` and `last(r)` have the
+# same denominator (because their difference is an integer), `length(r)` can be calculated
+# without calling `gcd`.
+function length(r::AbstractUnitRange{T}) where T<:Rational
+    @inline
+    f = first(r)
+    l = last(r)
+    return div(l.num - f.num + f.den, f.den)
+end
+function checked_length(r::AbstractUnitRange{T}) where T<:Rational
+    f = first(r)
+    l = last(r)
+    if isempty(r)
+        return f.num - f.num
+    end
+    return div(checked_add(checked_sub(l.num, f.num), f.den), f.den)
+end
diff --git a/base/reduce.jl b/base/reduce.jl
index 185a158893daa0..59e6aeb117cfa6 100644
--- a/base/reduce.jl
+++ b/base/reduce.jl
@@ -237,7 +237,7 @@ foldr(op, itr; kw...) = mapfoldr(identity, op, itr; kw...)
     if ifirst == ilast
         @inbounds a1 = A[ifirst]
         return mapreduce_first(f, op, a1)
-    elseif ifirst + blksize > ilast
+    elseif ilast - ifirst < blksize
         # sequential portion
         @inbounds a1 = A[ifirst]
         @inbounds a2 = A[ifirst+1]
@@ -249,7 +249,7 @@ foldr(op, itr; kw...) = mapfoldr(identity, op, itr; kw...)
         return v
     else
         # pairwise portion
-        imid = (ifirst + ilast) >> 1
+        imid = ifirst + (ilast - ifirst) >> 1
         v1 = mapreduce_impl(f, op, A, ifirst, imid, blksize)
         v2 = mapreduce_impl(f, op, A, imid+1, ilast, blksize)
         return op(v1, v2)
@@ -299,6 +299,9 @@ pairwise_blocksize(::typeof(abs2), ::typeof(+)) = 4096
 
 # handling empty arrays
 _empty_reduce_error() = throw(ArgumentError("reducing over an empty collection is not allowed"))
+_empty_reduce_error(@nospecialize(f), @nospecialize(T::Type)) = throw(ArgumentError("""
+    reducing with $f over an empty collection of element type $T is not allowed.
+    You may be able to prevent this error by supplying an `init` value to the reducer."""))
 
 """
     Base.reduce_empty(op, T)
@@ -306,23 +309,32 @@ _empty_reduce_error() = throw(ArgumentError("reducing over an empty collection i
 The value to be returned when calling [`reduce`](@ref), [`foldl`](@ref) or [`foldr`](@ref)
 with reduction `op` over an empty array with element type of `T`.
 
-If not defined, this will throw an `ArgumentError`.
+This should only be defined in unambiguous cases; for example,
+
+```julia
+Base.reduce_empty(::typeof(+), ::Type{T}) where T = zero(T)
+```
+
+is justified (the sum of zero elements is zero), whereas
+`reduce_empty(::typeof(max), ::Type{Any})` is not (the maximum value of an empty collection
+is generally ambiguous, and especially so when the element type is unknown).
+
+As an alternative, consider supplying an `init` value to the reducer.
 """
-reduce_empty(op, ::Type{T}) where {T} = _empty_reduce_error()
-reduce_empty(::typeof(+), ::Type{Union{}}) = _empty_reduce_error()
+reduce_empty(::typeof(+), ::Type{Union{}}) = _empty_reduce_error(+, Union{})
 reduce_empty(::typeof(+), ::Type{T}) where {T} = zero(T)
 reduce_empty(::typeof(+), ::Type{Bool}) = zero(Int)
-reduce_empty(::typeof(*), ::Type{Union{}}) = _empty_reduce_error()
+reduce_empty(::typeof(*), ::Type{Union{}}) = _empty_reduce_error(*, Union{})
 reduce_empty(::typeof(*), ::Type{T}) where {T} = one(T)
 reduce_empty(::typeof(*), ::Type{<:AbstractChar}) = ""
 reduce_empty(::typeof(&), ::Type{Bool}) = true
 reduce_empty(::typeof(|), ::Type{Bool}) = false
 
-reduce_empty(::typeof(add_sum), ::Type{Union{}}) = _empty_reduce_error()
+reduce_empty(::typeof(add_sum), ::Type{Union{}}) = _empty_reduce_error(add_sum, Union{})
 reduce_empty(::typeof(add_sum), ::Type{T}) where {T} = reduce_empty(+, T)
 reduce_empty(::typeof(add_sum), ::Type{T}) where {T<:SmallSigned}  = zero(Int)
 reduce_empty(::typeof(add_sum), ::Type{T}) where {T<:SmallUnsigned} = zero(UInt)
-reduce_empty(::typeof(mul_prod), ::Type{Union{}}) = _empty_reduce_error()
+reduce_empty(::typeof(mul_prod), ::Type{Union{}}) = _empty_reduce_error(mul_prod, Union{})
 reduce_empty(::typeof(mul_prod), ::Type{T}) where {T} = reduce_empty(*, T)
 reduce_empty(::typeof(mul_prod), ::Type{T}) where {T<:SmallSigned}  = one(Int)
 reduce_empty(::typeof(mul_prod), ::Type{T}) where {T<:SmallUnsigned} = one(UInt)
@@ -337,11 +349,8 @@ reduce_empty(op::FlipArgs, ::Type{T}) where {T} = reduce_empty(op.f, T)
 
 The value to be returned when calling [`mapreduce`](@ref), [`mapfoldl`](@ref`) or
 [`mapfoldr`](@ref) with map `f` and reduction `op` over an empty array with element type
-of `T`.
-
-If not defined, this will throw an `ArgumentError`.
+of `T`. See [`Base.reduce_empty`](@ref) for more information.
 """
-mapreduce_empty(f, op, T) = _empty_reduce_error()
 mapreduce_empty(::typeof(identity), op, T) = reduce_empty(op, T)
 mapreduce_empty(::typeof(abs), op, T)      = abs(reduce_empty(op, T))
 mapreduce_empty(::typeof(abs2), op, T)     = abs2(reduce_empty(op, T))
@@ -355,7 +364,10 @@ mapreduce_empty_iter(f, op, itr, ItrEltype) =
 
 @inline reduce_empty_iter(op, itr) = reduce_empty_iter(op, itr, IteratorEltype(itr))
 @inline reduce_empty_iter(op, itr, ::HasEltype) = reduce_empty(op, eltype(itr))
-reduce_empty_iter(op, itr, ::EltypeUnknown) = _empty_reduce_error()
+reduce_empty_iter(op, itr, ::EltypeUnknown) = throw(ArgumentError("""
+    reducing over an empty collection of unknown element type is not allowed.
+    You may be able to prevent this error by supplying an `init` value to the reducer."""))
+
 
 # handling of single-element iterators
 """
@@ -518,6 +530,8 @@ for non-empty collections.
 !!! compat "Julia 1.6"
     Keyword argument `init` requires Julia 1.6 or later.
 
+See also: [`reduce`](@ref), [`mapreduce`](@ref), [`count`](@ref), [`union`](@ref).
+
 # Examples
 ```jldoctest
 julia> sum(1:20)
@@ -529,7 +543,7 @@ julia> sum(1:20; init = 0.0)
 """
 sum(a; kw...) = sum(identity, a; kw...)
 sum(a::AbstractArray{Bool}; kw...) =
-    kw.data === NamedTuple() ? count(a) : reduce(add_sum, a; kw...)
+    isempty(kw) ? count(a) : reduce(add_sum, a; kw...)
 
 ## prod
 """
@@ -572,6 +586,8 @@ for non-empty collections.
 !!! compat "Julia 1.6"
     Keyword argument `init` requires Julia 1.6 or later.
 
+See also: [`reduce`](@ref), [`cumprod`](@ref), [`any`](@ref).
+
 # Examples
 ```jldoctest
 julia> prod(1:5)
@@ -722,7 +738,7 @@ julia> maximum([1,2,3])
 3
 
 julia> maximum(())
-ERROR: ArgumentError: reducing over an empty collection is not allowed
+ERROR: MethodError: reducing over an empty collection is not allowed; consider supplying `init` to the reducer
 Stacktrace:
 [...]
 
@@ -754,7 +770,7 @@ julia> minimum([1,2,3])
 1
 
 julia> minimum([])
-ERROR: ArgumentError: reducing over an empty collection is not allowed
+ERROR: MethodError: reducing over an empty collection is not allowed; consider supplying `init` to the reducer
 Stacktrace:
 [...]
 
@@ -767,11 +783,11 @@ minimum(a; kw...) = mapreduce(identity, min, a; kw...)
 ## findmax, findmin, argmax & argmin
 
 """
-    findmax(f, domain) -> (f(x), x)
+    findmax(f, domain) -> (f(x), index)
 
-Returns a pair of a value in the codomain (outputs of `f`) and the corresponding
-value in the `domain` (inputs to `f`) such that `f(x)` is maximised. If there
-are multiple maximal points, then the first one will be returned.
+Returns a pair of a value in the codomain (outputs of `f`) and the index of
+the corresponding value in the `domain` (inputs to `f`) such that `f(x)` is maximised.
+If there are multiple maximal points, then the first one will be returned.
 
 `domain` must be a non-empty iterable.
 
@@ -784,20 +800,20 @@ Values are compared with `isless`.
 
 ```jldoctest
 julia> findmax(identity, 5:9)
-(9, 9)
+(9, 5)
 
 julia> findmax(-, 1:10)
 (-1, 1)
 
-julia> findmax(first, [(1, :a), (2, :b), (2, :c)])
-(2, (2, :b))
+julia> findmax(first, [(1, :a), (3, :b), (3, :c)])
+(3, 2)
 
 julia> findmax(cos, 0:π/2:2π)
-(1.0, 0.0)
+(1.0, 1)
 ```
 """
-findmax(f, domain) = mapfoldl(x -> (f(x), x), _rf_findmax, domain)
-_rf_findmax((fm, m), (fx, x)) = isless(fm, fx) ? (fx, x) : (fm, m)
+findmax(f, domain) = mapfoldl( ((k, v),) -> (f(v), k), _rf_findmax, pairs(domain) )
+_rf_findmax((fm, im), (fx, ix)) = isless(fm, fx) ? (fx, ix) : (fm, im)
 
 """
     findmax(itr) -> (x, index)
@@ -806,6 +822,8 @@ Return the maximal element of the collection `itr` and its index or key.
 If there are multiple maximal elements, then the first one will be returned.
 Values are compared with `isless`.
 
+See also: [`findmin`](@ref), [`argmax`](@ref), [`maximum`](@ref).
+
 # Examples
 
 ```jldoctest
@@ -820,14 +838,14 @@ julia> findmax([1, 7, 7, NaN])
 ```
 """
 findmax(itr) = _findmax(itr, :)
-_findmax(a, ::Colon) = mapfoldl( ((k, v),) -> (v, k), _rf_findmax, pairs(a) )
+_findmax(a, ::Colon) = findmax(identity, a)
 
 """
-    findmin(f, domain) -> (f(x), x)
+    findmin(f, domain) -> (f(x), index)
 
-Returns a pair of a value in the codomain (outputs of `f`) and the corresponding
-value in the `domain` (inputs to `f`) such that `f(x)` is minimised. If there
-are multiple minimal points, then the first one will be returned.
+Returns a pair of a value in the codomain (outputs of `f`) and the index of
+the corresponding value in the `domain` (inputs to `f`) such that `f(x)` is minimised.
+If there are multiple minimal points, then the first one will be returned.
 
 `domain` must be a non-empty iterable.
 
@@ -840,21 +858,21 @@ are multiple minimal points, then the first one will be returned.
 
 ```jldoctest
 julia> findmin(identity, 5:9)
-(5, 5)
+(5, 1)
 
 julia> findmin(-, 1:10)
 (-10, 10)
 
-julia> findmin(first, [(1, :a), (1, :b), (2, :c)])
-(1, (1, :a))
+julia> findmin(first, [(2, :a), (2, :b), (3, :c)])
+(2, 1)
 
 julia> findmin(cos, 0:π/2:2π)
-(-1.0, 3.141592653589793)
+(-1.0, 3)
 ```
 
 """
-findmin(f, domain) = mapfoldl(x -> (f(x), x), _rf_findmin, domain)
-_rf_findmin((fm, m), (fx, x)) = isgreater(fm, fx) ? (fx, x) : (fm, m)
+findmin(f, domain) = mapfoldl( ((k, v),) -> (f(v), k), _rf_findmin, pairs(domain) )
+_rf_findmin((fm, im), (fx, ix)) = isgreater(fm, fx) ? (fx, ix) : (fm, im)
 
 """
     findmin(itr) -> (x, index)
@@ -863,6 +881,8 @@ Return the minimal element of the collection `itr` and its index or key.
 If there are multiple minimal elements, then the first one will be returned.
 `NaN` is treated as less than all other values except `missing`.
 
+See also: [`findmax`](@ref), [`argmin`](@ref), [`minimum`](@ref).
+
 # Examples
 
 ```jldoctest
@@ -877,7 +897,7 @@ julia> findmin([1, 7, 7, NaN])
 ```
 """
 findmin(itr) = _findmin(itr, :)
-_findmin(a, ::Colon) = mapfoldl( ((k, v),) -> (v, k), _rf_findmin, pairs(a) )
+_findmin(a, ::Colon) = findmin(identity, a)
 
 """
     argmax(f, domain)
@@ -892,6 +912,8 @@ Values are compared with `isless`.
 !!! compat "Julia 1.7"
     This method requires Julia 1.7 or later.
 
+See also [`argmin`](@ref), [`findmax`](@ref).
+
 # Examples
 ```jldoctest
 julia> argmax(abs, -10:5)
@@ -901,7 +923,7 @@ julia> argmax(cos, 0:π/2:2π)
 0.0
 ```
 """
-argmax(f, domain) = findmax(f, domain)[2]
+argmax(f, domain) = mapfoldl(x -> (f(x), x), _rf_findmax, domain)[2]
 
 """
     argmax(itr)
@@ -913,6 +935,8 @@ The collection must not be empty.
 
 Values are compared with `isless`.
 
+See also: [`argmin`](@ref), [`findmax`](@ref).
+
 # Examples
 ```jldoctest
 julia> argmax([8, 0.1, -9, pi])
@@ -940,6 +964,8 @@ If there are multiple minimal values for `f(x)` then the first one will be found
 !!! compat "Julia 1.7"
     This method requires Julia 1.7 or later.
 
+See also [`argmax`](@ref), [`findmin`](@ref).
+
 # Examples
 ```jldoctest
 julia> argmin(sign, -10:5)
@@ -950,10 +976,9 @@ julia> argmin(x -> -x^3 + x^2 - 10, -5:5)
 
 julia> argmin(acos, 0:0.1:1)
 1.0
-
 ```
 """
-argmin(f, domain) = findmin(f, domain)[2]
+argmin(f, domain) = mapfoldl(x -> (f(x), x), _rf_findmin, domain)[2]
 
 """
     argmin(itr)
@@ -965,6 +990,8 @@ The collection must not be empty.
 
 `NaN` is treated as less than all other values except `missing`.
 
+See also: [`argmax`](@ref), [`findmin`](@ref).
+
 # Examples
 ```jldoctest
 julia> argmin([8, 0.1, -9, pi])
@@ -992,6 +1019,8 @@ If the input contains [`missing`](@ref) values, return `missing` if all non-miss
 values are `false` (or equivalently, if the input contains no `true` value), following
 [three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic).
 
+See also: [`all`](@ref), [`count`](@ref), [`sum`](@ref), [`|`](@ref), [`||`](@ref).
+
 # Examples
 ```jldoctest
 julia> a = [true,false,false,true]
@@ -1028,6 +1057,8 @@ If the input contains [`missing`](@ref) values, return `missing` if all non-miss
 values are `true` (or equivalently, if the input contains no `false` value), following
 [three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic).
 
+See also: [`all!`](@ref), [`any`](@ref), [`count`](@ref), [`&`](@ref), [`&&`](@ref), [`allunique`](@ref).
+
 # Examples
 ```jldoctest
 julia> a = [true,false,false,true]
@@ -1167,6 +1198,8 @@ to start counting from and therefore also determines the output type.
 !!! compat "Julia 1.6"
     `init` keyword was added in Julia 1.6.
 
+See also: [`any`](@ref), [`sum`](@ref).
+
 # Examples
 ```jldoctest
 julia> count(i->(4<=i<=6), [2,3,4,5,6])
diff --git a/base/reducedim.jl b/base/reducedim.jl
index 85807851cd23d6..c04a6c1b984f6b 100644
--- a/base/reducedim.jl
+++ b/base/reducedim.jl
@@ -3,7 +3,7 @@
 ## Functions to compute the reduced shape
 
 # for reductions that expand 0 dims to 1
-reduced_index(i::OneTo) = OneTo(1)
+reduced_index(i::OneTo{T}) where {T} = OneTo(one(T))
 reduced_index(i::Union{Slice, IdentityUnitRange}) = oftype(i, first(i):first(i))
 reduced_index(i::AbstractUnitRange) =
     throw(ArgumentError(
@@ -193,7 +193,7 @@ end
 
 has_fast_linear_indexing(a::AbstractArrayOrBroadcasted) = false
 has_fast_linear_indexing(a::Array) = true
-has_fast_linear_indexing(::Number) = true  # for Broadcasted
+has_fast_linear_indexing(::Union{Number,Ref,AbstractChar}) = true  # 0d objects, for Broadcasted
 has_fast_linear_indexing(bc::Broadcast.Broadcasted) =
     all(has_fast_linear_indexing, bc.args)
 
@@ -295,7 +295,7 @@ reducedim!(op, R::AbstractArray{RT}, A::AbstractArrayOrBroadcasted) where {RT} =
 """
     mapreduce(f, op, A::AbstractArray...; dims=:, [init])
 
-Evaluates to the same as `reduce(op, map(f, A); dims=dims, init=init)`, but is generally
+Evaluates to the same as `reduce(op, map(f, A...); dims=dims, init=init)`, but is generally
 faster because the intermediate array is avoided.
 
 !!! compat "Julia 1.2"
@@ -590,6 +590,8 @@ Compute the maximum value of an array over the given dimensions. See also the
 [`max(a,b)`](@ref) function to take the maximum of two or more arguments,
 which can be applied elementwise to arrays via `max.(a,b)`.
 
+See also: [`maximum!`](@ref), [`extrema`](@ref), [`findmax`](@ref), [`argmax`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = [1 2; 3 4]
@@ -612,7 +614,7 @@ maximum(A::AbstractArray; dims)
 """
     maximum(f, A::AbstractArray; dims)
 
-Compute the maximum value from of calling the function `f` on each element of an array over the given
+Compute the maximum value by calling the function `f` on each element of an array over the given
 dimensions.
 
 # Examples
@@ -665,6 +667,8 @@ Compute the minimum value of an array over the given dimensions. See also the
 [`min(a,b)`](@ref) function to take the minimum of two or more arguments,
 which can be applied elementwise to arrays via `min.(a,b)`.
 
+See also: [`minimum!`](@ref), [`extrema`](@ref), [`findmin`](@ref), [`argmin`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = [1 2; 3 4]
@@ -687,7 +691,7 @@ minimum(A::AbstractArray; dims)
 """
     minimum(f, A::AbstractArray; dims)
 
-Compute the minimum value from of calling the function `f` on each element of an array over the given
+Compute the minimum value by calling the function `f` on each element of an array over the given
 dimensions.
 
 # Examples
@@ -760,7 +764,7 @@ all(A::AbstractArray; dims)
 """
     all(p, A; dims)
 
-Determine whether predicate p returns true for all elements along the given dimensions of an array.
+Determine whether predicate `p` returns `true` for all elements along the given dimensions of an array.
 
 # Examples
 ```jldoctest
@@ -832,7 +836,7 @@ any(::AbstractArray; dims)
 """
     any(p, A; dims)
 
-Determine whether predicate p returns true for any elements along the given dimensions of an array.
+Determine whether predicate `p` returns `true` for any elements along the given dimensions of an array.
 
 # Examples
 ```jldoctest
@@ -995,7 +999,7 @@ julia> findmin(A, dims=1)
 ([1.0 2.0], CartesianIndex{2}[CartesianIndex(1, 1) CartesianIndex(1, 2)])
 
 julia> findmin(A, dims=2)
-([1.0; 3.0], CartesianIndex{2}[CartesianIndex(1, 1); CartesianIndex(2, 1)])
+([1.0; 3.0;;], CartesianIndex{2}[CartesianIndex(1, 1); CartesianIndex(2, 1);;])
 ```
 """
 findmin(A::AbstractArray; dims=:) = _findmin(A, dims)
@@ -1042,7 +1046,7 @@ julia> findmax(A, dims=1)
 ([3.0 4.0], CartesianIndex{2}[CartesianIndex(2, 1) CartesianIndex(2, 2)])
 
 julia> findmax(A, dims=2)
-([2.0; 4.0], CartesianIndex{2}[CartesianIndex(1, 2); CartesianIndex(2, 2)])
+([2.0; 4.0;;], CartesianIndex{2}[CartesianIndex(1, 2); CartesianIndex(2, 2);;])
 ```
 """
 findmax(A::AbstractArray; dims=:) = _findmax(A, dims)
diff --git a/base/reflection.jl b/base/reflection.jl
index bfed4154cd7d83..870103f612a113 100644
--- a/base/reflection.jl
+++ b/base/reflection.jl
@@ -20,6 +20,8 @@ nameof(m::Module) = ccall(:jl_module_name, Ref{Symbol}, (Any,), m)
 
 Get a module's enclosing `Module`. `Main` is its own parent.
 
+See also: [`names`](@ref), [`nameof`](@ref), [`fullname`](@ref), [`@__MODULE__`](@ref).
+
 # Examples
 ```jldoctest
 julia> parentmodule(Main)
@@ -94,6 +96,8 @@ are also included.
 
 As a special case, all names defined in `Main` are considered \"exported\",
 since it is not idiomatic to explicitly export names from `Main`.
+
+See also: [`@locals`](@ref Base.@locals), [`@__MODULE__`](@ref).
 """
 names(m::Module; all::Bool = false, imported::Bool = false) =
     sort!(ccall(:jl_module_names, Array{Symbol,1}, (Any, Cint, Cint), m, all, imported))
@@ -125,7 +129,7 @@ function _fieldnames(@nospecialize t)
             throw(ArgumentError("type does not have definite field names"))
         end
     end
-    isdefined(t, :names) ? t.names : t.name.names
+    return t.name.names
 end
 
 """
@@ -150,7 +154,7 @@ function fieldname(t::DataType, i::Integer)
     end
     throw_need_pos_int(i) = throw(ArgumentError("Field numbers must be positive integers. $i is invalid."))
 
-    t.abstract && throw_not_def_field()
+    isabstracttype(t) && throw_not_def_field()
     names = _fieldnames(t)
     n_fields = length(names)::Int
     i > n_fields && throw_field_access(t, i, n_fields)
@@ -167,10 +171,15 @@ fieldname(t::Type{<:Tuple}, i::Integer) =
 
 Get a tuple with the names of the fields of a `DataType`.
 
+See also [`propertynames`](@ref), [`hasfield`](@ref).
+
 # Examples
 ```jldoctest
 julia> fieldnames(Rational)
 (:num, :den)
+
+julia> fieldnames(typeof(1+im))
+(:re, :im)
 ```
 """
 fieldnames(t::DataType) = (fieldcount(t); # error check to make sure type is specific enough
@@ -284,9 +293,11 @@ macro locals()
 end
 
 """
-    objectid(x)
+    objectid(x) -> UInt
 
 Get a hash value for `x` based on object identity. `objectid(x)==objectid(y)` if `x === y`.
+
+See also [`hash`](@ref), [`IdDict`](@ref).
 """
 objectid(@nospecialize(x)) = ccall(:jl_object_id, UInt, (Any,), x)
 
@@ -446,6 +457,8 @@ Return `true` iff value `v` is mutable.  See [Mutable Composite Types](@ref)
 for a discussion of immutability. Note that this function works on values, so if you give it
 a type, it will tell you that a value of `DataType` is mutable.
 
+See also [`isbits`](@ref), [`isstructtype`](@ref).
+
 # Examples
 ```jldoctest
 julia> ismutable(1)
@@ -458,7 +471,7 @@ true
 !!! compat "Julia 1.5"
     This function requires at least Julia 1.5.
 """
-ismutable(@nospecialize(x)) = (@_pure_meta; typeof(x).mutable)
+ismutable(@nospecialize(x)) = (@_pure_meta; typeof(x).name.flags & 0x2 == 0x2)
 
 
 """
@@ -473,7 +486,7 @@ Determine whether type `T` was declared as a mutable type
 function ismutabletype(@nospecialize(t::Type))
     t = unwrap_unionall(t)
     # TODO: what to do for `Union`?
-    return isa(t, DataType) && t.mutable
+    return isa(t, DataType) && t.name.flags & 0x2 == 0x2
 end
 
 
@@ -489,7 +502,7 @@ function isstructtype(@nospecialize(t::Type))
     # TODO: what to do for `Union`?
     isa(t, DataType) || return false
     hasfield = !isdefined(t, :types) || !isempty(t.types)
-    return hasfield || (t.size == 0 && !t.abstract)
+    return hasfield || (t.size == 0 && !isabstracttype(t))
 end
 
 """
@@ -504,7 +517,7 @@ function isprimitivetype(@nospecialize(t::Type))
     # TODO: what to do for `Union`?
     isa(t, DataType) || return false
     hasfield = !isdefined(t, :types) || !isempty(t.types)
-    return !hasfield && t.size != 0 && !t.abstract
+    return !hasfield && t.size != 0 && !isabstracttype(t)
 end
 
 """
@@ -519,6 +532,8 @@ This category of types is significant since they are valid as type parameters,
 may not track [`isdefined`](@ref) / [`isassigned`](@ref) status,
 and have a defined layout that is compatible with C.
 
+See also [`isbits`](@ref), [`isprimitivetype`](@ref), [`ismutable`](@ref).
+
 # Examples
 ```jldoctest
 julia> isbitstype(Complex{Float64})
@@ -528,14 +543,14 @@ julia> isbitstype(Complex)
 false
 ```
 """
-isbitstype(@nospecialize(t::Type)) = (@_pure_meta; isa(t, DataType) && t.isbitstype)
+isbitstype(@nospecialize(t::Type)) = (@_pure_meta; isa(t, DataType) && (t.flags & 0x8) == 0x8)
 
 """
     isbits(x)
 
-Return `true` if `x` is an instance of an `isbitstype` type.
+Return `true` if `x` is an instance of an [`isbitstype`](@ref) type.
 """
-isbits(@nospecialize x) = (@_pure_meta; typeof(x).isbitstype)
+isbits(@nospecialize x) = (@_pure_meta; typeof(x).flags & 0x8 == 0x8)
 
 """
     isdispatchtuple(T)
@@ -544,7 +559,7 @@ Determine whether type `T` is a tuple "leaf type",
 meaning it could appear as a type signature in dispatch
 and has no subtypes (or supertypes) which could appear in a call.
 """
-isdispatchtuple(@nospecialize(t)) = (@_pure_meta; isa(t, DataType) && t.isdispatchtuple)
+isdispatchtuple(@nospecialize(t)) = (@_pure_meta; isa(t, DataType) && (t.flags & 0x4) == 0x4)
 
 iskindtype(@nospecialize t) = (t === DataType || t === UnionAll || t === Union || t === typeof(Bottom))
 isconcretedispatch(@nospecialize t) = isconcretetype(t) && !iskindtype(t)
@@ -564,6 +579,8 @@ end
 Determine whether type `T` is a concrete type, meaning it could have direct instances
 (values `x` such that `typeof(x) === T`).
 
+See also: [`isbits`](@ref), [`isabstracttype`](@ref), [`issingletontype`](@ref).
+
 # Examples
 ```jldoctest
 julia> isconcretetype(Complex)
@@ -585,7 +602,7 @@ julia> isconcretetype(Union{Int,String})
 false
 ```
 """
-isconcretetype(@nospecialize(t)) = (@_pure_meta; isa(t, DataType) && t.isconcretetype)
+isconcretetype(@nospecialize(t)) = (@_pure_meta; isa(t, DataType) && (t.flags & 0x2) == 0x2)
 
 """
     isabstracttype(T)
@@ -606,7 +623,7 @@ function isabstracttype(@nospecialize(t))
     @_pure_meta
     t = unwrap_unionall(t)
     # TODO: what to do for `Union`?
-    return isa(t, DataType) && t.abstract
+    return isa(t, DataType) && (t.name.flags & 0x1) == 0x1
 end
 
 """
@@ -637,19 +654,20 @@ use it in the following manner to summarize information about a struct:
 julia> structinfo(T) = [(fieldoffset(T,i), fieldname(T,i), fieldtype(T,i)) for i = 1:fieldcount(T)];
 
 julia> structinfo(Base.Filesystem.StatStruct)
-12-element Vector{Tuple{UInt64, Symbol, DataType}}:
- (0x0000000000000000, :device, UInt64)
- (0x0000000000000008, :inode, UInt64)
- (0x0000000000000010, :mode, UInt64)
- (0x0000000000000018, :nlink, Int64)
- (0x0000000000000020, :uid, UInt64)
- (0x0000000000000028, :gid, UInt64)
- (0x0000000000000030, :rdev, UInt64)
- (0x0000000000000038, :size, Int64)
- (0x0000000000000040, :blksize, Int64)
- (0x0000000000000048, :blocks, Int64)
- (0x0000000000000050, :mtime, Float64)
- (0x0000000000000058, :ctime, Float64)
+13-element Vector{Tuple{UInt64, Symbol, Type}}:
+ (0x0000000000000000, :desc, Union{RawFD, String})
+ (0x0000000000000008, :device, UInt64)
+ (0x0000000000000010, :inode, UInt64)
+ (0x0000000000000018, :mode, UInt64)
+ (0x0000000000000020, :nlink, Int64)
+ (0x0000000000000028, :uid, UInt64)
+ (0x0000000000000030, :gid, UInt64)
+ (0x0000000000000038, :rdev, UInt64)
+ (0x0000000000000040, :size, Int64)
+ (0x0000000000000048, :blksize, Int64)
+ (0x0000000000000050, :blocks, Int64)
+ (0x0000000000000058, :mtime, Float64)
+ (0x0000000000000060, :ctime, Float64)
 ```
 """
 fieldoffset(x::DataType, idx::Integer) = (@_pure_meta; ccall(:jl_get_field_offset, Csize_t, (Any, Cint), x, idx))
@@ -731,7 +749,7 @@ function fieldcount(@nospecialize t)
         throw(TypeError(:fieldcount, DataType, t))
     end
     if t.name === NamedTuple_typename
-        names, types = t.parameters
+        names, types = t.parameters[1], t.parameters[2]
         if names isa Tuple
             return length(names)
         end
@@ -740,7 +758,7 @@ function fieldcount(@nospecialize t)
         end
         abstr = true
     else
-        abstr = t.abstract || (t.name === Tuple.name && isvatuple(t))
+        abstr = isabstracttype(t) || (t.name === Tuple.name && isvatuple(t))
     end
     if abstr
         throw(ArgumentError("type does not have a definite number of fields"))
@@ -845,7 +863,7 @@ function code_lowered(@nospecialize(f), @nospecialize(t=Tuple); generated::Bool=
         throw(ArgumentError("'debuginfo' must be either :source or :none"))
     end
     return map(method_instances(f, t)) do m
-        if generated && isgenerated(m)
+        if generated && hasgenerator(m)
             if may_invoke_generator(m)
                 return ccall(:jl_code_for_staged, Any, (Any,), m)::CodeInfo
             else
@@ -860,8 +878,8 @@ function code_lowered(@nospecialize(f), @nospecialize(t=Tuple); generated::Bool=
     end
 end
 
-isgenerated(m::Method) = isdefined(m, :generator)
-isgenerated(m::Core.MethodInstance) = isgenerated(m.def)
+hasgenerator(m::Method) = isdefined(m, :generator)
+hasgenerator(m::Core.MethodInstance) = hasgenerator(m.def::Method)
 
 # low-level method lookup functions used by the compiler
 
@@ -878,13 +896,16 @@ function _methods(@nospecialize(f), @nospecialize(t), lim::Int, world::UInt)
 end
 
 function _methods_by_ftype(@nospecialize(t), lim::Int, world::UInt)
-    return _methods_by_ftype(t, lim, world, false, RefValue{UInt}(typemin(UInt)), RefValue{UInt}(typemax(UInt)), Ptr{Int32}(C_NULL))
+    return _methods_by_ftype(t, nothing, lim, world)
+end
+function _methods_by_ftype(@nospecialize(t), mt::Union{Core.MethodTable, Nothing}, lim::Int, world::UInt)
+    return _methods_by_ftype(t, mt, lim, world, false, RefValue{UInt}(typemin(UInt)), RefValue{UInt}(typemax(UInt)), Ptr{Int32}(C_NULL))
 end
-function _methods_by_ftype(@nospecialize(t), lim::Int, world::UInt, ambig::Bool, min::Array{UInt,1}, max::Array{UInt,1}, has_ambig::Array{Int32,1})
-    return ccall(:jl_matching_methods, Any, (Any, Cint, Cint, UInt, Ptr{UInt}, Ptr{UInt}, Ptr{Int32}), t, lim, ambig, world, min, max, has_ambig)::Union{Array{Any,1}, Bool}
+function _methods_by_ftype(@nospecialize(t), mt::Union{Core.MethodTable, Nothing}, lim::Int, world::UInt, ambig::Bool, min::Array{UInt,1}, max::Array{UInt,1}, has_ambig::Array{Int32,1})
+    return ccall(:jl_matching_methods, Any, (Any, Any, Cint, Cint, UInt, Ptr{UInt}, Ptr{UInt}, Ptr{Int32}), t, mt, lim, ambig, world, min, max, has_ambig)::Union{Array{Any,1}, Bool}
 end
-function _methods_by_ftype(@nospecialize(t), lim::Int, world::UInt, ambig::Bool, min::Ref{UInt}, max::Ref{UInt}, has_ambig::Ref{Int32})
-    return ccall(:jl_matching_methods, Any, (Any, Cint, Cint, UInt, Ptr{UInt}, Ptr{UInt}, Ptr{Int32}), t, lim, ambig, world, min, max, has_ambig)::Union{Array{Any,1}, Bool}
+function _methods_by_ftype(@nospecialize(t), mt::Union{Core.MethodTable, Nothing}, lim::Int, world::UInt, ambig::Bool, min::Ref{UInt}, max::Ref{UInt}, has_ambig::Ref{Int32})
+    return ccall(:jl_matching_methods, Any, (Any, Any, Cint, Cint, UInt, Ptr{UInt}, Ptr{UInt}, Ptr{Int32}), t, mt, lim, ambig, world, min, max, has_ambig)::Union{Array{Any,1}, Bool}
 end
 
 function _method_by_ftype(args...)
@@ -898,15 +919,13 @@ end
 # high-level, more convenient method lookup functions
 
 # type for reflecting and pretty-printing a subset of methods
-mutable struct MethodList
+mutable struct MethodList <: AbstractArray{Method,1}
     ms::Array{Method,1}
     mt::Core.MethodTable
 end
 
-length(m::MethodList) = length(m.ms)
-isempty(m::MethodList) = isempty(m.ms)
-iterate(m::MethodList, s...) = iterate(m.ms, s...)
-eltype(::Type{MethodList}) = Method
+size(m::MethodList) = size(m.ms)
+getindex(m::MethodList, i::Integer) = m.ms[i]
 
 function MethodList(mt::Core.MethodTable)
     ms = Method[]
@@ -927,6 +946,8 @@ A list of modules can also be specified as an array.
 
 !!! compat "Julia 1.4"
     At least Julia 1.4 is required for specifying a module.
+
+See also: [`which`](@ref) and `@which`.
 """
 function methods(@nospecialize(f), @nospecialize(t),
                  mod::Union{Tuple{Module},AbstractArray{Module},Nothing}=nothing)
@@ -952,7 +973,7 @@ function methods_including_ambiguous(@nospecialize(f), @nospecialize(t))
     world = typemax(UInt)
     min = RefValue{UInt}(typemin(UInt))
     max = RefValue{UInt}(typemax(UInt))
-    ms = _methods_by_ftype(tt, -1, world, true, min, max, Ptr{Int32}(C_NULL))
+    ms = _methods_by_ftype(tt, nothing, -1, world, true, min, max, Ptr{Int32}(C_NULL))
     isa(ms, Bool) && return ms
     return MethodList(Method[(m::Core.MethodMatch).method for m in ms], typeof(f).name.mt)
 end
@@ -1262,19 +1283,23 @@ function print_statement_costs(io::IO, @nospecialize(tt::Type);
         maxcost = Core.Compiler.statement_costs!(cst, code.code, code, Any[match.sparams...], false, params)
         nd = ndigits(maxcost)
         println(io, meth)
-        IRShow.show_ir(io, code, (io, linestart, idx) -> (print(io, idx > 0 ? lpad(cst[idx], nd+1) : " "^(nd+1), " "); return ""))
+        irshow_config = IRShow.IRShowConfig() do io, linestart, idx
+            print(io, idx > 0 ? lpad(cst[idx], nd+1) : " "^(nd+1), " ")
+            return ""
+        end
+        IRShow.show_ir(io, code, irshow_config)
         println(io)
     end
 end
 
 print_statement_costs(args...; kwargs...) = print_statement_costs(stdout, args...; kwargs...)
 
-function _which(@nospecialize(tt::Type), world=typemax(UInt))
+function _which(@nospecialize(tt::Type), world=get_world_counter())
     min_valid = RefValue{UInt}(typemin(UInt))
     max_valid = RefValue{UInt}(typemax(UInt))
     match = ccall(:jl_gf_invoke_lookup_worlds, Any,
         (Any, UInt, Ptr{Csize_t}, Ptr{Csize_t}),
-        tt, typemax(UInt), min_valid, max_valid)
+        tt, world, min_valid, max_valid)
     if match === nothing
         error("no unique matching method found for the specified argument types")
     end
@@ -1287,6 +1312,8 @@ end
 Returns the method of `f` (a `Method` object) that would be called for arguments of the given `types`.
 
 If `types` is an abstract type, then the method that would be called by `invoke` is returned.
+
+See also: [`parentmodule`](@ref), and `@which` and `@edit` in [`InteractiveUtils`](@ref man-interactive-utils).
 """
 function which(@nospecialize(f), @nospecialize(t))
     if isa(f, Core.Builtin)
@@ -1508,7 +1535,7 @@ function isambiguous(m1::Method, m2::Method; ambiguous_bottom::Bool=false)
         min = UInt[typemin(UInt)]
         max = UInt[typemax(UInt)]
         has_ambig = Int32[0]
-        ms = _methods_by_ftype(ti, -1, typemax(UInt), true, min, max, has_ambig)::Vector
+        ms = _methods_by_ftype(ti, nothing, -1, typemax(UInt), true, min, max, has_ambig)::Vector
         has_ambig[] == 0 && return false
         if !ambiguous_bottom
             filter!(ms) do m::Core.MethodMatch
@@ -1618,7 +1645,6 @@ min_world(m::Core.CodeInfo) = m.min_world
 max_world(m::Core.CodeInfo) = m.max_world
 get_world_counter() = ccall(:jl_get_world_counter, UInt, ())
 
-
 """
     propertynames(x, private=false)
 
@@ -1631,6 +1657,8 @@ as well to get the properties of an instance of the type.
 of the documented interface of `x`.   If you want it to also return "private"
 fieldnames intended for internal use, pass `true` for the optional second argument.
 REPL tab completion on `x.` shows only the `private=false` properties.
+
+See also: [`hasproperty`](@ref), [`hasfield`](@ref).
 """
 propertynames(x) = fieldnames(typeof(x))
 propertynames(m::Module) = names(m)
@@ -1643,5 +1671,55 @@ Return a boolean indicating whether the object `x` has `s` as one of its own pro
 
 !!! compat "Julia 1.2"
      This function requires at least Julia 1.2.
+
+See also: [`propertynames`](@ref), [`hasfield`](@ref).
 """
 hasproperty(x, s::Symbol) = s in propertynames(x)
+
+"""
+    @invoke f(arg::T, ...; kwargs...)
+
+Provides a convenient way to call [`invoke`](@ref);
+`@invoke f(arg1::T1, arg2::T2; kwargs...)` will be expanded into `invoke(f, Tuple{T1,T2}, arg1, arg2; kwargs...)`.
+When an argument's type annotation is omitted, it is treated as having type `Any`, e.g.
+`@invoke f(arg1::T, arg2)` will be expanded into `invoke(f, Tuple{T,Any}, arg1, arg2)`.
+"""
+macro invoke(ex)
+    f, args, kwargs = destructure_callex(ex)
+    arg2typs = map(args) do x
+        isexpr(x, :(::)) ? (x.args...,) : (x, GlobalRef(Core, :Any))
+    end
+    args, argtypes = first.(arg2typs), last.(arg2typs)
+    return esc(:($(GlobalRef(Core, :invoke))($(f), Tuple{$(argtypes...)}, $(args...); $(kwargs...))))
+end
+
+"""
+    @invokelatest f(args...; kwargs...)
+
+Provides a convenient way to call [`Base.invokelatest`](@ref).
+`@invokelatest f(args...; kwargs...)` will simply be expanded into
+`Base.invokelatest(f, args...; kwargs...)`.
+"""
+macro invokelatest(ex)
+    f, args, kwargs = destructure_callex(ex)
+    return esc(:($(GlobalRef(@__MODULE__, :invokelatest))($(f), $(args...); $(kwargs...))))
+end
+
+function destructure_callex(ex)
+    isexpr(ex, :call) || throw(ArgumentError("a call expression f(args...; kwargs...) should be given"))
+
+    f = first(ex.args)
+    args = []
+    kwargs = []
+    for x in ex.args[2:end]
+        if isexpr(x, :parameters)
+            append!(kwargs, x.args)
+        elseif isexpr(x, :kw)
+            push!(kwargs, x)
+        else
+            push!(args, x)
+        end
+    end
+
+    return f, args, kwargs
+end
diff --git a/base/refpointer.jl b/base/refpointer.jl
index 67cec0925ff589..cd179c87b30d5d 100644
--- a/base/refpointer.jl
+++ b/base/refpointer.jl
@@ -117,7 +117,7 @@ convert(::Type{Ref{T}}, x::AbstractArray{T}) where {T} = RefArray(x, 1)
 function unsafe_convert(P::Union{Type{Ptr{T}},Type{Ptr{Cvoid}}}, b::RefArray{T})::P where T
     if allocatedinline(T)
         p = pointer(b.x, b.i)
-    elseif isconcretetype(T) && T.mutable
+    elseif isconcretetype(T) && ismutabletype(T)
         p = pointer_from_objref(b.x[b.i])
     else
         # see comment on equivalent branch for RefValue
diff --git a/base/refvalue.jl b/base/refvalue.jl
index cf5f4e6b74d6fd..7cbb651d41aee7 100644
--- a/base/refvalue.jl
+++ b/base/refvalue.jl
@@ -38,7 +38,7 @@ isassigned(x::RefValue) = isdefined(x, :x)
 function unsafe_convert(P::Union{Type{Ptr{T}},Type{Ptr{Cvoid}}}, b::RefValue{T})::P where T
     if allocatedinline(T)
         p = pointer_from_objref(b)
-    elseif isconcretetype(T) && T.mutable
+    elseif isconcretetype(T) && ismutabletype(T)
         p = pointer_from_objref(b.x)
     else
         # If the slot is not leaf type, it could be either immutable or not.
diff --git a/base/regex.jl b/base/regex.jl
index a79f51995fab34..b6019c9c2a941b 100644
--- a/base/regex.jl
+++ b/base/regex.jl
@@ -145,6 +145,47 @@ in a string using an `AbstractPattern`.
 """
 abstract type AbstractMatch end
 
+"""
+    RegexMatch
+
+A type representing a single match to a `Regex` found in a string.
+Typically created from the [`match`](@ref) function.
+
+The `match` field stores the substring of the entire matched string.
+The `captures` field stores the substrings for each capture group, indexed by number.
+To index by capture group name, the entire match object should be indexed instead,
+as shown in the examples.
+The location of the start of the match is stored in the `offset` field.
+The `offsets` field stores the locations of the start of each capture group,
+with 0 denoting a group that was not captured.
+
+This type can be used as an iterator over the capture groups of the `Regex`,
+yielding the substrings captured in each group.
+Because of this, the captures of a match can be destructured.
+If a group was not captured, `nothing` will be yielded instead of a substring.
+
+Methods that accept a `RegexMatch` object are defined for [`iterate`](@ref),
+[`length`](@ref), [`eltype`](@ref), [`keys`](@ref keys(::RegexMatch)), [`haskey`](@ref), and
+[`getindex`](@ref), where keys are the names or numbers of a capture group.
+See [`keys`](@ref keys(::RegexMatch)) for more information.
+
+# Examples
+```jldoctest
+julia> m = match(r"(?<hour>\\d+):(?<minute>\\d+)(am|pm)?", "11:30 in the morning")
+RegexMatch("11:30", hour="11", minute="30", 3=nothing)
+
+julia> hr, min, ampm = m;
+
+julia> hr
+"11"
+
+julia> m["minute"]
+"30"
+
+julia> m.match
+"11:30"
+```
+"""
 struct RegexMatch <: AbstractMatch
     match::SubString{String}
     captures::Vector{Union{Nothing,SubString{String}}}
@@ -153,6 +194,28 @@ struct RegexMatch <: AbstractMatch
     regex::Regex
 end
 
+"""
+    keys(m::RegexMatch) -> Vector
+
+Return a vector of keys for all capture groups of the underlying regex.
+A key is included even if the capture group fails to match.
+That is, `idx` will be in the return value even if `m[idx] == nothing`.
+
+Unnamed capture groups will have integer keys corresponding to their index.
+Named capture groups will have string keys.
+
+!!! compat "Julia 1.6"
+    This method was added in Julia 1.6.
+
+# Examples
+```jldoctest
+julia> keys(match(r"(?<hour>\\d+):(?<minute>\\d+)(am|pm)?", "11:30"))
+3-element Vector{Any}:
+  "hour"
+  "minute"
+ 3
+```
+"""
 function keys(m::RegexMatch)
     idx_to_capture_name = PCRE.capture_names(m.regex.regex)
     return map(eachindex(m.captures)) do i
@@ -275,7 +338,7 @@ end
 """
     match(r::Regex, s::AbstractString[, idx::Integer[, addopts]])
 
-Search for the first match of the regular expression `r` in `s` and return a `RegexMatch`
+Search for the first match of the regular expression `r` in `s` and return a [`RegexMatch`](@ref)
 object containing the match, or nothing if the match failed. The matching substring can be
 retrieved by accessing `m.match` and the captured sequences can be retrieved by accessing
 `m.captures` The optional `idx` argument specifies an index at which to start the search.
@@ -465,6 +528,7 @@ end
 Stores the given string `substr` as a `SubstitutionString`, for use in regular expression
 substitutions. Most commonly constructed using the [`@s_str`](@ref) macro.
 
+# Examples
 ```jldoctest
 julia> SubstitutionString("Hello \\\\g<name>, it's \\\\1")
 s"Hello \\g<name>, it's \\1"
@@ -501,6 +565,7 @@ Construct a substitution string, used for regular expression substitutions.  Wit
 string, sequences of the form `\\N` refer to the Nth capture group in the regex, and
 `\\g<groupname>` refers to a named capture group with name `groupname`.
 
+# Examples
 ```jldoctest
 julia> msg = "#Hello# from Julia";
 
@@ -526,13 +591,20 @@ _free_pat_replacer(r::RegexAndMatchData) = PCRE.free_match_data(r.match_data)
 
 replace_err(repl) = error("Bad replacement string: $repl")
 
-function _write_capture(io, re::RegexAndMatchData, group)
+function _write_capture(io::IO, group::Int, str, r, re::RegexAndMatchData)
     len = PCRE.substring_length_bynumber(re.match_data, group)
+    # in the case of an optional group that doesn't match, len == 0
+    len == 0 && return
     ensureroom(io, len+1)
     PCRE.substring_copy_bynumber(re.match_data, group,
         pointer(io.data, io.ptr), len+1)
     io.ptr += len
     io.size = max(io.size, io.ptr - 1)
+    nothing
+end
+function _write_capture(io::IO, group::Int, str, r, re)
+    group == 0 || replace_err("pattern is not a Regex")
+    return print(io, SubString(str, r))
 end
 
 
@@ -540,7 +612,7 @@ const SUB_CHAR = '\\'
 const GROUP_CHAR = 'g'
 const KEEP_ESC = [SUB_CHAR, GROUP_CHAR, '0':'9'...]
 
-function _replace(io, repl_s::SubstitutionString, str, r, re::RegexAndMatchData)
+function _replace(io, repl_s::SubstitutionString, str, r, re)
     LBRACKET = '<'
     RBRACKET = '>'
     repl = unescape_string(repl_s.string, KEEP_ESC)
@@ -564,7 +636,7 @@ function _replace(io, repl_s::SubstitutionString, str, r, re::RegexAndMatchData)
                         break
                     end
                 end
-                _write_capture(io, re, group)
+                _write_capture(io, group, str, r, re)
             elseif repl[next_i] == GROUP_CHAR
                 i = nextind(repl, next_i)
                 if i > e || repl[i] != LBRACKET
@@ -577,15 +649,16 @@ function _replace(io, repl_s::SubstitutionString, str, r, re::RegexAndMatchData)
                     i = nextind(repl, i)
                     i > e && replace_err(repl)
                 end
-                #  TODO: avoid this allocation
                 groupname = SubString(repl, groupstart, prevind(repl, i))
                 if all(isdigit, groupname)
-                    _write_capture(io, re, parse(Int, groupname))
-                else
+                    group = parse(Int, groupname)
+                elseif re isa RegexAndMatchData
                     group = PCRE.substring_number_from_name(re.re.regex, groupname)
                     group < 0 && replace_err("Group $groupname not found in regex $(re.re)")
-                    _write_capture(io, re, group)
+                else
+                    group = -1
                 end
+                _write_capture(io, group, str, r, re)
                 i = nextind(repl, i)
             else
                 replace_err(repl)
diff --git a/base/reinterpretarray.jl b/base/reinterpretarray.jl
index c49c566ae65e5d..ad1e8b26c4461d 100644
--- a/base/reinterpretarray.jl
+++ b/base/reinterpretarray.jl
@@ -12,25 +12,25 @@ struct ReinterpretArray{T,N,S,A<:AbstractArray{S},IsReshaped} <: AbstractArray{T
     writable::Bool
 
     function throwbits(S::Type, T::Type, U::Type)
-        @_noinline_meta
+        @noinline
         throw(ArgumentError("cannot reinterpret `$(S)` as `$(T)`, type `$(U)` is not a bits type"))
     end
     function throwsize0(S::Type, T::Type, msg)
-        @_noinline_meta
+        @noinline
         throw(ArgumentError("cannot reinterpret a zero-dimensional `$(S)` array to `$(T)` which is of a $msg size"))
     end
 
     global reinterpret
     function reinterpret(::Type{T}, a::A) where {T,N,S,A<:AbstractArray{S, N}}
         function thrownonint(S::Type, T::Type, dim)
-            @_noinline_meta
+            @noinline
             throw(ArgumentError("""
                 cannot reinterpret an `$(S)` array to `$(T)` whose first dimension has size `$(dim)`.
                 The resulting array would have non-integral first dimension.
                 """))
         end
         function throwaxes1(S::Type, T::Type, ax1)
-            @_noinline_meta
+            @noinline
             throw(ArgumentError("cannot reinterpret a `$(S)` array to `$(T)` when the first axis is $ax1. Try reshaping first."))
         end
         isbitstype(T) || throwbits(S, T, T)
@@ -51,11 +51,11 @@ struct ReinterpretArray{T,N,S,A<:AbstractArray{S},IsReshaped} <: AbstractArray{T
     # With reshaping
     function reinterpret(::typeof(reshape), ::Type{T}, a::A) where {T,S,A<:AbstractArray{S}}
         function throwintmult(S::Type, T::Type)
-            @_noinline_meta
+            @noinline
             throw(ArgumentError("`reinterpret(reshape, T, a)` requires that one of `sizeof(T)` (got $(sizeof(T))) and `sizeof(eltype(a))` (got $(sizeof(S))) be an integer multiple of the other"))
         end
         function throwsize1(a::AbstractArray, T::Type)
-            @_noinline_meta
+            @noinline
             throw(ArgumentError("`reinterpret(reshape, $T, a)` where `eltype(a)` is $(eltype(a)) requires that `axes(a, 1)` (got $(axes(a, 1))) be equal to 1:$(sizeof(T) ÷ sizeof(eltype(a))) (from the ratio of element sizes)"))
         end
         isbitstype(T) || throwbits(S, T, T)
@@ -141,7 +141,7 @@ StridedVecOrMat{T} = Union{StridedVector{T}, StridedMatrix{T}}
 stride(a::Union{DenseArray,StridedReshapedArray,StridedReinterpretArray}, i::Int) = _stride(a, i)
 
 function stride(a::ReinterpretArray, i::Int)
-    a.parent isa StridedArray || ArgumentError("Parent must be strided.") |> throw
+    a.parent isa StridedArray || throw(ArgumentError("Parent must be strided."))
     return _stride(a, i)
 end
 
@@ -157,7 +157,7 @@ function _stride(a, i)
 end
 
 function strides(a::ReinterpretArray)
-    a.parent isa StridedArray || ArgumentError("Parent must be strided.") |> throw
+    a.parent isa StridedArray || throw(ArgumentError("Parent must be strided."))
     size_to_strides(1, size(a)...)
 end
 strides(a::Union{DenseArray,StridedReshapedArray,StridedReinterpretArray}) = size_to_strides(1, size(a)...)
@@ -683,7 +683,7 @@ end
 
 @noinline function mapreduce_impl(f::F, op::OP, A::AbstractArrayOrBroadcasted,
                                   ifirst::SCI, ilast::SCI, blksize::Int) where {F,OP,SCI<:SCartesianIndex2{K}} where K
-    if ifirst.j + blksize > ilast.j
+    if ilast.j - ifirst.j < blksize
         # sequential portion
         @inbounds a1 = A[ifirst]
         @inbounds a2 = A[SCI(2,ifirst.j)]
@@ -702,7 +702,7 @@ end
         return v
     else
         # pairwise portion
-        jmid = (ifirst.j + ilast.j) >> 1
+        jmid = ifirst.j + (ilast.j - ifirst.j) >> 1
         v1 = mapreduce_impl(f, op, A, ifirst, SCI(K,jmid), blksize)
         v2 = mapreduce_impl(f, op, A, SCI(1,jmid+1), ilast, blksize)
         return op(v1, v2)
diff --git a/base/reshapedarray.jl b/base/reshapedarray.jl
index d9a9f4eafaa803..cabe3c9d10a589 100644
--- a/base/reshapedarray.jl
+++ b/base/reshapedarray.jl
@@ -113,6 +113,7 @@ reshape(parent::AbstractArray, dims::Dims)        = _reshape(parent, dims)
 
 # Allow missing dimensions with Colon():
 reshape(parent::AbstractVector, ::Colon) = parent
+reshape(parent::AbstractVector, ::Tuple{Colon}) = parent
 reshape(parent::AbstractArray, dims::Int...) = reshape(parent, dims)
 reshape(parent::AbstractArray, dims::Union{Int,Colon}...) = reshape(parent, dims)
 reshape(parent::AbstractArray, dims::Tuple{Vararg{Union{Int,Colon}}}) = reshape(parent, _reshape_uncolon(parent, dims))
@@ -146,7 +147,7 @@ end
 # product of trailing dims into the last element
 rdims_trailing(l, inds...) = length(l) * rdims_trailing(inds...)
 rdims_trailing(l) = length(l)
-rdims(out::Val{N}, inds::Tuple) where {N} = rdims(ntuple(i -> OneTo(1), Val(N)), inds)
+rdims(out::Val{N}, inds::Tuple) where {N} = rdims(ntuple(Returns(OneTo(1)), Val(N)), inds)
 rdims(out::Tuple{}, inds::Tuple{}) = () # N == 0, M == 0
 rdims(out::Tuple{}, inds::Tuple{Any}) = ()
 rdims(out::Tuple{}, inds::NTuple{M,Any}) where {M} = ()
@@ -286,7 +287,7 @@ viewindexing(I::Tuple{Slice, ReshapedUnitRange, Vararg{ScalarIndex}}) = IndexLin
 viewindexing(I::Tuple{ReshapedRange, Vararg{ScalarIndex}}) = IndexLinear()
 compute_stride1(s, inds, I::Tuple{ReshapedRange, Vararg{Any}}) = s*step(I[1].parent)
 compute_offset1(parent::AbstractVector, stride1::Integer, I::Tuple{ReshapedRange}) =
-    (@_inline_meta; first(I[1]) - first(axes1(I[1]))*stride1)
+    (@inline; first(I[1]) - first(axes1(I[1]))*stride1)
 substrides(strds::NTuple{N,Int}, I::Tuple{ReshapedUnitRange, Vararg{Any}}) where N =
     (size_to_strides(strds[1], size(I[1])...)..., substrides(tail(strds), tail(I))...)
 unsafe_convert(::Type{Ptr{T}}, V::SubArray{T,N,P,<:Tuple{Vararg{Union{RangeIndex,ReshapedUnitRange}}}}) where {T,N,P} =
diff --git a/base/ryu/Ryu.jl b/base/ryu/Ryu.jl
index 1d260fe9b3696e..81d1c41f4c19f0 100644
--- a/base/ryu/Ryu.jl
+++ b/base/ryu/Ryu.jl
@@ -64,7 +64,7 @@ Various options for the output format include:
   * `hash`: whether the decimal point should be written, even if no additional digits are needed for precision
   * `precision`: minimum number of significant digits to be included in the decimal string; extra `'0'` characters will be added for padding if necessary
   * `decchar`: decimal point character to be used
-  * `trimtrailingzeros`: whether trailing zeros should be removed
+  * `trimtrailingzeros`: whether trailing zeros of the fractional part should be removed
 """
 function writefixed(x::T,
     precision::Integer,
diff --git a/base/ryu/exp.jl b/base/ryu/exp.jl
index cf1fe23105b8c9..30291212d014d3 100644
--- a/base/ryu/exp.jl
+++ b/base/ryu/exp.jl
@@ -1,25 +1,16 @@
-@inline function writeexp(buf, pos, v::T,
+function writeexp(buf, pos, v::T,
     precision=-1, plus=false, space=false, hash=false,
     expchar=UInt8('e'), decchar=UInt8('.'), trimtrailingzeros=false) where {T <: Base.IEEEFloat}
     @assert 0 < pos <= length(buf)
     startpos = pos
     x = Float64(v)
-    neg = signbit(x)
+    pos = append_sign(x, plus, space, buf, pos)
+
     # special cases
     if x == 0
-        if neg
-            buf[pos] = UInt8('-')
-            pos += 1
-        elseif plus
-            buf[pos] = UInt8('+')
-            pos += 1
-        elseif space
-            buf[pos] = UInt8(' ')
-            pos += 1
-        end
         buf[pos] = UInt8('0')
         pos += 1
-        if precision > 0
+        if precision > 0 && !trimtrailingzeros
             buf[pos] = decchar
             pos += 1
             for _ = 1:precision
@@ -41,16 +32,6 @@
         buf[pos + 2] = UInt8('N')
         return pos + 3
     elseif !isfinite(x)
-        if neg
-            buf[pos] = UInt8('-')
-            pos += 1
-        elseif plus
-            buf[pos] = UInt8('+')
-            pos += 1
-        elseif space
-            buf[pos] = UInt8(' ')
-            pos += 1
-        end
         buf[pos] = UInt8('I')
         buf[pos + 1] = UInt8('n')
         buf[pos + 2] = UInt8('f')
@@ -70,16 +51,6 @@
     end
     nonzero = false
     precision += 1
-    if neg
-        buf[pos] = UInt8('-')
-        pos += 1
-    elseif plus
-        buf[pos] = UInt8('+')
-        pos += 1
-    elseif space
-        buf[pos] = UInt8(' ')
-        pos += 1
-    end
     digits = 0
     printedDigits = 0
     availableDigits = 0
@@ -213,7 +184,7 @@
         roundPos = pos
         while true
             roundPos -= 1
-            if roundPos == (startpos - 1) || buf[roundPos] == UInt8('-')
+            if roundPos == (startpos - 1) || buf[roundPos] == UInt8('-') || (plus && buf[roundPos] == UInt8('+')) || (space && buf[roundPos] == UInt8(' '))
                 buf[roundPos + 1] = UInt8('1')
                 e += 1
                 break
diff --git a/base/ryu/fixed.jl b/base/ryu/fixed.jl
index 4be1b3741832eb..e0085f5c66dab1 100644
--- a/base/ryu/fixed.jl
+++ b/base/ryu/fixed.jl
@@ -1,30 +1,18 @@
-@inline function writefixed(buf, pos, v::T,
+function writefixed(buf, pos, v::T,
     precision=-1, plus=false, space=false, hash=false,
     decchar=UInt8('.'), trimtrailingzeros=false) where {T <: Base.IEEEFloat}
     @assert 0 < pos <= length(buf)
     startpos = pos
     x = Float64(v)
-    neg = signbit(x)
+    pos = append_sign(x, plus, space, buf, pos)
+
     # special cases
     if x == 0
-        if neg
-            buf[pos] = UInt8('-')
-            pos += 1
-        elseif plus
-            buf[pos] = UInt8('+')
-            pos += 1
-        elseif space
-            buf[pos] = UInt8(' ')
-            pos += 1
-        end
         buf[pos] = UInt8('0')
         pos += 1
-        if precision > 0
+        if precision > 0 && !trimtrailingzeros
             buf[pos] = decchar
             pos += 1
-            if trimtrailingzeros
-                precision = 1
-            end
             for _ = 1:precision
                 buf[pos] = UInt8('0')
                 pos += 1
@@ -40,16 +28,6 @@
         buf[pos + 2] = UInt8('N')
         return pos + 3
     elseif !isfinite(x)
-        if neg
-            buf[pos] = UInt8('-')
-            pos += 1
-        elseif plus
-            buf[pos] = UInt8('+')
-            pos += 1
-        elseif space
-            buf[pos] = UInt8(' ')
-            pos += 1
-        end
         buf[pos] = UInt8('I')
         buf[pos + 1] = UInt8('n')
         buf[pos + 2] = UInt8('f')
@@ -68,16 +46,6 @@
         m2 = (Int64(1) << 52) | mant
     end
     nonzero = false
-    if neg
-        buf[pos] = UInt8('-')
-        pos += 1
-    elseif plus
-        buf[pos] = UInt8('+')
-        pos += 1
-    elseif space
-        buf[pos] = UInt8(' ')
-        pos += 1
-    end
     if e2 >= -52
         idx = e2 < 0 ? 0 : indexforexp(e2)
         p10bits = pow10bitsforindex(idx)
@@ -101,9 +69,11 @@
         buf[pos] = UInt8('0')
         pos += 1
     end
+    hasfractional = false
     if precision > 0 || hash
         buf[pos] = decchar
         pos += 1
+        hasfractional = true
     end
     if e2 < 0
         idx = div(-e2, 16)
@@ -166,11 +136,12 @@
             dotPos = 1
             while true
                 roundPos -= 1
-                if roundPos == (startpos - 1) || (buf[roundPos] == UInt8('-'))
+                if roundPos == (startpos - 1) || (buf[roundPos] == UInt8('-')) || (plus && buf[roundPos] == UInt8('+')) || (space && buf[roundPos] == UInt8(' '))
                     buf[roundPos + 1] = UInt8('1')
                     if dotPos > 1
                         buf[dotPos] = UInt8('0')
                         buf[dotPos + 1] = decchar
+                        hasfractional = true
                     end
                     buf[pos] = UInt8('0')
                     pos += 1
@@ -199,7 +170,7 @@
             pos += 1
         end
     end
-    if trimtrailingzeros
+    if trimtrailingzeros && hasfractional
         while buf[pos - 1] == UInt8('0')
             pos -= 1
         end
diff --git a/base/ryu/shortest.jl b/base/ryu/shortest.jl
index 52502fecf96103..f95c09d235e6df 100644
--- a/base/ryu/shortest.jl
+++ b/base/ryu/shortest.jl
@@ -224,13 +224,11 @@ integer. If a `maxsignif` argument is provided, then `b < maxsignif`.
     return b, e10
 end
 
-
-@inline function writeshortest(buf::Vector{UInt8}, pos, x::T,
-                               plus=false, space=false, hash=true,
-                               precision=-1, expchar=UInt8('e'), padexp=false, decchar=UInt8('.'),
-                               typed=false, compact=false) where {T}
+function writeshortest(buf::Vector{UInt8}, pos, x::T,
+                       plus=false, space=false, hash=true,
+                       precision=-1, expchar=UInt8('e'), padexp=false, decchar=UInt8('.'),
+                       typed=false, compact=false) where {T}
     @assert 0 < pos <= length(buf)
-    neg = signbit(x)
     # special cases
     if x == 0
         if typed && x isa Float16
@@ -244,17 +242,7 @@ end
             buf[pos + 7] = UInt8('(')
             pos += 8
         end
-
-        if neg
-            buf[pos] = UInt8('-')
-            pos += 1
-        elseif plus
-            buf[pos] = UInt8('+')
-            pos += 1
-        elseif space
-            buf[pos] = UInt8(' ')
-            pos += 1
-        end
+        pos = append_sign(x, plus, space, buf, pos)
         buf[pos] = UInt8('0')
         pos += 1
         if hash
@@ -291,6 +279,7 @@ end
         end
         return pos
     elseif isnan(x)
+        pos = append_sign(x, plus, space, buf, pos)
         buf[pos] = UInt8('N')
         buf[pos + 1] = UInt8('a')
         buf[pos + 2] = UInt8('N')
@@ -305,22 +294,20 @@ end
         end
         return pos + 3 + (typed && x isa Union{Float32, Float16} ? 2 : 0)
     elseif !isfinite(x)
-        if neg
-            buf[pos] = UInt8('-')
-        end
-        buf[pos + neg] = UInt8('I')
-        buf[pos + neg + 1] = UInt8('n')
-        buf[pos + neg + 2] = UInt8('f')
+        pos = append_sign(x, plus, space, buf, pos)
+        buf[pos] = UInt8('I')
+        buf[pos + 1] = UInt8('n')
+        buf[pos + 2] = UInt8('f')
         if typed
             if x isa Float32
-                buf[pos + neg + 3] = UInt8('3')
-                buf[pos + neg + 4] = UInt8('2')
+                buf[pos + 3] = UInt8('3')
+                buf[pos + 4] = UInt8('2')
             elseif x isa Float16
-                buf[pos + neg + 3] = UInt8('1')
-                buf[pos + neg + 4] = UInt8('6')
+                buf[pos + 3] = UInt8('1')
+                buf[pos + 4] = UInt8('6')
             end
         end
-        return pos + neg + 3 + (typed && x isa Union{Float32, Float16} ? 2 : 0)
+        return pos + 3 + (typed && x isa Union{Float32, Float16} ? 2 : 0)
     end
 
     output, nexp = reduce_shortest(x, compact ? 999_999 : nothing)
@@ -336,16 +323,7 @@ end
         buf[pos + 7] = UInt8('(')
         pos += 8
     end
-    if neg
-        buf[pos] = UInt8('-')
-        pos += 1
-    elseif plus
-        buf[pos] = UInt8('+')
-        pos += 1
-    elseif space
-        buf[pos] = UInt8(' ')
-        pos += 1
-    end
+    pos = append_sign(x, plus, space, buf, pos)
 
     olength = decimallength(output)
     exp_form = true
diff --git a/base/ryu/utils.jl b/base/ryu/utils.jl
index 24e38a0a50241d..352f8f19cb9bed 100644
--- a/base/ryu/utils.jl
+++ b/base/ryu/utils.jl
@@ -195,6 +195,20 @@ Compute `(m * mul) >> j % 10^9` where `mul = mula + mulb<<64 + mulc<<128`, and `
     return (v % UInt32) - UInt32(1000000000) * shifted
 end
 
+@inline function append_sign(x, plus, space, buf, pos)
+    if signbit(x) && !isnan(x)  # never emit a minus sign for NaN, even when its sign bit is set
+        buf[pos] = UInt8('-')
+        pos += 1
+    elseif plus  # reached for non-negative values and for NaN: write an explicit '+'
+        buf[pos] = UInt8('+')
+        pos += 1
+    elseif space  # otherwise reserve the sign column with a blank if requested
+        buf[pos] = UInt8(' ')
+        pos += 1
+    end
+    return pos  # index of the next byte to write in `buf` (unchanged if nothing was written)
+end
+
 @inline function append_n_digits(olength, digits, buf, pos)
     i = 0
     while digits >= 10000
diff --git a/base/set.jl b/base/set.jl
index fb837fbf861662..6511d1dd7e108e 100644
--- a/base/set.jl
+++ b/base/set.jl
@@ -3,13 +3,21 @@
 struct Set{T} <: AbstractSet{T}
     dict::Dict{T,Nothing}
 
-    Set{T}() where {T} = new(Dict{T,Nothing}())
-    Set{T}(s::Set{T}) where {T} = new(Dict{T,Nothing}(s.dict))
+    global _Set(dict::Dict{T,Nothing}) where {T} = new{T}(dict)
 end
 
+Set{T}() where {T} = _Set(Dict{T,Nothing}())
+Set{T}(s::Set{T}) where {T} = _Set(Dict{T,Nothing}(s.dict))
 Set{T}(itr) where {T} = union!(Set{T}(), itr)
 Set() = Set{Any}()
 
+function Set{T}(s::KeySet{T, <:Dict{T}}) where {T}  # build a Set from a Dict's key set by duplicating the dict's table
+    d = s.dict  # the Dict whose keys `s` wraps
+    slots = copy(d.slots)  # copied so the new Set does not share storage with `d`
+    keys = copy(d.keys)
+    vals = similar(d.vals, Nothing)  # same size as `d.vals`, element type `Nothing`
+    _Set(Dict{T,Nothing}(slots, keys, vals, d.ndel, d.count, d.age, d.idxfloor, d.maxprobe))  # bookkeeping fields carried over from `d`
+end
 
 """
     Set([itr])
@@ -17,6 +25,8 @@ Set() = Set{Any}()
 Construct a [`Set`](@ref) of the values generated by the given iterable object, or an
 empty set. Should be used instead of [`BitSet`](@ref) for sparse integer sets, or
 for sets of arbitrary objects.
+
+See also: [`push!`](@ref), [`empty!`](@ref), [`union!`](@ref), [`in`](@ref).
 """
 Set(itr) = _Set(itr, IteratorEltype(itr))
 
@@ -105,6 +115,8 @@ as determined by [`isequal`](@ref), in the order that the first of each
 set of equivalent elements originally appears. The element type of the
 input is preserved.
 
+See also: [`unique!`](@ref), [`allunique`](@ref).
+
 # Examples
 ```jldoctest
 julia> unique([1, 2, 6, 2])
@@ -371,6 +383,8 @@ end
 
 Return `true` if all values from `itr` are distinct when compared with [`isequal`](@ref).
 
+See also: [`unique`](@ref), [`issorted`](@ref).
+
 # Examples
 ```jldoctest
 julia> a = [1; 2; 3]
@@ -379,6 +393,9 @@ julia> a = [1; 2; 3]
  2
  3
 
+julia> allunique(a)
+true
+
 julia> allunique([a, a])
 false
 ```
@@ -537,7 +554,10 @@ of the result will not include singleton types which are replaced with values of
 a different type: for example, `Union{T,Missing}` will become `T` if `missing` is
 replaced.
 
-See also [`replace!`](@ref).
+See also [`replace!`](@ref), [`splice!`](@ref), [`delete!`](@ref), [`insert!`](@ref).
+
+!!! compat "Julia 1.7"
+    Version 1.7 is required to replace elements of a `Tuple`.
 
 # Examples
 ```jldoctest
@@ -587,6 +607,9 @@ Return a copy of `A` where each value `x` in `A` is replaced by `new(x)`.
 If `count` is specified, then replace at most `count` values in total
 (replacements being defined as `new(x) !== x`).
 
+!!! compat "Julia 1.7"
+    Version 1.7 is required to replace elements of a `Tuple`.
+
 # Examples
 ```jldoctest
 julia> replace(x -> isodd(x) ? 2x : x, [1, 2, 3, 4])
@@ -612,7 +635,6 @@ replace!(a::Callable, b::Pair; count::Integer=-1) = throw(MethodError(replace!,
 replace!(a::Callable, b::Pair, c::Pair; count::Integer=-1) = throw(MethodError(replace!, (a, b, c)))
 replace(a::Callable, b::Pair; count::Integer=-1) = throw(MethodError(replace, (a, b)))
 replace(a::Callable, b::Pair, c::Pair; count::Integer=-1) = throw(MethodError(replace, (a, b, c)))
-replace(a::AbstractString, b::Pair, c::Pair) = throw(MethodError(replace, (a, b, c)))
 
 ### replace! for AbstractDict/AbstractSet
 
@@ -747,7 +769,7 @@ replace(f::Callable, t::Tuple; count::Integer=typemax(Int)) =
 
 function _replace(t::Tuple, count::Int, old_new::Tuple{Vararg{Pair}})
     _replace(t, count) do x
-        @_inline_meta
+        @inline
         for o_n in old_new
             isequal(first(o_n), x) && return last(o_n)
         end
diff --git a/base/shell.jl b/base/shell.jl
index 48fcb4079bda0c..bcece48681e5c9 100644
--- a/base/shell.jl
+++ b/base/shell.jl
@@ -87,15 +87,25 @@ function shell_parse(str::AbstractString, interpolate::Bool=true;
             elseif !in_single_quotes && c == '"'
                 in_double_quotes = !in_double_quotes
                 i = consume_upto!(arg, s, i, j)
-            elseif c == '\\'
-                if in_double_quotes
+            elseif !in_single_quotes && c == '\\'
+                if !isempty(st) && peek(st)[2] in ('\n', '\r')
+                    i = consume_upto!(arg, s, i, j) + 1
+                    if popfirst!(st)[2] == '\r' && peek(st)[2] == '\n'
+                        i += 1
+                        popfirst!(st)
+                    end
+                    while !isempty(st) && peek(st)[2] in (' ', '\t')
+                        i = nextind(str, i)
+                        _ = popfirst!(st)
+                    end
+                elseif in_double_quotes
                     isempty(st) && error("unterminated double quote")
                     k, c′ = peek(st)
                     if c′ == '"' || c′ == '$' || c′ == '\\'
                         i = consume_upto!(arg, s, i, j)
                         _ = popfirst!(st)
                     end
-                elseif !in_single_quotes
+                else
                     isempty(st) && error("dangling backslash")
                     i = consume_upto!(arg, s, i, j)
                     _ = popfirst!(st)
@@ -251,6 +261,53 @@ julia> Base.shell_escape_posixly("echo", "this", "&&", "that")
 shell_escape_posixly(args::AbstractString...) =
     sprint(print_shell_escaped_posixly, args...)
 
+"""
+    shell_escape_csh(args::Union{Cmd,AbstractString...})
+    shell_escape_csh(io::IO, args::Union{Cmd,AbstractString...})
+
+This function quotes any metacharacters in the string arguments such
+that the string returned can be inserted into a command-line for
+interpretation by the Unix C shell (csh, tcsh), where each string
+argument will form one word.
+
+In contrast to a POSIX shell, csh does not support the use of the
+backslash as a general escape character in double-quoted strings.
+Therefore, this function wraps strings that might contain
+metacharacters in single quotes, except for parts that contain single
+quotes, which it wraps in double quotes instead. It switches between
+these types of quotes as needed. Linefeed characters are escaped with
+a backslash.
+
+This function should also work for a POSIX shell, except if the input
+string contains a linefeed (`"\\n"`) character.
+
+See also: [`shell_escape_posixly`](@ref)
+"""
+function shell_escape_csh(io::IO, args::AbstractString...)
+    first = true
+    for arg in args
+        first || write(io, ' ')  # words are separated by a single space
+        first = false
+        i = 1  # byte index of the first character of `arg` not yet written
+        while true
+            for (r,e) = (r"^[A-Za-z0-9/\._-]+\z" => "",  # rest is plain: no quoting needed
+                         r"^[^']*\z" => "'", r"^[^\$\`\"]*\z" => "\"",  # rest fits a single quote style
+                         r"^[^']+"  => "'", r"^[^\$\`\"]+"  => "\"")  # else quote the longest matching prefix
+                if ((m = match(r, SubString(arg, i))) !== nothing)
+                    write(io, e)
+                    write(io, replace(m.match, '\n' => "\\\n"))  # linefeed -> backslash + linefeed
+                    write(io, e)
+                    i += ncodeunits(m.match)  # advance past the chunk just written
+                    break
+                end
+            end
+            i <= lastindex(arg) || break  # stop once all of `arg` has been written
+        end
+    end
+end
+shell_escape_csh(args::AbstractString...) =
+    sprint(shell_escape_csh, args...;
+           sizehint = sum(sizeof, args; init = 0) + length(args) * 3)  # init: zero arguments must not throw
 
 """
     shell_escape_wincmd(s::AbstractString)
@@ -285,11 +342,11 @@ This function may be useful in concert with the `windows_verbatim` flag to
 
 ```julia
 wincmd(c::String) =
-   run(Cmd(Cmd(["cmd.exe", "/s /c \" \$c \""]);
+   run(Cmd(Cmd(["cmd.exe", "/s /c \\" \$c \\""]);
            windows_verbatim=true))
 wincmd_echo(s::String) =
    wincmd("echo " * Base.shell_escape_wincmd(s))
-wincmd_echo("hello \$(ENV["USERNAME"]) & the \"whole\" world! (=^I^=)")
+wincmd_echo("hello \$(ENV["USERNAME"]) & the \\"whole\\" world! (=^I^=)")
 ```
 
 But take note that if the input string `s` contains a `%`, the argument list
@@ -316,13 +373,13 @@ run(setenv(`cmd /C echo %cmdargs%`, "cmdargs" => cmdargs))
     ```julia
     to_print = "All for 1 & 1 for all!"
     to_print_esc = Base.shell_escape_wincmd(Base.shell_escape_wincmd(to_print))
-    run(Cmd(Cmd(["cmd", "/S /C \" break | echo \$(to_print_esc) \""]), windows_verbatim=true))
+    run(Cmd(Cmd(["cmd", "/S /C \\" break | echo \$(to_print_esc) \\""]), windows_verbatim=true))
     ```
 
 With an I/O stream parameter `io`, the result will be written there,
 rather than returned as a string.
 
-See also: [`escape_microsoft_c_args`](@ref), [`shell_escape_posixly`](@ref)
+See also [`escape_microsoft_c_args`](@ref), [`shell_escape_posixly`](@ref).
 
 # Example
 ```jldoctest
@@ -376,7 +433,7 @@ It joins command-line arguments to be passed to a Windows
 C/C++/Julia application into a command line, escaping or quoting the
 meta characters space, TAB, double quote and backslash where needed.
 
-See also: [`shell_escape_wincmd`](@ref), [`escape_raw_string`](@ref)
+See also [`shell_escape_wincmd`](@ref), [`escape_raw_string`](@ref).
 """
 function escape_microsoft_c_args(io::IO, args::AbstractString...)
     # http://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULES
diff --git a/base/show.jl b/base/show.jl
index 278a5c96df6154..f3110d59814295 100644
--- a/base/show.jl
+++ b/base/show.jl
@@ -45,6 +45,7 @@ function show(io::IO, ::MIME"text/plain", f::Function)
 end
 
 show(io::IO, ::MIME"text/plain", c::ComposedFunction) = show(io, c)
+show(io::IO, ::MIME"text/plain", c::Returns) = show(io, c)
 
 function show(io::IO, ::MIME"text/plain", iter::Union{KeySet,ValueIterator})
     isempty(iter) && get(io, :compact, false) && return show(io, iter)
@@ -583,6 +584,69 @@ function make_typealias(@nospecialize(x::Type))
     end
 end
 
+isgensym(s::Symbol) = '#' in string(s)  # treats any symbol whose name contains '#' as a gensym
+
+function show_can_elide(p::TypeVar, wheres::Vector, elide::Int, env::SimpleVector, skip::Int)
+    elide == 0 && return false  # no `where` variable left to consider
+    wheres[elide] === p || return false  # only the variable at index `elide` is a candidate
+    for i = (elide + 1):length(wheres)
+        v = wheres[i]::TypeVar
+        has_typevar(v.lb, p) && return false  # `p` appears in a later variable's lower bound
+        has_typevar(v.ub, p) && return false  # ... or in its upper bound
+    end
+    for i = 1:length(env)
+        i == skip && continue  # index `skip` is exempt from the check
+        has_typevar(env[i], p) && return false  # `p` is also referenced by another parameter
+    end
+    return true
+end
+
+function show_typeparams(io::IO, env::SimpleVector, orig::SimpleVector, wheres::Vector)
+    n = length(env)  # number of leading parameters that will be printed
+    elide = length(wheres)  # entries of `wheres` past this index count as elided
+    function egal_var(p::TypeVar, @nospecialize o)  # true if `o` is a TypeVar with the same bounds as `p`
+        return o isa TypeVar &&
+            ccall(:jl_types_egal, Cint, (Any, Any), p.ub, o.ub) != 0 &&
+            ccall(:jl_types_egal, Cint, (Any, Any), p.lb, o.lb) != 0
+    end
+    for i = n:-1:1  # scan the parameters from last to first
+        p = env[i]
+        if p isa TypeVar
+            if i == n && egal_var(p, orig[i]) && show_can_elide(p, wheres, elide, env, i)
+                n -= 1  # trailing parameter with the same bounds as in `orig`: do not print it at all
+                elide -= 1
+            elseif p.lb === Union{} && isgensym(p.name) && show_can_elide(p, wheres, elide, env, i)
+                elide -= 1  # printed inline below as `<:ub`
+            elseif p.ub === Any && isgensym(p.name) && show_can_elide(p, wheres, elide, env, i)
+                elide -= 1  # printed inline below as `>:lb`
+            end
+        end
+    end
+    if n > 0
+        print(io, "{")
+        for i = 1:n
+            p = env[i]
+            if p isa TypeVar
+                if p.lb === Union{} && something(findfirst(@nospecialize(w) -> w === p, wheres), 0) > elide  # `p` sits in the elided tail of `wheres`
+                    print(io, "<:")
+                    show(io, p.ub)
+                elseif p.ub === Any && something(findfirst(@nospecialize(w) -> w === p, wheres), 0) > elide  # same test, lower-bounded case
+                    print(io, ">:")
+                    show(io, p.lb)
+                else
+                    show(io, p)
+                end
+            else
+                show(io, p)
+            end
+            i < n && print(io, ", ")
+        end
+        print(io, "}")
+    end
+    resize!(wheres, elide)  # mutates the caller's vector: the elided variables are removed from it
+    nothing
+end
+
 function show_typealias(io::IO, name::GlobalRef, x::Type, env::SimpleVector, wheres::Vector)
     if !(get(io, :compact, false)::Bool)
         # Print module prefix unless alias is visible from module passed to
@@ -595,21 +659,19 @@ function show_typealias(io::IO, name::GlobalRef, x::Type, env::SimpleVector, whe
         end
     end
     print(io, name.name)
-    n = length(env)
-    n == 0 && return
-
-    print(io, "{")
-    param_io = IOContext(io)
-    for i = 1:length(wheres)
-        p = wheres[i]::TypeVar
-        param_io = IOContext(param_io, :unionall_env => p)
+    isempty(env) && return
+    io = IOContext(io)
+    for p in wheres
+        io = IOContext(io, :unionall_env => p)
     end
-    for i = 1:n
-        p = env[i]
-        show(param_io, p)
-        i < n && print(io, ", ")
+    orig = getfield(name.mod, name.name)
+    vars = TypeVar[]
+    while orig isa UnionAll
+        push!(vars, orig.var)
+        orig = orig.body
     end
-    print(io, "}")
+    show_typeparams(io, env, Core.svec(vars...), wheres)
+    nothing
 end
 
 function make_wheres(io::IO, env::SimpleVector, @nospecialize(x::Type))
@@ -642,12 +704,12 @@ function make_wheres(io::IO, env::SimpleVector, @nospecialize(x::Type))
     return wheres
 end
 
-function show_wheres(io::IO, env::Vector)
-    isempty(env) && return
+function show_wheres(io::IO, wheres::Vector)
+    isempty(wheres) && return
     io = IOContext(io)
-    n = length(env)
+    n = length(wheres)
     for i = 1:n
-        p = env[i]::TypeVar
+        p = wheres[i]::TypeVar
         print(io, n == 1 ? " where " : i == 1 ? " where {" : ", ")
         show(io, p)
         io = IOContext(io, :unionall_env => p)
@@ -796,9 +858,9 @@ function show(io::IO, ::MIME"text/plain", @nospecialize(x::Type))
         if make_typealias(properx) !== nothing || (unwrap_unionall(x) isa Union && x <: make_typealiases(properx)[2])
             show(IOContext(io, :compact => true), x)
             if !(get(io, :compact, false)::Bool)
-                print(io, " (alias for ")
-                show(IOContext(io, :compact => false), x)
-                print(io, ")")
+                printstyled(io, " (alias for "; color = :light_black)
+                printstyled(IOContext(io, :compact => false), x, color = :light_black)
+                printstyled(io, ")"; color = :light_black)
             end
             return
         end
@@ -857,7 +919,11 @@ function _show_type(io::IO, @nospecialize(x::Type))
             push!(wheres, var)
             io = IOContext(io, :unionall_env => var)
         end
-        show(io, x)
+        if x isa DataType
+            show_datatype(io, x, wheres)
+        else
+            show(io, x)
+        end
     end
     show_wheres(io, wheres)
 end
@@ -919,29 +985,23 @@ function show_type_name(io::IO, tn::Core.TypeName)
     nothing
 end
 
-function show_datatype(io::IO, @nospecialize(x::DataType))
+function show_datatype(io::IO, @nospecialize(x::DataType), wheres::Vector=TypeVar[])
     parameters = x.parameters::SimpleVector
     istuple = x.name === Tuple.name
     n = length(parameters)
 
     # Print homogeneous tuples with more than 3 elements compactly as NTuple{N, T}
-    if istuple && n > 3 && all(i -> (parameters[1] === i), parameters)
-        print(io, "NTuple{", n, ", ", parameters[1], "}")
+    if istuple
+        if n > 3 && all(@nospecialize(i) -> (parameters[1] === i), parameters)
+            print(io, "NTuple{", n, ", ", parameters[1], "}")
+        else
+            print(io, "Tuple{")
+            join(io, parameters, ", ")
+            print(io, "}")
+        end
     else
         show_type_name(io, x.name)
-        if (n > 0 || istuple) && x !== Tuple
-            # Do not print the type parameters for the primary type if we are
-            # printing a method signature or type parameter.
-            # Always print the type parameter if we are printing the type directly
-            # since this information is still useful.
-            print(io, '{')
-            for i = 1:n
-                p = parameters[i]
-                show(io, p)
-                i < n && print(io, ", ")
-            end
-            print(io, '}')
-        end
+        show_typeparams(io, parameters, unwrap_unionall(x.name.wrapper).parameters, wheres)
     end
 end
 
@@ -956,9 +1016,22 @@ end
 show_supertypes(typ::DataType) = show_supertypes(stdout, typ)
 
 """
-    @show
+    @show exs...
+
+Print one or more expressions, and their results, to `stdout`, and return the last result.
+
+See also: [`show`](@ref), [`@info`](@ref man-logging), [`println`](@ref).
+
+# Examples
+```jldoctest
+julia> x = @show 1+2
+1 + 2 = 3
+3
 
-Show an expression and result, returning the result. See also [`show`](@ref).
+julia> @show x^2 x/2;
+x ^ 2 = 9
+x / 2 = 1.5
+```
 """
 macro show(exs...)
     blk = Expr(:block)
@@ -1204,7 +1277,7 @@ const ExprNode = Union{Expr, QuoteNode, Slot, LineNumberNode, SSAValue,
 # IOContext(io, :unquote_fallback => false) tells show_unquoted to treat any
 # Expr whose head is :$ as if it is inside a quote, preventing fallback to the
 # "unhandled" case: this is used by print/string to be lawful to Rule 1 above.
-# On the countrary, show/repr have to follow Rule 2, requiring any Expr whose
+# On the contrary, show/repr have to follow Rule 2, requiring any Expr whose
 # head is :$ and which is not inside a quote to fallback to the "unhandled" case:
 # this is behavior is triggered by IOContext(io, :unquote_fallback => true)
 print(        io::IO, ex::ExprNode)    = (show_unquoted(IOContext(io, :unquote_fallback => false), ex, 0, -1); nothing)
@@ -1230,6 +1303,7 @@ const expr_calls  = Dict(:call => ('(',')'), :calldecl => ('(',')'),
                          :ref => ('[',']'), :curly => ('{','}'), :(.) => ('(',')'))
 const expr_parens = Dict(:tuple=>('(',')'), :vcat=>('[',']'),
                          :hcat =>('[',']'), :row =>('[',']'), :vect=>('[',']'),
+                         :ncat =>('[',']'), :nrow =>('[',']'),
                          :braces=>('{','}'), :bracescat=>('{','}'))
 
 ## AST decoding helpers ##
@@ -1257,9 +1331,10 @@ julia> Meta.isidentifier(:x), Meta.isidentifier("1x")
 ```
 """
 function isidentifier(s::AbstractString)
-    isempty(s) && return false
+    x = Iterators.peel(s)
+    isnothing(x) && return false
     (s == "true" || s == "false") && return false
-    c, rest = Iterators.peel(s)
+    c, rest = x
     is_id_start_char(c) || return false
     return all(is_id_char, rest)
 end
@@ -1381,8 +1456,7 @@ function operator_associativity(s::Symbol)
     return :left
 end
 
-is_expr(@nospecialize(ex), head::Symbol)         = isa(ex, Expr) && (ex.head === head)
-is_expr(@nospecialize(ex), head::Symbol, n::Int) = is_expr(ex, head) && length(ex.args) == n
+const is_expr = isexpr
 
 is_quoted(ex)            = false
 is_quoted(ex::QuoteNode) = true
@@ -1404,8 +1478,8 @@ emphasize(io, str::AbstractString, col = Base.error_color()) = get(io, :color, f
     printstyled(io, str; color=col, bold=true) :
     print(io, uppercase(str))
 
-show_linenumber(io::IO, line)       = print(io, "#= line ", line, " =#")
-show_linenumber(io::IO, line, file) = print(io, "#= ", file, ":", line, " =#")
+show_linenumber(io::IO, line)       = printstyled(io, "#= line ", line, " =#", color=:light_black)
+show_linenumber(io::IO, line, file) = printstyled(io, "#= ", file, ":", line, " =#", color=:light_black)
 show_linenumber(io::IO, line, file::Nothing) = show_linenumber(io, line)
 
 # show a block, e g if/for/etc
@@ -1738,14 +1812,16 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In
 
     # list-like forms, e.g. "[1, 2, 3]"
     elseif haskey(expr_parens, head) ||                          # :vcat etc.
-        head === :typed_vcat || head === :typed_hcat
+        head === :typed_vcat || head === :typed_hcat || head === :typed_ncat
         # print the type and defer to the untyped case
-        if head === :typed_vcat || head === :typed_hcat
+        if head === :typed_vcat || head === :typed_hcat || head === :typed_ncat
             show_unquoted(io, args[1], indent, prec, quote_level)
             if head === :typed_vcat
                 head = :vcat
-            else
+            elseif head === :typed_hcat
                 head = :hcat
+            else
+                head = :ncat
             end
             args = args[2:end]
             nargs = nargs - 1
@@ -1755,15 +1831,19 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In
             sep = "; "
         elseif head === :hcat || head === :row
             sep = " "
+        elseif head === :ncat || head === :nrow
+            sep = ";"^args[1]::Int * " "
+            args = args[2:end]
+            nargs = nargs - 1
         else
             sep = ", "
         end
-        head !== :row && print(io, op)
+        head !== :row && head !== :nrow && print(io, op)
         show_list(io, args, sep, indent, 0, quote_level)
-        if nargs == 1 && head === :vcat
-            print(io, ';')
+        if nargs <= 1 && (head === :vcat || head === :ncat)
+            print(io, sep[1:end-1])
         end
-        head !== :row && print(io, cl)
+        head !== :row && head !== :nrow && print(io, cl)
 
     # transpose
     elseif (head === Symbol("'") && nargs == 1) || (
@@ -2226,7 +2306,7 @@ function show_signature_function(io::IO, @nospecialize(ft), demangle=false, farg
         uwf = unwrap_unionall(f)
         parens = isa(f, UnionAll) && !(isa(uwf, DataType) && f === uwf.name.wrapper)
         parens && print(io, "(")
-        show(io, f)
+        print_within_stacktrace(io, f, bold=true)
         parens && print(io, ")")
     else
         if html
@@ -2298,7 +2378,9 @@ end
 function print_type_stacktrace(io, type; color=:normal)
     str = sprint(show, type, context=io)
     i = findfirst('{', str)
-    if i === nothing || !get(io, :backtrace, false)::Bool
+    if !get(io, :backtrace, false)::Bool
+        print(io, str)
+    elseif i === nothing
         printstyled(io, str; color=color)
     else
         printstyled(io, str[1:prevind(str,i)]; color=color)
@@ -2416,7 +2498,7 @@ function show(io::IO, src::CodeInfo; debuginfo::Symbol=:source)
         # TODO: static parameter values?
         # only accepts :source or :none, we can't have a fallback for default since
         # that would break code_typed(, debuginfo=:source) iff IRShow.default_debuginfo[] = :none
-        IRShow.show_ir(lambda_io, src, IRShow.__debuginfo[debuginfo](src))
+        IRShow.show_ir(lambda_io, src, IRShow.IRShowConfig(IRShow.__debuginfo[debuginfo](src)))
     else
         # this is a CodeInfo that has not been used as a method yet, so its locations are still LineNumberNodes
         body = Expr(:block)
@@ -2543,7 +2625,7 @@ function dump(io::IOContext, x::DataType, n::Int, indent)
     if x !== Any
         print(io, " <: ", supertype(x))
     end
-    if n > 0 && !(x <: Tuple) && !x.abstract
+    if n > 0 && !(x <: Tuple) && !isabstracttype(x)
         tvar_io::IOContext = io
         for tparam in x.parameters
             # approximately recapture the list of tvar parameterization
@@ -2709,6 +2791,9 @@ function array_summary(io::IO, a, inds)
     print(io, " with indices ", inds2string(inds))
 end
 
+## `summary` for Function
+summary(io::IO, f::Function) = show(io, MIME"text/plain"(), f)
+
 """
     showarg(io::IO, x, toplevel)
 
@@ -2772,7 +2857,7 @@ function showarg(io::IO, v::SubArray, toplevel)
     toplevel && print(io, " with eltype ", eltype(v))
     return nothing
 end
-showindices(io, ::Union{Slice,IdentityUnitRange}, inds...) =
+showindices(io, ::Slice, inds...) =
     (print(io, ", :"); showindices(io, inds...))
 showindices(io, ind1, inds...) =
     (print(io, ", ", ind1); showindices(io, inds...))
diff --git a/base/simdloop.jl b/base/simdloop.jl
index e0b6d89d972775..29e2382cf39aa8 100644
--- a/base/simdloop.jl
+++ b/base/simdloop.jl
@@ -8,7 +8,7 @@ export @simd, simd_outer_range, simd_inner_length, simd_index
 
 # Error thrown from ill-formed uses of @simd
 struct SimdError <: Exception
-    msg::AbstractString
+    msg::String
 end
 
 # Parse iteration space expression
diff --git a/base/some.jl b/base/some.jl
index 272ed0e00ce318..8be58739a4df41 100644
--- a/base/some.jl
+++ b/base/some.jl
@@ -64,18 +64,20 @@ Return `true` if `x === nothing`, and return `false` if not.
 
 !!! compat "Julia 1.1"
     This function requires at least Julia 1.1.
+
+See also [`something`](@ref), [`notnothing`](@ref), [`ismissing`](@ref).
 """
 isnothing(x) = x === nothing
 
 
 """
-    something(x, y...)
+    something(x...)
 
 Return the first value in the arguments which is not equal to [`nothing`](@ref),
 if any. Otherwise throw an error.
 Arguments of type [`Some`](@ref) are unwrapped.
 
-See also [`coalesce`](@ref).
+See also [`coalesce`](@ref), [`skipmissing`](@ref), [`@something`](@ref).
 
 # Examples
 ```jldoctest
@@ -98,3 +100,46 @@ something() = throw(ArgumentError("No value arguments present"))
 something(x::Nothing, y...) = something(y...)
 something(x::Some, y...) = x.value
 something(x::Any, y...) = x
+
+
+"""
+    @something(x...)
+
+Short-circuiting version of [`something`](@ref).
+
+# Examples
+```jldoctest
+julia> f(x) = (println("f(\$x)"); nothing);
+
+julia> a = 1;
+
+julia> a = @something a f(2) f(3) error("Unable to find default for `a`")
+1
+
+julia> b = nothing;
+
+julia> b = @something b f(2) f(3) error("Unable to find default for `b`")
+f(2)
+f(3)
+ERROR: Unable to find default for `b`
+[...]
+
+julia> b = @something b f(2) f(3) Some(nothing)
+f(2)
+f(3)
+
+julia> b === nothing
+true
+```
+
+!!! compat "Julia 1.7"
+    This macro is available as of Julia 1.7.
+"""
+macro something(args...)
+    expr = :(nothing)
+    for arg in reverse(args)
+        expr = :(val = $(esc(arg)); val !== nothing ? val : ($expr))
+    end
+    something = GlobalRef(Base, :something)
+    return :($something($expr))
+end
diff --git a/base/sort.jl b/base/sort.jl
index 6a3883215c0b46..afde46232cec1e 100644
--- a/base/sort.jl
+++ b/base/sort.jl
@@ -68,7 +68,7 @@ function issorted(itr, order::Ordering)
 end
 
 """
-    issorted(v, lt=isless, by=identity, rev:Bool=false, order::Ordering=Forward)
+    issorted(v, lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward)
 
 Test whether a vector is in sorted order. The `lt`, `by` and `rev` keywords modify what
 order is considered to be sorted just as they do for [`sort`](@ref).
@@ -231,89 +231,59 @@ end
 
 function searchsortedlast(a::AbstractRange{<:Real}, x::Real, o::DirectOrdering)::keytype(a)
     require_one_based_indexing(a)
-    if step(a) == 0
-        lt(o, x, first(a)) ? 0 : length(a)
+    f, h, l = first(a), step(a), last(a)
+    if lt(o, x, f)
+        0
+    elseif h == 0 || !lt(o, x, l)
+        length(a)
     else
-        n = round(Integer, clamp((x - first(a)) / step(a) + 1, 1, length(a)))
+        n = round(Integer, (x - f) / h + 1)
         lt(o, x, a[n]) ? n - 1 : n
     end
 end
 
 function searchsortedfirst(a::AbstractRange{<:Real}, x::Real, o::DirectOrdering)::keytype(a)
     require_one_based_indexing(a)
-    if step(a) == 0
-        lt(o, first(a), x) ? length(a) + 1 : 1
+    f, h, l = first(a), step(a), last(a)
+    if !lt(o, f, x)
+        1
+    elseif h == 0 || lt(o, l, x)
+        length(a) + 1
     else
-        n = round(Integer, clamp((x - first(a)) / step(a) + 1, 1, length(a)))
+        n = round(Integer, (x - f) / h + 1)
         lt(o, a[n], x) ? n + 1 : n
     end
 end
 
 function searchsortedlast(a::AbstractRange{<:Integer}, x::Real, o::DirectOrdering)::keytype(a)
     require_one_based_indexing(a)
-    h = step(a)
-    if h == 0
-        lt(o, x, first(a)) ? 0 : length(a)
-    elseif h > 0 && x < first(a)
-        firstindex(a) - 1
-    elseif h > 0 && x >= last(a)
-        lastindex(a)
-    elseif h < 0 && x > first(a)
-        firstindex(a) - 1
-    elseif h < 0 && x <= last(a)
-        lastindex(a)
+    f, h, l = first(a), step(a), last(a)
+    if lt(o, x, f)
+        0
+    elseif h == 0 || !lt(o, x, l)
+        length(a)
     else
         if o isa ForwardOrdering
-            fld(floor(Integer, x) - first(a), h) + 1
+            fld(floor(Integer, x) - f, h) + 1
         else
-            fld(ceil(Integer, x) - first(a), h) + 1
+            fld(ceil(Integer, x) - f, h) + 1
         end
     end
 end
 
 function searchsortedfirst(a::AbstractRange{<:Integer}, x::Real, o::DirectOrdering)::keytype(a)
     require_one_based_indexing(a)
-    h = step(a)
-    if h == 0
-        lt(o, first(a), x) ? length(a)+1 : 1
-    elseif h > 0 && x <= first(a)
-        firstindex(a)
-    elseif h > 0 && x > last(a)
-        lastindex(a) + 1
-    elseif h < 0 && x >= first(a)
-        firstindex(a)
-    elseif h < 0 && x < last(a)
-        lastindex(a) + 1
+    f, h, l = first(a), step(a), last(a)
+    if !lt(o, f, x)
+        1
+    elseif h == 0 || lt(o, l, x)
+        length(a) + 1
     else
         if o isa ForwardOrdering
-            -fld(floor(Integer, -x) + Signed(first(a)), h) + 1
-        else
-            -fld(ceil(Integer, -x) + Signed(first(a)), h) + 1
-        end
-    end
-end
-
-function searchsortedfirst(a::AbstractRange{<:Integer}, x::Unsigned, o::DirectOrdering)::keytype(a)
-    require_one_based_indexing(a)
-    if lt(o, first(a), x)
-        if step(a) == 0
-            length(a) + 1
+            cld(ceil(Integer, x) - f, h) + 1
         else
-            min(cld(x - first(a), step(a)), length(a)) + 1
+            cld(floor(Integer, x) - f, h) + 1
         end
-    else
-        1
-    end
-end
-
-function searchsortedlast(a::AbstractRange{<:Integer}, x::Unsigned, o::DirectOrdering)::keytype(a)
-    require_one_based_indexing(a)
-    if lt(o, x, first(a))
-        0
-    elseif step(a) == 0
-        length(a)
-    else
-        min(fld(x - first(a), step(a)) + 1, length(a))
     end
 end
 
@@ -337,6 +307,8 @@ according to the order specified by the `by`, `lt` and `rev` keywords, assuming
 is already sorted in that order. Return an empty range located at the insertion point
 if `a` does not contain values equal to `x`.
 
+See also: [`insorted`](@ref), [`searchsortedfirst`](@ref), [`sort`](@ref), [`findall`](@ref).
+
 # Examples
 ```jldoctest
 julia> searchsorted([1, 2, 4, 5, 5, 7], 4) # single match
@@ -363,6 +335,8 @@ Return the index of the first value in `a` greater than or equal to `x`, accordi
 specified order. Return `length(a) + 1` if `x` is greater than all values in `a`.
 `a` is assumed to be sorted.
 
+See also: [`searchsortedlast`](@ref), [`searchsorted`](@ref), [`findfirst`](@ref).
+
 # Examples
 ```jldoctest
 julia> searchsortedfirst([1, 2, 4, 5, 5, 7], 4) # single match
@@ -409,13 +383,14 @@ julia> searchsortedlast([1, 2, 4, 5, 5, 7], 0) # no match, insert at start
 """ searchsortedlast
 
 """
-    insorted(a, x; by=<transform>, lt=<comparison>, rev=false)
+    insorted(a, x; by=<transform>, lt=<comparison>, rev=false) -> Bool
 
 Determine whether an item is in the given sorted collection, in the sense that
 it is [`==`](@ref) to one of the values of the collection according to the order
 specified by the `by`, `lt` and `rev` keywords, assuming that `a` is already
-sorted in that order, see [`sort`](@ref) for the keywords. See also
-[`in`](@ref). Returns a `Bool` value.
+sorted in that order, see [`sort`](@ref) for the keywords.
+
+See also [`in`](@ref).
 
 # Examples
 ```jldoctest
@@ -694,7 +669,8 @@ Sort the vector `v` in place. [`QuickSort`](@ref) is used by default for numeric
 [`MergeSort`](@ref) is used for other arrays. You can specify an algorithm to use via the `alg`
 keyword (see [Sorting Algorithms](@ref) for available algorithms). The `by` keyword lets you provide
 a function that will be applied to each element before comparison; the `lt` keyword allows
-providing a custom "less than" function; use `rev=true` to reverse the sorting order. These
+providing a custom "less than" function (note that for every `x` and `y`, only one of `lt(x,y)`
+and `lt(y,x)` can return `true`); use `rev=true` to reverse the sorting order. These
 options are independent and can be used together in all possible combinations: if both `by`
 and `lt` are specified, the `lt` function is applied to the result of the `by` function;
 `rev=true` reverses whatever ordering specified via the `by` and `lt` keywords.
@@ -907,7 +883,7 @@ using the same keywords as [`sort!`](@ref). The permutation is guaranteed to be
 if the sorting algorithm is unstable, meaning that indices of equal elements appear in
 ascending order.
 
-See also [`sortperm!`](@ref).
+See also [`sortperm!`](@ref), [`partialsortperm`](@ref), [`invperm`](@ref), [`indexin`](@ref).
 
 # Examples
 ```jldoctest
diff --git a/base/special/cbrt.jl b/base/special/cbrt.jl
index 978f7cf89971ea..9fda5c41fb09e0 100644
--- a/base/special/cbrt.jl
+++ b/base/special/cbrt.jl
@@ -146,3 +146,20 @@ function cbrt(x::Union{Float32,Float64})
     t = _approx_cbrt(x)
     return _improve_cbrt(x, t)
 end
+
+function cbrt(a::Float16)
+    if !isfinite(a) || iszero(a)
+        return a
+    end
+    x = Float32(a)
+
+    # 5 bit approximation. Simpler than _approx_cbrt since subnormals can not appear
+    u = highword(x) & 0x7fff_ffff
+    v = div(u, UInt32(3)) + 0x2a5119f2
+    t = copysign(fromhighword(Float32, v), x)
+
+    # 2 newton iterations
+    t = 0.33333334f0 * (2f0*t + x/(t*t))
+    t = 0.33333334f0 * (2f0*t + x/(t*t))
+    return Float16(t)
+end
diff --git a/base/special/exp.jl b/base/special/exp.jl
index 5dffcde3858120..110cd93410d73a 100644
--- a/base/special/exp.jl
+++ b/base/special/exp.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 # magic rounding constant: 1.5*2^52 Adding, then subtracting it from a float rounds it to an Int.
 # This works because eps(MAGIC_ROUND_CONST(T)) == one(T), so adding it to a smaller number aligns the lsb to the 1s place.
 # Values for which this trick doesn't work are going to have outputs of 0 or Inf.
@@ -9,7 +11,7 @@ MAGIC_ROUND_CONST(::Type{Float32}) = 1.048576f7
 MAX_EXP(n::Val{2}, ::Type{Float32}) = 128.0f0
 MAX_EXP(n::Val{2}, ::Type{Float64}) = 1024.0
 MAX_EXP(n::Val{:ℯ}, ::Type{Float32}) = 88.72284f0
-MAX_EXP(n::Val{:ℯ}, ::Type{Float64}) = 709.782712893384
+MAX_EXP(n::Val{:ℯ}, ::Type{Float64}) = 709.7827128933841
 MAX_EXP(n::Val{10}, ::Type{Float32}) = 38.53184f0
 MAX_EXP(n::Val{10}, ::Type{Float64}) = 308.25471555991675
 
@@ -198,91 +200,136 @@ end
 #
 # 2. Approximate b^r by 7th-degree minimax polynomial p_b(r) on the interval [-log(b,2)/2, log(b,2)/2].
 # 3. Scale back: b^x = 2^N * p_b(r)
-
 # For both, a little extra care needs to be taken if b^r is subnormal.
 # The solution is to do the scaling back in 2 steps as just messing with the exponent wouldn't work.
-for (func, base) in (:exp2=>Val(2), :exp=>Val(:ℯ), :exp10=>Val(10))
-    @eval begin
-        function ($func)(x::T) where T<:Float64
-            N_float = muladd(x, LogBo256INV($base, T), MAGIC_ROUND_CONST(T))
-            N = reinterpret(uinttype(T), N_float) % Int32
-            N_float -=  MAGIC_ROUND_CONST(T) #N_float now equals round(x*LogBo256INV($base, T))
-            r = muladd(N_float, LogBo256U($base, T), x)
-            r = muladd(N_float, LogBo256L($base, T), r)
-            k = N >> 8
-            jU, jL = table_unpack(N&255 +1)
-            small_part =  muladd(jU, expm1b_kernel($base, r), jL) + jU
-
-            if !(abs(x) <= SUBNORM_EXP($base, T))
-                x >= MAX_EXP($base, T) && return Inf
-                x <= MIN_EXP($base, T) && return 0.0
-                if k <= -53
-                    # The UInt64 forces promotion. (Only matters for 32 bit systems.)
-                    twopk = (k + UInt64(53)) << 52
-                    return reinterpret(T, twopk + reinterpret(UInt64, small_part))*(2.0^-53)
-                end
-            end
-            twopk = Int64(k) << 52
-            return reinterpret(T, twopk + reinterpret(Int64, small_part))
-        end
 
-        function ($func)(x::T) where T<:Float32
-            N_float = round(x*LogBINV($base, T))
-            N = unsafe_trunc(Int32, N_float)
-            r = muladd(N_float, LogBU($base, T), x)
-            r = muladd(N_float, LogBL($base, T), r)
-            small_part = expb_kernel($base, r)
-            if !(abs(x) <= SUBNORM_EXP($base, T))
-                x > MAX_EXP($base, T) && return Inf32
-                x < MIN_EXP($base, T) && return 0.0f0
-                if N<=Int32(-24)
-                    twopk = reinterpret(T, (N+Int32(151)) << Int32(23))
-                    return (twopk*small_part)*(2f0^(-24))
-                end
-                N == (exponent_max(T)+1) && return small_part * T(2.0) * T(2.0)^exponent_max(T)
-            end
-            twopk = reinterpret(T, (N+Int32(127)) << Int32(23))
-            return twopk*small_part
+@inline function exp_impl(x::Float64, base)
+    T = Float64
+    N_float = muladd(x, LogBo256INV(base, T), MAGIC_ROUND_CONST(T))
+    N = reinterpret(UInt64, N_float) % Int32
+    N_float -=  MAGIC_ROUND_CONST(T) #N_float now equals round(x*LogBo256INV(base, T))
+    r = muladd(N_float, LogBo256U(base, T), x)
+    r = muladd(N_float, LogBo256L(base, T), r)
+    k = N >> 8
+    jU, jL = table_unpack(N&255 + 1)
+    small_part =  muladd(jU, expm1b_kernel(base, r), jL) + jU
+
+    if !(abs(x) <= SUBNORM_EXP(base, T))
+        x >= MAX_EXP(base, T) && return Inf
+        x <= MIN_EXP(base, T) && return 0.0
+        if k <= -53
+            # The UInt64 forces promotion. (Only matters for 32 bit systems.)
+            twopk = (k + UInt64(53)) << 52
+            return reinterpret(T, twopk + reinterpret(UInt64, small_part))*(2.0^-53)
         end
+        #k == 1024 && return (small_part * 2.0) * 2.0^1023
+    end
+    twopk = Int64(k) << 52
+    return reinterpret(T, twopk + reinterpret(Int64, small_part))
+end
+@inline function exp_impl_fast(x::Float64, base)
+    T = Float64
+    N_float = muladd(x, LogBo256INV(base, T), MAGIC_ROUND_CONST(T))
+    N = reinterpret(UInt64, N_float) % Int32
+    N_float -=  MAGIC_ROUND_CONST(T) #N_float now equals round(x*LogBo256INV(base, T))
+    r = muladd(N_float, LogBo256U(base, T), x)
+    r = muladd(N_float, LogBo256L(base, T), r)
+    k = N >> 8
+    jU = reinterpret(Float64, JU_CONST | (@inbounds J_TABLE[N&255 + 1] & JU_MASK))
+    small_part =  muladd(jU, expm1b_kernel(base, r), jU)
+    twopk = Int64(k) << 52
+    return reinterpret(T, twopk + reinterpret(Int64, small_part))
+end
 
-        function ($func)(a::Float16)
-            T = Float32
-            x = T(a)
-            N_float = round(x*LogBINV($base, T))
-            N = unsafe_trunc(Int32, N_float)
-            r = muladd(N_float, LogB($base, Float16), x)
-            small_part = expb_kernel($base, r)
-            if !(abs(x) <= SUBNORM_EXP($base, T))
-                x > MAX_EXP($base, T) && return Inf16
-                N<=Int32(-24) && return zero(Float16)
-            end
-            twopk = reinterpret(T, (N+Int32(127)) << Int32(23))
-            return Float16(twopk*small_part)
+@inline function exp_impl(x::Float32, base)
+    T = Float32
+    N_float = round(x*LogBINV(base, T))
+    N = unsafe_trunc(Int32, N_float)
+    r = muladd(N_float, LogBU(base, T), x)
+    r = muladd(N_float, LogBL(base, T), r)
+    small_part = expb_kernel(base, r)
+    if !(abs(x) <= SUBNORM_EXP(base, T))
+        x > MAX_EXP(base, T) && return Inf32
+        x < MIN_EXP(base, T) && return 0.0f0
+        if N <= Int32(-24)
+            twopk = reinterpret(T, (N+Int32(151)) << Int32(23))
+            return (twopk*small_part)*(2f0^(-24))
         end
+        N == 128 && return small_part * T(2.0) * T(2.0)^127
+    end
+    twopk = reinterpret(T, (N+Int32(127)) << Int32(23))
+    return twopk*small_part
+end
+
+@inline function exp_impl_fast(x::Float32, base)
+    T = Float32
+    N_float = round(x*LogBINV(base, T))
+    N = unsafe_trunc(Int32, N_float)
+    r = muladd(N_float, LogBU(base, T), x)
+    r = muladd(N_float, LogBL(base, T), r)
+    small_part = expb_kernel(base, r)
+    twopk = reinterpret(T, (N+Int32(127)) << Int32(23))
+    return twopk*small_part
+end
+
+@inline function exp_impl(a::Float16, base)
+    T = Float32
+    x = T(a)
+    N_float = round(x*LogBINV(base, T))
+    N = unsafe_trunc(Int32, N_float)
+    r = muladd(N_float, LogB(base, Float16), x)
+    small_part = expb_kernel(base, r)
+    if !(abs(x) <= SUBNORM_EXP(base, T))
+        x > MAX_EXP(base, T) && return Inf16
+        N<=Int32(-24) && return zero(Float16)
+    end
+    twopk = reinterpret(T, (N+Int32(127)) << Int32(23))
+    return Float16(twopk*small_part)
+end
+
+for (func, fast_func, base) in ((:exp2,  :exp2_fast,  Val(2)),
+                                (:exp,   :exp_fast,   Val(:ℯ)),
+                                (:exp10, :exp10_fast, Val(10)))
+    @eval begin
+        $func(x::Union{Float16,Float32,Float64}) = exp_impl(x, $base)
+        $fast_func(x::Union{Float32,Float64}) = exp_impl_fast(x, $base)
     end
 end
+
 @doc """
     exp(x)
 
-Compute the natural base exponential of `x`, in other words ``e^x``.
+Compute the natural base exponential of `x`, in other words ``ℯ^x``.
+
+See also [`exp2`](@ref), [`exp10`](@ref) and [`cis`](@ref).
 
 # Examples
 ```jldoctest
 julia> exp(1.0)
 2.718281828459045
+
+julia> exp(im * pi) == cis(pi)
+true
 ```
 """ exp(x::Real)
 
-
 """
     exp2(x)
 
 Compute the base 2 exponential of `x`, in other words ``2^x``.
 
+See also [`ldexp`](@ref), [`<<`](@ref).
+
 # Examples
 ```jldoctest
 julia> exp2(5)
 32.0
+
+julia> 2^5
+32
+
+julia> exp2(63) > typemax(Int)
+true
 ```
 """
 exp2(x)
@@ -296,6 +343,9 @@ Compute the base 10 exponential of `x`, in other words ``10^x``.
 ```jldoctest
 julia> exp10(2)
 100.0
+
+julia> 10^2
+100
 ```
 """
 exp10(x)
@@ -314,3 +364,106 @@ exp10(x)
         reinterpret(Float64, (exponent_bias(Float64) + (x % Int64)) << (significand_bits(Float64) % UInt))
     end
 end
+
+# min and max arguments for expm1 by type
+MAX_EXP(::Type{Float64}) =  709.7827128933845   # log 2^1023*(2-2^-52)
+MIN_EXP(::Type{Float64}) = -37.42994775023705   # log 2^-54
+MAX_EXP(::Type{Float32}) =  88.72284f0          # log 2^127 *(2-2^-23)
+MIN_EXP(::Type{Float32}) = -17.32868f0          # log 2^-25
+MAX_EXP(::Type{Float16}) =  Float16(11.09)      # log 2^15 *(2-2^-10)
+MIN_EXP(::Type{Float16}) = -Float16(8.32)       # log 2^-12
+
+Ln2INV(::Type{Float64}) = 1.4426950408889634
+Ln2(::Type{Float64}) = -0.6931471805599453
+Ln2INV(::Type{Float32}) = 1.442695f0
+Ln2(::Type{Float32}) = -0.6931472f0
+
+# log(.75) <= x <= log(1.25)
+@inline function expm1_small(x::Float64)
+    p = evalpoly(x, (0.16666666666666632, 0.04166666666666556, 0.008333333333401227,
+                     0.001388888889068783, 0.00019841269447671544, 2.480157691845342e-5,
+                     2.7558212415361945e-6, 2.758218402815439e-7, 2.4360682937111612e-8))
+    p2 = exthorner(x, (1.0, .5, p))
+    return fma(x, p2[1], x*p2[2])
+end
+@inline function expm1_small(x::Float32)
+    p = evalpoly(x, (0.16666666f0, 0.041666627f0, 0.008333682f0,
+                     0.0013908712f0, 0.0001933096f0))
+    p2 = exthorner(x, (1f0, .5f0, p))
+    return fma(x, p2[1], x*p2[2])
+end
+
+function expm1(x::Float64)
+    T = Float64
+    if -0.2876820724517809 <= x <= 0.22314355131420976
+        return expm1_small(x)
+    elseif !(abs(x) <= -MIN_EXP(Float64)) # MIN_EXP is negative: negate it so moderate |x| skips the checks below
+        isnan(x) && return x
+        x > MAX_EXP(Float64) && return Inf
+        x < MIN_EXP(Float64) && return -1.0
+    end
+
+    N_float = muladd(x, LogBo256INV(Val(:ℯ), T), MAGIC_ROUND_CONST(T))
+    N = reinterpret(UInt64, N_float) % Int32
+    N_float -=  MAGIC_ROUND_CONST(T) #N_float now equals round(x*LogBo256INV(Val(:ℯ), T))
+    r = muladd(N_float, LogBo256U(Val(:ℯ), T), x)
+    r = muladd(N_float, LogBo256L(Val(:ℯ), T), r)
+    k = Int64(N >> 8)
+    jU, jL = table_unpack(N&255 +1)
+    p = expm1b_kernel(Val(:ℯ), r)
+    twopk  = reinterpret(Float64, (1023+k) << 52) # bit pattern with biased exponent 1023+k
+    twopnk = reinterpret(Float64, (1023-k) << 52) # bit pattern with biased exponent 1023-k
+    k>=106 && return reinterpret(Float64, (1022+k) << 52)*(jU + muladd(jU, p, jL))*2
+    k>=53 && return twopk*(jU + muladd(jU, p, (jL-twopnk)))
+    k<=-2 && return twopk*(jU + muladd(jU, p, jL))-1
+    return twopk*((jU-twopnk) + fma(jU, p, jL))
+end
+
+function expm1(x::Float32)
+    x > MAX_EXP(Float32) && return Inf32
+    x < MIN_EXP(Float32) && return -1f0
+    if -0.2876821f0 <=x <= 0.22314355f0
+        return expm1_small(x)
+    end
+    x = Float64(x) # the remaining arithmetic is carried out in Float64
+    N_float = round(x*Ln2INV(Float64))
+    N = unsafe_trunc(Int64, N_float) # signed target: N_float is negative for x < 0, which an unsigned type cannot represent
+    r = muladd(N_float, Ln2(Float64), x) # Ln2 is defined as -log(2), so this is x - N*log(2)
+    hi = evalpoly(r, (1.0, .5, 0.16666667546642386, 0.041666183019487026,
+                      0.008332997481506921, 0.0013966479175977883, 0.0002004037059220124))
+    small_part = r*hi
+    twopk = reinterpret(Float64, (N+1023) << 52) # bit pattern with biased exponent N+1023
+    return Float32(muladd(twopk, small_part, twopk-1.0))
+end
+
+function expm1(x::Float16)
+    x > MAX_EXP(Float16) && return Inf16
+    x < MIN_EXP(Float16) && return Float16(-1.0)
+    x = Float32(x) # the remaining arithmetic is carried out in Float32
+    if -0.2876821f0 <=x <= 0.22314355f0
+        return Float16(x*evalpoly(x, (1f0, .5f0, 0.16666628f0, 0.04166785f0, 0.008351848f0, 0.0013675707f0)))
+    end
+    N_float = round(x*Ln2INV(Float32))
+    N = unsafe_trunc(Int32, N_float) # signed target: N_float is negative for x < 0, which an unsigned type cannot represent
+    r = muladd(N_float, Ln2(Float32), x) # Ln2 is defined as -log(2), so this is x - N*log(2)
+    hi = evalpoly(r, (1f0, .5f0, 0.16666667f0, 0.041665863f0, 0.008333111f0, 0.0013981499f0, 0.00019983904f0))
+    small_part = r*hi
+    twopk = reinterpret(Float32, (N+Int32(127)) << Int32(23)) # bit pattern with biased exponent N+127
+    return Float16(muladd(twopk, small_part, twopk-1f0))
+end
+
+"""
+    expm1(x)
+
+Accurately compute ``e^x-1``. It avoids the loss of precision involved in the direct
+evaluation of `exp(x)-1` for small values of `x`.
+# Examples
+```jldoctest
+julia> expm1(1e-16)
+1.0e-16
+
+julia> exp(1e-16) - 1
+0.0
+```
+"""
+expm1(x)
diff --git a/base/special/hyperbolic.jl b/base/special/hyperbolic.jl
index 1fee6e2879220e..d84cadcb2b6f21 100644
--- a/base/special/hyperbolic.jl
+++ b/base/special/hyperbolic.jl
@@ -14,19 +14,6 @@
 # is preserved.
 # ====================================================
 
-@inline function exthorner(x, p::Tuple)
-    # polynomial evaluation using compensated summation.
-    # much more accurate, especially when lo can be combined with other rounding errors
-    hi, lo = p[end], zero(x)
-    for i in length(p)-1:-1:1
-        pi = p[i]
-        prod = hi*x
-        err = fma(hi, x, -prod)
-        hi = pi+prod
-        lo = fma(lo, x, prod - (hi - pi) + err)
-    end
-    return hi, lo
-end
 
 # Hyperbolic functions
 # sinh methods
@@ -63,6 +50,11 @@ function sinh_kernel(x::Float32)
     return Float32(res*x)
 end
 
+@inline function sinh16_kernel(x::Float32)
+    res = evalpoly(x*x, (1.0f0, 0.16666667f0, 0.008333337f0, 0.00019841001f0,
+                         2.7555539f-6, 2.514339f-8, 1.6260095f-10))
+    return Float16(res*x)
+end
 
 function sinh(x::T) where T<:Union{Float32,Float64}
     # Method
@@ -88,6 +80,14 @@ function sinh(x::T) where T<:Union{Float32,Float64}
     return copysign(T(.5)*(E - 1/E),x)
 end
 
+function Base.sinh(a::Float16)
+    x = Float32(a)
+    absx = abs(x)
+    absx <= SINH_SMALL_X(Float32) && return sinh16_kernel(x)
+    E = exp(absx)
+    return Float16(copysign(.5f0*(E - 1/E),x))
+end
+
 COSH_SMALL_X(::Type{T}) where T= one(T)
 
 function cosh_kernel(x2::Float32)
@@ -112,7 +112,7 @@ function cosh(x::T) where T<:Union{Float32,Float64}
     #               return cosh(x) = = (exp(x) + exp(-x))/2
     #      e)   H_LARGE_X  <= x
     #               return cosh(x) = exp(x/2)/2 * exp(x/2)
-    #                  Note that this branch automatically deals with Infs and NaNs
+    #               Note that this branch automatically deals with Infs and NaNs
 
     absx = abs(x)
     if absx <= COSH_SMALL_X(T)
diff --git a/base/special/trig.jl b/base/special/trig.jl
index 6ebfd2f2932aa3..f735ea43f24256 100644
--- a/base/special/trig.jl
+++ b/base/special/trig.jl
@@ -168,6 +168,8 @@ end
 
 Simultaneously compute the sine and cosine of `x`, where `x` is in radians, returning
 a tuple `(sine, cosine)`.
+
+See also [`cis`](@ref), [`sincospi`](@ref), [`sincosd`](@ref).
 """
 function sincos(x::T) where T<:Union{Float32, Float64}
     if abs(x) < T(pi)/4
@@ -742,6 +744,8 @@ mulpi_ext(x::Real) = pi*x # Fallback
     sinpi(x)
 
 Compute ``\\sin(\\pi x)`` more accurately than `sin(pi*x)`, especially for large `x`.
+
+See also [`sind`](@ref), [`cospi`](@ref), [`sincospi`](@ref).
 """
 function sinpi(x::T) where T<:AbstractFloat
     if !isfinite(x)
@@ -863,6 +867,8 @@ where `x` is in radians), returning a tuple `(sine, cosine)`.
 
 !!! compat "Julia 1.6"
     This function requires Julia 1.6 or later.
+
+See also: [`cispi`](@ref), [`sincosd`](@ref), [`sinpi`](@ref).
 """
 function sincospi(x::T) where T<:AbstractFloat
     if !isfinite(x)
@@ -1069,6 +1075,8 @@ isinf_real(x::Number) = false
     sinc(x)
 
 Compute ``\\sin(\\pi x) / (\\pi x)`` if ``x \\neq 0``, and ``1`` if ``x = 0``.
+
+See also [`cosc`](@ref), its derivative.
 """
 sinc(x::Number) = _sinc(float(x))
 sinc(x::Integer) = iszero(x) ? one(x) : zero(x)
@@ -1246,37 +1254,37 @@ Simultaneously compute the sine and cosine of `x`, where `x` is in degrees.
 !!! compat "Julia 1.3"
     This function requires at least Julia 1.3.
 """
-function sincosd(x::Real)
-    if isinf(x)
-        return throw(DomainError(x, "sincosd(x) is only defined for finite `x`."))
-    elseif isnan(x)
-        return (oftype(x,NaN), oftype(x,NaN))
-    end
-
-    # It turns out that calling those functions separately yielded better
-    # performance than considering each case and calling `sincos_kernel`.
-    return (sind(x), cosd(x))
-end
+sincosd(x) = (sind(x), cosd(x))
+# It turns out that calling these functions separately yields better
+# performance than considering each case and calling `sincos_kernel`.
 
 sincosd(::Missing) = (missing, missing)
 
 for (fd, f, fn) in ((:sind, :sin, "sine"), (:cosd, :cos, "cosine"), (:tand, :tan, "tangent"))
-    name = string(fd)
-    @eval begin
-        @doc """
-            $($name)(x)
-        Compute $($fn) of `x`, where `x` is in degrees. """ ($fd)(z) = ($f)(deg2rad(z))
+    for (fu, un) in ((:deg2rad, "degrees"),)
+        name = string(fd)
+        @eval begin
+            @doc """
+                $($name)(x)
+
+            Compute $($fn) of `x`, where `x` is in $($un).
+            If `x` is a matrix, `x` needs to be a square matrix. """ ($fd)(x) = ($f)(($fu).(x))
+        end
     end
 end
 
 for (fd, f, fn) in ((:asind, :asin, "sine"), (:acosd, :acos, "cosine"),
                     (:asecd, :asec, "secant"), (:acscd, :acsc, "cosecant"), (:acotd, :acot, "cotangent"))
-    name = string(fd)
-    @eval begin
-        @doc """
-            $($name)(x)
 
-        Compute the inverse $($fn) of `x`, where the output is in degrees. """ ($fd)(y) = rad2deg(($f)(y))
+    for (fu, un) in ((:rad2deg, "degrees"),)
+        name = string(fd)
+        @eval begin
+            @doc """
+                $($name)(x)
+
+            Compute the inverse $($fn) of `x`, where the output is in $($un).
+            If `x` is a matrix, `x` needs to be a square matrix. """ ($fd)(x) = ($fu).(($f)(x))
+        end
     end
 end
 
@@ -1286,5 +1294,5 @@ end
 
 Compute the inverse tangent of `y` or `y/x`, respectively, where the output is in degrees.
 """
-atand(y)    = rad2deg(atan(y))
-atand(y, x) = rad2deg(atan(y,x))
+atand(y)    = rad2deg.(atan(y))
+atand(y, x) = rad2deg.(atan(y,x))
diff --git a/base/stacktraces.jl b/base/stacktraces.jl
index 99ee5c57db89f8..8483aec55cbffc 100644
--- a/base/stacktraces.jl
+++ b/base/stacktraces.jl
@@ -133,7 +133,7 @@ function lookup(ip::Union{Base.InterpreterIP,Core.Compiler.InterpreterIP})
     else
         func = top_level_scope_sym
         file = empty_sym
-        line = 0
+        line = Int32(0)
     end
     i = max(ip.stmt+1, 1)  # ip.stmt is 0-indexed
     if i > length(codeinfo.codelocs) || codeinfo.codelocs[i] == 0
diff --git a/base/stat.jl b/base/stat.jl
index d3612b23a0fd10..f8d28cadf0c723 100644
--- a/base/stat.jl
+++ b/base/stat.jl
@@ -26,6 +26,7 @@ export
     uperm
 
 struct StatStruct
+    desc    :: Union{String, OS_HANDLE} # for show method, not included in equality or hash
     device  :: UInt
     inode   :: UInt
     mode    :: UInt
@@ -40,9 +41,25 @@ struct StatStruct
     ctime   :: Float64
 end
 
-StatStruct() = StatStruct(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+@eval function Base.:(==)(x::StatStruct, y::StatStruct) # do not include `desc` in equality or hash
+  $(let ex = true
+        for fld in fieldnames(StatStruct)[2:end]
+            ex = :(getfield(x, $(QuoteNode(fld))) === getfield(y, $(QuoteNode(fld))) && $ex)
+        end
+        Expr(:return, ex)
+    end)
+end
+@eval function Base.hash(obj::StatStruct, h::UInt)
+  $(quote
+        $(Any[:(h = hash(getfield(obj, $(QuoteNode(fld))), h)) for fld in fieldnames(StatStruct)[2:end]]...)
+        return h
+    end)
+end
 
-StatStruct(buf::Union{Vector{UInt8},Ptr{UInt8}}) = StatStruct(
+StatStruct() = StatStruct("", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+StatStruct(buf::Union{Vector{UInt8},Ptr{UInt8}}) = StatStruct("", buf)
+StatStruct(desc::Union{AbstractString, OS_HANDLE}, buf::Union{Vector{UInt8},Ptr{UInt8}}) = StatStruct(
+    desc isa OS_HANDLE ? desc : String(desc),
     ccall(:jl_stat_dev,     UInt32,  (Ptr{UInt8},), buf),
     ccall(:jl_stat_ino,     UInt32,  (Ptr{UInt8},), buf),
     ccall(:jl_stat_mode,    UInt32,  (Ptr{UInt8},), buf),
@@ -57,7 +74,73 @@ StatStruct(buf::Union{Vector{UInt8},Ptr{UInt8}}) = StatStruct(
     ccall(:jl_stat_ctime,   Float64, (Ptr{UInt8},), buf),
 )
 
-show(io::IO, st::StatStruct) = print(io, "StatStruct(mode=0o$(string(filemode(st), base = 8, pad = 6)), size=$(filesize(st)))")
+function iso_datetime_with_relative(t, tnow)
+    str = Libc.strftime("%FT%T%z", t)
+    secdiff = t - tnow
+    for (d, name) in ((24*60*60, "day"), (60*60, "hour"), (60, "minute"), (1, "second"))
+        tdiff = round(Int, div(abs(secdiff), d))
+        if tdiff != 0 # find first unit difference
+            plural = tdiff == 1 ? "" : "s"
+            when = secdiff < 0 ? "ago" : "in the future"
+            return "$str ($tdiff $name$plural $when)"
+        end
+    end
+    return "$str (just now)"
+end
+
+
+function getusername(uid::Unsigned)
+    pwd = Libc.getpwuid(uid, false)
+    pwd === nothing && return
+    isempty(pwd.username) && return
+    return pwd.username
+end
+
+function getgroupname(gid::Unsigned)
+    gp = Libc.getgrgid(gid, false)
+    gp === nothing && return
+    isempty(gp.groupname) && return
+    return gp.groupname
+end
+
+function show_statstruct(io::IO, st::StatStruct, oneline::Bool)
+    print(io, oneline ? "StatStruct(" : "StatStruct for ")
+    show(io, st.desc)
+    oneline || print(io, "\n  ")
+    print(io, " size: ", st.size, " bytes")
+    oneline || print(io, "\n")
+    print(io, " device: ", st.device)
+    oneline || print(io, "\n ")
+    print(io, " inode: ", st.inode)
+    oneline || print(io, "\n  ")
+    print(io, " mode: 0o", string(filemode(st), base = 8, pad = 6), " (", filemode_string(st), ")")
+    oneline || print(io, "\n ")
+    print(io, " nlink: ", st.nlink)
+    oneline || print(io, "\n   ")
+    print(io, " uid: $(st.uid)")
+    username = getusername(st.uid)
+    username === nothing || print(io, " (", username, ")")
+    oneline || print(io, "\n   ")
+    print(io, " gid: ", st.gid)
+    groupname = getgroupname(st.gid)
+    groupname === nothing || print(io, " (", groupname, ")")
+    oneline || print(io, "\n  ")
+    print(io, " rdev: ", st.rdev)
+    oneline || print(io, "\n ")
+    print(io, " blksz: ", st.blksize)
+    oneline || print(io, "\n")
+    print(io, " blocks: ", st.blocks)
+    tnow = round(UInt, time())
+    oneline || print(io, "\n ")
+    print(io, " mtime: ", iso_datetime_with_relative(st.mtime, tnow))
+    oneline || print(io, "\n ")
+    print(io, " ctime: ", iso_datetime_with_relative(st.ctime, tnow))
+    oneline && print(io, ")")
+    return nothing
+end
+
+show(io::IO, st::StatStruct) = show_statstruct(io, st, true)
+show(io::IO, ::MIME"text/plain", st::StatStruct) = show_statstruct(io, st, false)
 
 # stat & lstat functions
 
@@ -66,9 +149,9 @@ macro stat_call(sym, arg1type, arg)
         stat_buf = zeros(UInt8, ccall(:jl_sizeof_stat, Int32, ()))
         r = ccall($(Expr(:quote, sym)), Int32, ($(esc(arg1type)), Ptr{UInt8}), $(esc(arg)), stat_buf)
         if !(r in (0, Base.UV_ENOENT, Base.UV_ENOTDIR, Base.UV_EINVAL))
-            uv_error(string("stat(",repr($(esc(arg))),")"), r)
+            uv_error(string("stat(", repr($(esc(arg))), ")"), r)
         end
-        st = StatStruct(stat_buf)
+        st = StatStruct($(esc(arg)), stat_buf)
         if ispath(st) != (r == 0)
             error("stat returned zero type for a valid path")
         end
@@ -92,6 +175,7 @@ The fields of the structure are:
 
 | Name    | Description                                                        |
 |:--------|:-------------------------------------------------------------------|
+| desc    | The path or OS file descriptor                                     |
 | size    | The size (in bytes) of the file                                    |
 | device  | ID of the device that contains the file                            |
 | inode   | The inode number of the file                                       |
@@ -120,12 +204,73 @@ lstat(path...) = lstat(joinpath(path...))
 
 # some convenience functions
 
+const filemode_table = (
+    [
+        (S_IFLNK, "l"),
+        (S_IFSOCK, "s"),  # Must appear before IFREG and IFDIR as IFSOCK == IFREG | IFDIR
+        (S_IFREG, "-"),
+        (S_IFBLK, "b"),
+        (S_IFDIR, "d"),
+        (S_IFCHR, "c"),
+        (S_IFIFO, "p")
+    ],
+    [
+        (S_IRUSR, "r"),
+    ],
+    [
+        (S_IWUSR, "w"),
+    ],
+    [
+        (S_IXUSR|S_ISUID, "s"),
+        (S_ISUID, "S"),
+        (S_IXUSR, "x")
+    ],
+    [
+        (S_IRGRP, "r"),
+    ],
+    [
+        (S_IWGRP, "w"),
+    ],
+    [
+        (S_IXGRP|S_ISGID, "s"),
+        (S_ISGID, "S"),
+        (S_IXGRP, "x")
+    ],
+    [
+        (S_IROTH, "r"),
+    ],
+    [
+        (S_IWOTH, "w"),
+    ],
+    [
+        (S_IXOTH|S_ISVTX, "t"),
+        (S_ISVTX, "T"),
+        (S_IXOTH, "x")
+    ]
+)
+
 """
     filemode(file)
 
 Equivalent to `stat(file).mode`.
 """
 filemode(st::StatStruct) = st.mode
+filemode_string(st::StatStruct) = filemode_string(st.mode)
+function filemode_string(mode)
+    str = IOBuffer()
+    for table in filemode_table
+        complete = true
+        for (bit, char) in table
+            if mode & bit == bit
+                write(str, char)
+                complete = false
+                break
+            end
+        end
+        complete && write(str, "-")
+    end
+    return String(take!(str))
+end
 
 """
     filesize(path...)
@@ -187,7 +332,7 @@ julia> isdir("not/a/directory")
 false
 ```
 
-See also: [`isfile`](@ref) and [`ispath`](@ref).
+See also [`isfile`](@ref) and [`ispath`](@ref).
 """
 isdir(st::StatStruct) = filemode(st) & 0xf000 == 0x4000
 
@@ -216,7 +361,7 @@ true
 julia> close(f); rm("test_file.txt")
 ```
 
-See also: [`isdir`](@ref) and [`ispath`](@ref).
+See also [`isdir`](@ref) and [`ispath`](@ref).
 """
 isfile(st::StatStruct) = filemode(st) & 0xf000 == 0x8000
 
diff --git a/base/stream.jl b/base/stream.jl
index 509bc9953add60..05178a7f9ebcb3 100644
--- a/base/stream.jl
+++ b/base/stream.jl
@@ -109,7 +109,7 @@ function eof(s::LibuvStream)
     # and that we won't return true if there's a readerror pending (it'll instead get thrown).
     # This requires some careful ordering here (TODO: atomic loads)
     bytesavailable(s) > 0 && return false
-    open = isopen(s) # must precede readerror check
+    open = isreadable(s) # must precede readerror check
     s.readerror === nothing || throw(s.readerror)
     return !open
 end
@@ -270,6 +270,7 @@ show(io::IO, stream::LibuvStream) = print(io, typeof(stream), "(",
 function isreadable(io::LibuvStream)
     bytesavailable(io) > 0 && return true
     isopen(io) || return false
+    io.status == StatusEOF && return false
     return ccall(:uv_is_readable, Cint, (Ptr{Cvoid},), io.handle) != 0
 end
 
@@ -282,6 +283,7 @@ end
 lock(s::LibuvStream) = lock(s.lock)
 unlock(s::LibuvStream) = unlock(s.lock)
 
+setup_stdio(stream::LibuvStream, ::Bool) = (stream, false)
 rawhandle(stream::LibuvStream) = stream.handle
 unsafe_convert(::Type{Ptr{Cvoid}}, s::Union{LibuvStream, LibuvServer}) = s.handle
 
@@ -378,7 +380,7 @@ function isopen(x::Union{LibuvStream, LibuvServer})
     if x.status == StatusUninit || x.status == StatusInit
         throw(ArgumentError("$x is not initialized"))
     end
-    return x.status != StatusClosed && x.status != StatusEOF
+    return x.status != StatusClosed
 end
 
 function check_open(x::Union{LibuvStream, LibuvServer})
@@ -390,13 +392,13 @@ end
 function wait_readnb(x::LibuvStream, nb::Int)
     # fast path before iolock acquire
     bytesavailable(x.buffer) >= nb && return
-    open = isopen(x) # must precede readerror check
+    open = isopen(x) && x.status != StatusEOF # must precede readerror check
     x.readerror === nothing || throw(x.readerror)
     open || return
     iolock_begin()
     # repeat fast path after iolock acquire, before other expensive work
     bytesavailable(x.buffer) >= nb && (iolock_end(); return)
-    open = isopen(x)
+    open = isopen(x) && x.status != StatusEOF
     x.readerror === nothing || throw(x.readerror)
     open || (iolock_end(); return)
     # now do the "real" work
@@ -407,6 +409,7 @@ function wait_readnb(x::LibuvStream, nb::Int)
         while bytesavailable(x.buffer) < nb
             x.readerror === nothing || throw(x.readerror)
             isopen(x) || break
+            x.status != StatusEOF || break
             x.throttle = max(nb, x.throttle)
             start_reading(x) # ensure we are reading
             iolock_end()
@@ -431,6 +434,52 @@ function wait_readnb(x::LibuvStream, nb::Int)
     nothing
 end
 
+function closewrite(s::LibuvStream)
+    iolock_begin()
+    check_open(s)
+    req = Libc.malloc(_sizeof_uv_shutdown)
+    uv_req_set_data(req, C_NULL) # in case we get interrupted before arriving at the wait call
+    err = ccall(:uv_shutdown, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}),
+                req, s, @cfunction(uv_shutdowncb_task, Cvoid, (Ptr{Cvoid}, Cint)))
+    if err < 0
+        Libc.free(req)
+        uv_error("shutdown", err)
+    end
+    ct = current_task()
+    preserve_handle(ct)
+    sigatomic_begin()
+    uv_req_set_data(req, ct)
+    iolock_end()
+    status = try
+        sigatomic_end()
+        wait()::Cint
+    finally
+        # try-finally unwinds the sigatomic level, so need to repeat sigatomic_end
+        sigatomic_end()
+        iolock_begin()
+        ct.queue === nothing || list_deletefirst!(ct.queue, ct)
+        if uv_req_data(req) != C_NULL
+            # req is still alive,
+            # so make sure we won't get spurious notifications later
+            uv_req_set_data(req, C_NULL)
+        else
+            # done with req
+            Libc.free(req)
+        end
+        iolock_end()
+        unpreserve_handle(ct)
+    end
+    if isopen(s)
+        if status < 0 || ccall(:uv_is_readable, Cint, (Ptr{Cvoid},), s.handle) == 0
+            close(s)
+        end
+    end
+    if status < 0
+        throw(_UVError("shutdown", status))
+    end
+    nothing
+end
+
 function wait_close(x::Union{LibuvStream, LibuvServer})
     preserve_handle(x)
     lock(x.cond)
@@ -451,7 +500,7 @@ function close(stream::Union{LibuvStream, LibuvServer})
     if stream.status == StatusInit
         ccall(:jl_forceclose_uv, Cvoid, (Ptr{Cvoid},), stream.handle)
         stream.status = StatusClosing
-    elseif isopen(stream) || stream.status == StatusEOF
+    elseif isopen(stream)
         should_wait = uv_handle_data(stream) != C_NULL
         if stream.status != StatusClosing
             ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), stream.handle)
@@ -606,35 +655,33 @@ function uv_readcb(handle::Ptr{Cvoid}, nread::Cssize_t, buf::Ptr{Cvoid})
     nrequested = ccall(:jl_uv_buf_len, Csize_t, (Ptr{Cvoid},), buf)
     function readcb_specialized(stream::LibuvStream, nread::Int, nrequested::UInt)
         lock(stream.cond)
-        try
-            if nread < 0
-                if nread == UV_ENOBUFS && nrequested == 0
-                    # remind the client that stream.buffer is full
-                    notify(stream.cond)
-                elseif nread == UV_EOF
-                    if isa(stream, TTY)
-                        stream.status = StatusEOF # libuv called uv_stop_reading already
+        if nread < 0
+            if nread == UV_ENOBUFS && nrequested == 0
+                # remind the client that stream.buffer is full
+                notify(stream.cond)
+            elseif nread == UV_EOF # libuv called uv_stop_reading already
+                if stream.status != StatusClosing
+                    stream.status = StatusEOF
+                    if stream isa TTY # TODO: || ccall(:uv_is_writable, Cint, (Ptr{Cvoid},), stream.handle) != 0
+                        # stream can still be used either by reseteof # TODO: or write
                         notify(stream.cond)
-                    elseif stream.status != StatusClosing
-                        # begin shutdown of the stream
+                    else
+                        # underlying stream is no longer useful: begin finalization
                         ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), stream.handle)
                         stream.status = StatusClosing
                     end
-                else
-                    stream.readerror = _UVError("read", nread)
-                    # This is a fatal connection error. Shutdown requests as per the usual
-                    # close function won't work and libuv will fail with an assertion failure
-                    ccall(:jl_forceclose_uv, Cvoid, (Ptr{Cvoid},), stream)
-                    stream.status = StatusClosing
-                    notify(stream.cond)
                 end
             else
-                notify_filled(stream.buffer, nread)
-                notify(stream.cond)
+                stream.readerror = _UVError("read", nread)
+                # This is a fatal connection error
+                ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), stream.handle)
+                stream.status = StatusClosing
             end
-        finally
-            unlock(stream.cond)
+        else
+            notify_filled(stream.buffer, nread)
+            notify(stream.cond)
         end
+        unlock(stream.cond)
 
         # Stop background reading when
         # 1) there's nobody paying attention to the data we are reading
@@ -651,6 +698,7 @@ function uv_readcb(handle::Ptr{Cvoid}, nread::Cssize_t, buf::Ptr{Cvoid})
         nothing
     end
     readcb_specialized(stream_unknown_type, Int(nread), UInt(nrequested))
+    nothing
 end
 
 function reseteof(x::TTY)
@@ -844,6 +892,7 @@ function readbytes!(s::LibuvStream, a::Vector{UInt8}, nb::Int)
         while bytesavailable(buf) < nb
             s.readerror === nothing || throw(s.readerror)
             isopen(s) || break
+            s.status != StatusEOF || break
             iolock_end()
             wait_readnb(s, nb)
             iolock_begin()
@@ -890,6 +939,7 @@ function unsafe_read(s::LibuvStream, p::Ptr{UInt8}, nb::UInt)
         while bytesavailable(buf) < nb
             s.readerror === nothing || throw(s.readerror)
             isopen(s) || throw(EOFError())
+            s.status != StatusEOF || throw(EOFError())
             iolock_end()
             wait_readnb(s, nb)
             iolock_begin()
@@ -946,13 +996,14 @@ function readuntil(x::LibuvStream, c::UInt8; keep::Bool=false)
     @assert buf.seekable == false
     if !occursin(c, buf) # fast path checks first
         x.readerror === nothing || throw(x.readerror)
-        if isopen(x)
+        if isopen(x) && x.status != StatusEOF
             preserve_handle(x)
             lock(x.cond)
             try
                 while !occursin(c, x.buffer)
                     x.readerror === nothing || throw(x.readerror)
                     isopen(x) || break
+                    x.status != StatusEOF || break
                     start_reading(x) # ensure we are reading
                     iolock_end()
                     wait(x.cond)
@@ -1115,6 +1166,20 @@ function uv_writecb_task(req::Ptr{Cvoid}, status::Cint)
     nothing
 end
 
+function uv_shutdowncb_task(req::Ptr{Cvoid}, status::Cint)
+    d = uv_req_data(req)
+    if d != C_NULL
+        uv_req_set_data(req, C_NULL) # let the Task know we got the shutdowncb
+        t = unsafe_pointer_to_objref(d)::Task
+        schedule(t, status)
+    else
+        # no owner for this req, safe to just free it
+        Libc.free(req)
+    end
+    nothing
+end
+
+
 _fd(x::IOStream) = RawFD(fd(x))
 _fd(x::Union{OS_HANDLE, RawFD}) = x
 
@@ -1127,7 +1192,7 @@ function _fd(x::Union{LibuvStream, LibuvServer})
     return fd[]
 end
 
-struct redirect_stdio <: Function
+struct RedirectStdStream <: Function
     unix_fd::Int
     writable::Bool
 end
@@ -1135,7 +1200,7 @@ for (f, writable, unix_fd) in
         ((:redirect_stdin, false, 0),
          (:redirect_stdout, true, 1),
          (:redirect_stderr, true, 2))
-    @eval const ($f) = redirect_stdio($unix_fd, $writable)
+    @eval const ($f) = RedirectStdStream($unix_fd, $writable)
 end
 function _redirect_io_libc(stream, unix_fd::Int)
     posix_fd = _fd(stream)
@@ -1154,7 +1219,7 @@ function _redirect_io_global(io, unix_fd::Int)
     unix_fd == 2 && (global stderr = io)
     nothing
 end
-function (f::redirect_stdio)(handle::Union{LibuvStream, IOStream})
+function (f::RedirectStdStream)(handle::Union{LibuvStream, IOStream})
     _redirect_io_libc(handle, f.unix_fd)
     c_sym = f.unix_fd == 0 ? cglobal(:jl_uv_stdin, Ptr{Cvoid}) :
             f.unix_fd == 1 ? cglobal(:jl_uv_stdout, Ptr{Cvoid}) :
@@ -1164,7 +1229,7 @@ function (f::redirect_stdio)(handle::Union{LibuvStream, IOStream})
     _redirect_io_global(handle, f.unix_fd)
     return handle
 end
-function (f::redirect_stdio)(::DevNull)
+function (f::RedirectStdStream)(::DevNull)
     nulldev = @static Sys.iswindows() ? "NUL" : "/dev/null"
     handle = open(nulldev, write=f.writable)
     _redirect_io_libc(handle, f.unix_fd)
@@ -1172,13 +1237,13 @@ function (f::redirect_stdio)(::DevNull)
     _redirect_io_global(devnull, f.unix_fd)
     return devnull
 end
-function (f::redirect_stdio)(io::AbstractPipe)
+function (f::RedirectStdStream)(io::AbstractPipe)
     io2 = (f.writable ? pipe_writer : pipe_reader)(io)
     f(io2)
     _redirect_io_global(io, f.unix_fd)
     return io
 end
-function (f::redirect_stdio)(p::Pipe)
+function (f::RedirectStdStream)(p::Pipe)
     if p.in.status == StatusInit && p.out.status == StatusInit
         link_pipe!(p)
     end
@@ -1186,9 +1251,9 @@ function (f::redirect_stdio)(p::Pipe)
     f(io2)
     return p
 end
-(f::redirect_stdio)() = f(Pipe())
+(f::RedirectStdStream)() = f(Pipe())
 
-# Deprecate these in v2 (redirect_stdio support)
+# Deprecate these in v2 (RedirectStdStream support)
 iterate(p::Pipe) = (p.out, 1)
 iterate(p::Pipe, i::Int) = i == 1 ? (p.in, 2) : nothing
 getindex(p::Pipe, key::Int) = key == 1 ? p.out : key == 2 ? p.in : throw(KeyError(key))
@@ -1204,6 +1269,8 @@ the pipe.
 !!! note
     `stream` must be a compatible objects, such as an `IOStream`, `TTY`,
     `Pipe`, socket, or `devnull`.
+
+See also [`redirect_stdio`](@ref).
 """
 redirect_stdout
 
@@ -1215,6 +1282,8 @@ Like [`redirect_stdout`](@ref), but for [`stderr`](@ref).
 !!! note
     `stream` must be a compatible objects, such as an `IOStream`, `TTY`,
     `Pipe`, socket, or `devnull`.
+
+See also [`redirect_stdio`](@ref).
 """
 redirect_stderr
 
@@ -1227,10 +1296,125 @@ Note that the direction of the stream is reversed.
 !!! note
     `stream` must be a compatible objects, such as an `IOStream`, `TTY`,
     `Pipe`, socket, or `devnull`.
+
+See also [`redirect_stdio`](@ref).
 """
 redirect_stdin
 
-function (f::redirect_stdio)(thunk::Function, stream)
+"""
+    redirect_stdio(;stdin=stdin, stderr=stderr, stdout=stdout)
+
+Redirect a subset of the streams `stdin`, `stderr`, `stdout`.
+Each argument must be an `IOStream`, `TTY`, `Pipe`, socket, or `devnull`.
+
+!!! compat "Julia 1.7"
+    `redirect_stdio` requires Julia 1.7 or later.
+"""
+function redirect_stdio(;stdin=nothing, stderr=nothing, stdout=nothing)
+    stdin  === nothing || redirect_stdin(stdin)
+    stderr === nothing || redirect_stderr(stderr)
+    stdout === nothing || redirect_stdout(stdout)
+end
+
+"""
+    redirect_stdio(f; stdin=nothing, stderr=nothing, stdout=nothing)
+
+Redirect a subset of the streams `stdin`, `stderr`, `stdout`,
+call `f()` and restore each stream.
+
+Possible values for each stream are:
+* `nothing` indicating the stream should not be redirected.
+* `path::AbstractString` redirecting the stream to the file at `path`.
+* `io` an `IOStream`, `TTY`, `Pipe`, socket, or `devnull`.
+
+# Examples
+```julia
+julia> redirect_stdio(stdout="stdout.txt", stderr="stderr.txt") do
+           print("hello stdout")
+           print(stderr, "hello stderr")
+       end
+
+julia> read("stdout.txt", String)
+"hello stdout"
+
+julia> read("stderr.txt", String)
+"hello stderr"
+```
+
+# Edge cases
+
+It is possible to pass the same argument to `stdout` and `stderr`:
+```julia
+julia> redirect_stdio(stdout="log.txt", stderr="log.txt", stdin=devnull) do
+    ...
+end
+```
+
+However, passing two distinct descriptors of the same file is not supported.
+```julia
+julia> io1 = open("same/path", "w")
+
+julia> io2 = open("same/path", "w")
+
+julia> redirect_stdio(f, stdout=io1, stderr=io2) # not supported
+```
+Also, the `stdin` argument may not be the same descriptor as `stdout` or `stderr`.
+```julia
+julia> io = open(...)
+
+julia> redirect_stdio(f, stdout=io, stdin=io) # not supported
+```
+
+!!! compat "Julia 1.7"
+    `redirect_stdio` requires Julia 1.7 or later.
+"""
+function redirect_stdio(f; stdin=nothing, stderr=nothing, stdout=nothing)
+
+    function resolve(new::Nothing, oldstream, mode)
+        (new=nothing, close=false, old=nothing)
+    end
+    function resolve(path::AbstractString, oldstream,mode)
+        (new=open(path, mode), close=true, old=oldstream)
+    end
+    function resolve(new, oldstream, mode)
+        (new=new, close=false, old=oldstream)
+    end
+
+    same_path(x, y) = false
+    function same_path(x::AbstractString, y::AbstractString)
+        # if x = y = "does_not_yet_exist.txt" then samefile will return false
+        (abspath(x) == abspath(y)) || samefile(x,y)
+    end
+    if same_path(stderr, stdin)
+        throw(ArgumentError("stdin and stderr cannot be the same path"))
+    end
+    if same_path(stdout, stdin)
+        throw(ArgumentError("stdin and stdout cannot be the same path"))
+    end
+
+    new_in , close_in , old_in  = resolve(stdin , Base.stdin , "r")
+    new_out, close_out, old_out = resolve(stdout, Base.stdout, "w")
+    if same_path(stderr, stdout)
+        # make sure that in case stderr = stdout = "same/path"
+        # only a single io is used instead of opening the same file twice
+        new_err, close_err, old_err = new_out, false, Base.stderr
+    else
+        new_err, close_err, old_err = resolve(stderr, Base.stderr, "w")
+    end
+
+    redirect_stdio(; stderr=new_err, stdin=new_in, stdout=new_out)
+
+    try
+        return f()
+    finally
+        redirect_stdio(;stderr=old_err, stdin=old_in, stdout=old_out)
+        close_err && close(new_err)
+        close_in  && close(new_in )
+        close_out && close(new_out)
+    end
+end
+
+function (f::RedirectStdStream)(thunk::Function, stream)
     stdold = f.unix_fd == 0 ? stdin :
              f.unix_fd == 1 ? stdout :
              f.unix_fd == 2 ? stderr :
@@ -1243,6 +1427,7 @@ function (f::redirect_stdio)(thunk::Function, stream)
     end
 end
 
+
 """
     redirect_stdout(f::Function, stream)
 
@@ -1285,23 +1470,26 @@ mutable struct BufferStream <: LibuvStream
     buffer::IOBuffer
     cond::Threads.Condition
     readerror::Any
-    is_open::Bool
     buffer_writes::Bool
     lock::ReentrantLock # advisory lock
+    status::Int
 
-    BufferStream() = new(PipeBuffer(), Threads.Condition(), nothing, true, false, ReentrantLock())
+    BufferStream() = new(PipeBuffer(), Threads.Condition(), nothing, false, ReentrantLock(), StatusActive)
 end
 
-isopen(s::BufferStream) = s.is_open
+isopen(s::BufferStream) = s.status != StatusClosed
+
+closewrite(s::BufferStream) = close(s)
 
 function close(s::BufferStream)
     lock(s.cond) do
-        s.is_open = false
+        s.status = StatusClosed
         notify(s.cond)
         nothing
     end
 end
 uvfinalize(s::BufferStream) = nothing
+setup_stdio(stream::BufferStream, child_readable::Bool) = invoke(setup_stdio, Tuple{IO, Bool}, stream, child_readable)
 
 function read(s::BufferStream, ::Type{UInt8})
     nread = lock(s.cond) do
@@ -1319,8 +1507,8 @@ function unsafe_read(s::BufferStream, a::Ptr{UInt8}, nb::UInt)
 end
 bytesavailable(s::BufferStream) = bytesavailable(s.buffer)
 
-isreadable(s::BufferStream) = s.buffer.readable
-iswritable(s::BufferStream) = s.buffer.writable
+isreadable(s::BufferStream) = (isopen(s) || bytesavailable(s) > 0) && s.buffer.readable
+iswritable(s::BufferStream) = isopen(s) && s.buffer.writable
 
 function wait_readnb(s::BufferStream, nb::Int)
     lock(s.cond) do
@@ -1330,7 +1518,7 @@ function wait_readnb(s::BufferStream, nb::Int)
     end
 end
 
-show(io::IO, s::BufferStream) = print(io, "BufferStream() bytes waiting:", bytesavailable(s.buffer), ", isopen:", s.is_open)
+show(io::IO, s::BufferStream) = print(io, "BufferStream(bytes waiting=", bytesavailable(s.buffer), ", isopen=", isopen(s), ")")
 
 function readuntil(s::BufferStream, c::UInt8; keep::Bool=false)
     bytes = lock(s.cond) do
diff --git a/base/strings/basic.jl b/base/strings/basic.jl
index c9108966325f82..515b8363116985 100644
--- a/base/strings/basic.jl
+++ b/base/strings/basic.jl
@@ -35,8 +35,8 @@ model allows index arithmetic to work with out-of- bounds indices as
 intermediate values so long as one never uses them to retrieve a character,
 which often helps avoid needing to code around edge cases.
 
-See also: [`codeunit`](@ref), [`ncodeunits`](@ref), [`thisind`](@ref),
-[`nextind`](@ref), [`prevind`](@ref)
+See also [`codeunit`](@ref), [`ncodeunits`](@ref), [`thisind`](@ref),
+[`nextind`](@ref), [`prevind`](@ref).
 """
 AbstractString
 
@@ -62,8 +62,8 @@ julia> ncodeunits('∫'), ncodeunits('e'), ncodeunits('ˣ')
 (3, 1, 2)
 ```
 
-See also: [`codeunit`](@ref), [`checkbounds`](@ref), [`sizeof`](@ref),
-[`length`](@ref), [`lastindex`](@ref)
+See also [`codeunit`](@ref), [`checkbounds`](@ref), [`sizeof`](@ref),
+[`length`](@ref), [`lastindex`](@ref).
 """
 ncodeunits(s::AbstractString)
 
@@ -77,7 +77,7 @@ limited to these three types, but it's hard to think of widely used string
 encodings that don't use one of these units. `codeunit(s)` is the same as
 `typeof(codeunit(s,1))` when `s` is a non-empty string.
 
-See also: [`ncodeunits`](@ref)
+See also [`ncodeunits`](@ref).
 """
 codeunit(s::AbstractString)
 
@@ -102,7 +102,7 @@ julia> typeof(a)
 UInt8
 ```
 
-See also: [`ncodeunits`](@ref), [`checkbounds`](@ref)
+See also [`ncodeunits`](@ref), [`checkbounds`](@ref).
 """
 @propagate_inbounds codeunit(s::AbstractString, i::Integer) = typeof(i) === Int ?
     throw(MethodError(codeunit, (s, i))) : codeunit(s, Int(i))
@@ -118,8 +118,8 @@ In order for `isvalid(s, i)` to be an O(1) function, the encoding of `s` must be
 [self-synchronizing](https://en.wikipedia.org/wiki/Self-synchronizing_code). This
 is a basic assumption of Julia's generic string support.
 
-See also: [`getindex`](@ref), [`iterate`](@ref), [`thisind`](@ref),
-[`nextind`](@ref), [`prevind`](@ref), [`length`](@ref)
+See also [`getindex`](@ref), [`iterate`](@ref), [`thisind`](@ref),
+[`nextind`](@ref), [`prevind`](@ref), [`length`](@ref).
 
 # Examples
 ```jldoctest
@@ -152,7 +152,7 @@ be iterated, yielding a sequences of characters. If `i` is out of bounds in `s`
 then a bounds error is raised. The `iterate` function, as part of the iteration
 protocol may assume that `i` is the start of a character in `s`.
 
-See also: [`getindex`](@ref), [`checkbounds`](@ref)
+See also [`getindex`](@ref), [`checkbounds`](@ref).
 """
 @propagate_inbounds iterate(s::AbstractString, i::Integer) = typeof(i) === Int ?
     throw(MethodError(iterate, (s, i))) : iterate(s, Int(i))
@@ -183,7 +183,7 @@ isempty(s::AbstractString) = iszero(ncodeunits(s)::Int)
 
 function getindex(s::AbstractString, i::Integer)
     @boundscheck checkbounds(s, i)
-    @inbounds return isvalid(s, i) ? iterate(s, i)[1] : string_index_err(s, i)
+    @inbounds return isvalid(s, i) ? (iterate(s, i)::NTuple{2,Any})[1] : string_index_err(s, i)
 end
 
 getindex(s::AbstractString, i::Colon) = s
@@ -375,8 +375,8 @@ value `0`.
     the string because it counts the value on the fly. This is in contrast to
     the method for arrays, which is a constant-time operation.
 
-See also: [`isvalid`](@ref), [`ncodeunits`](@ref), [`lastindex`](@ref),
-[`thisind`](@ref), [`nextind`](@ref), [`prevind`](@ref)
+See also [`isvalid`](@ref), [`ncodeunits`](@ref), [`lastindex`](@ref),
+[`thisind`](@ref), [`nextind`](@ref), [`prevind`](@ref).
 
 # Examples
 ```jldoctest
@@ -596,6 +596,15 @@ true
 julia> isascii("αβγ")
 false
 ```
+For example, `isascii` can be used as a predicate function for [`filter`](@ref) or [`replace`](@ref)
+to remove or replace non-ASCII characters, respectively:
+```jldoctest
+julia> filter(isascii, "abcdeγfgh") # discard non-ASCII chars
+"abcdefgh"
+
+julia> replace("abcdeγfgh", !isascii=>' ') # replace non-ASCII chars with spaces
+"abcde fgh"
+```
 """
 isascii(c::Char) = bswap(reinterpret(UInt32, c)) < 0x80
 isascii(s::AbstractString) = all(isascii, s)
@@ -676,13 +685,16 @@ cases where `v` contains non-ASCII characters.)
 
 # Examples
 ```jldoctest
-julia> r = reverse("Julia")
-"ailuJ"
+julia> s = "Julia🚀"
+"Julia🚀"
+
+julia> r = reverse(s)
+"🚀ailuJ"
 
-julia> for i in 1:length(r)
-           print(r[reverseind("Julia", i)])
+julia> for i in eachindex(s)
+           print(r[reverseind(r, i)])
        end
-Julia
+Julia🚀
 ```
 """
 reverseind(s::AbstractString, i::Integer) = thisind(s, ncodeunits(s)-i+1)
@@ -692,7 +704,7 @@ reverseind(s::AbstractString, i::Integer) = thisind(s, ncodeunits(s)-i+1)
 
 Repeat a string `r` times. This can be written as `s^r`.
 
-See also: [`^`](@ref :^(::Union{AbstractString, AbstractChar}, ::Integer))
+See also [`^`](@ref :^(::Union{AbstractString, AbstractChar}, ::Integer)).
 
 # Examples
 ```jldoctest
@@ -707,7 +719,7 @@ repeat(s::AbstractString, r::Integer) = repeat(String(s), r)
 
 Repeat a string or character `n` times. This can also be written as `repeat(s, n)`.
 
-See also: [`repeat`](@ref)
+See also [`repeat`](@ref).
 
 # Examples
 ```jldoctest
diff --git a/base/strings/io.jl b/base/strings/io.jl
index 6003a6d503b900..28d0c8ce41722a 100644
--- a/base/strings/io.jl
+++ b/base/strings/io.jl
@@ -15,7 +15,7 @@ avoid Julia-specific details.
 For example, `show` displays strings with quotes, and `print` displays strings
 without quotes.
 
-[`string`](@ref) returns the output of `print` as a string.
+See also [`println`](@ref), [`string`](@ref).
 
 # Examples
 ```jldoctest
@@ -54,8 +54,8 @@ end
 """
     println([io::IO], xs...)
 
-Print (using [`print`](@ref)) `xs` followed by a newline.
-If `io` is not supplied, prints to [`stdout`](@ref).
+Print (using [`print`](@ref)) `xs` to `io` followed by a newline.
+If `io` is not supplied, prints to the default output stream [`stdout`](@ref).
 
 # Examples
 ```jldoctest
@@ -64,10 +64,10 @@ Hello, world
 
 julia> io = IOBuffer();
 
-julia> println(io, "Hello, world")
+julia> println(io, "Hello", ',', " world.")
 
 julia> String(take!(io))
-"Hello, world\\n"
+"Hello, world.\\n"
 ```
 """
 println(io::IO, xs...) = print(io, xs..., "\n")
@@ -172,6 +172,8 @@ highly efficient, then it may make sense to add a method to `string` and
 define `print(io::IO, x::MyType) = print(io, string(x))` to ensure the
 functions are consistent.
 
+See also [`String`](@ref), [`repr`](@ref), [`sprint`](@ref), [`show`](@ref @show).
+
 # Examples
 ```jldoctest
 julia> string("a", 1, true)
@@ -188,6 +190,54 @@ print(io::IO, s::AbstractString) = for c in s; print(io, c); end
 write(io::IO, s::AbstractString) = (len = 0; for c in s; len += Int(write(io, c))::Int; end; len)
 show(io::IO, s::AbstractString) = print_quoted(io, s)
 
+# Show `str` quoted; a long string may have its middle elided as " ⋯ N bytes ⋯ " (or "code units").
+function show(
+    io    :: IO,
+    mime  :: MIME"text/plain",
+    str   :: AbstractString;
+    limit :: Union{Int, Nothing} = nothing,
+)
+    # compute limit in default case (no explicit `limit` keyword)
+    if limit === nothing
+        get(io, :limit, false) || return show(io, str) # `:limit` not set: never elide
+        limit = max(20, displaysize(io)[2])
+        # one line in collection (`:typeinfo` set), seven otherwise
+        get(io, :typeinfo, nothing) === nothing && (limit *= 7)
+    end
+
+    # early out for short strings
+    len = ncodeunits(str) # measured in code units, not characters
+    len ≤ limit - 2 && # quote chars
+        return show(io, str)
+
+    # these don't depend on string data
+    units = codeunit(str) == UInt8 ? "bytes" : "code units"
+    skip_text(skip) = " ⋯ $skip $units ⋯ "
+    short = length(skip_text("")) + 4 # quote chars: head and tail are each shown quoted
+    chars = max(limit, short + 1) - short # at least 1 digit
+
+    # figure out how many characters to print in elided case
+    chars -= d = ndigits(len - chars) # first adjustment
+    chars += d - ndigits(len - chars) # second if needed
+    chars = max(0, chars)
+
+    # find head & tail, avoiding O(length(str)) computation
+    head = nextind(str, 0, 1 + (chars + 1) ÷ 2) # index just past the kept head
+    tail = prevind(str, len + 1, chars ÷ 2) # index where the kept tail starts
+
+    # threshold: min chars skipped to make elision worthwhile
+    t = short + ndigits(len - chars) - 1
+    n = tail - head # skipped code units
+    if 4t ≤ n || t ≤ n && t ≤ length(str, head, tail-1)
+        skip = skip_text(n)
+        show(io, SubString(str, 1:prevind(str, head)))
+        print(io, skip) # TODO: bold styled
+        show(io, SubString(str, tail))
+    else
+        show(io, str) # too little would be skipped: show the whole string
+    end
+end
+
 # optimized methods to avoid iterating over chars
 write(io::IO, s::Union{String,SubString{String}}) =
     GC.@preserve s Int(unsafe_write(io, pointer(s), reinterpret(UInt, sizeof(s))))::Int
@@ -199,14 +249,18 @@ print(io::IO, s::Union{String,SubString{String}}) = (write(io, s); nothing)
 Create a string from any value using the [`show`](@ref) function.
 You should not add methods to `repr`; define a `show` method instead.
 
-The optional keyword argument `context` can be set to an `IO` or [`IOContext`](@ref)
-object whose attributes are used for the I/O stream passed to `show`.
+The optional keyword argument `context` can be set to a `:key=>value` pair, a
+tuple of `:key=>value` pairs, or an `IO` or [`IOContext`](@ref) object whose
+attributes are used for the I/O stream passed to `show`.
 
 Note that `repr(x)` is usually similar to how the value of `x` would
 be entered in Julia.  See also [`repr(MIME("text/plain"), x)`](@ref) to instead
 return a "pretty-printed" version of `x` designed more for human consumption,
 equivalent to the REPL display of `x`.
 
+!!! compat "Julia 1.7"
+    Passing a tuple to keyword `context` requires Julia 1.7 or later.
+
 # Examples
 ```jldoctest
 julia> repr(1)
@@ -251,15 +305,12 @@ IOBuffer(s::SubString{String}) = IOBuffer(view(unsafe_wrap(Vector{UInt8}, s.stri
 # join is implemented using IO
 
 """
-    join([io::IO,] strings [, delim [, last]])
-
-Join an array of `strings` into a single string, inserting the given delimiter (if any) between
-adjacent strings. If `last` is given, it will be used instead of `delim` between the last
-two strings. If `io` is given, the result is written to `io` rather than returned
-as a `String`.
+    join([io::IO,] iterator [, delim [, last]])
 
-`strings` can be any iterable over elements `x` which are convertible to strings
-via `print(io::IOBuffer, x)`. `strings` will be printed to `io`.
+Join any `iterator` into a single string, inserting the given delimiter (if any) between
+adjacent items.  If `last` is given, it will be used instead of `delim` between the last
+two items.  Each item of `iterator` is converted to a string via `print(io::IOBuffer, x)`.
+If `io` is given, the result is written to `io` rather than returned as a `String`.
 
 # Examples
 ```jldoctest
@@ -270,15 +321,15 @@ julia> join([1,2,3,4,5])
 "12345"
 ```
 """
-function join(io::IO, strings, delim, last)
+function join(io::IO, iterator, delim, last)
     first = true
     local prev
-    for str in strings
+    for item in iterator
         if @isdefined prev
             first ? (first = false) : print(io, delim)
             print(io, prev)
         end
-        prev = str
+        prev = item
     end
     if @isdefined prev
         first || print(io, last)
@@ -286,19 +337,19 @@ function join(io::IO, strings, delim, last)
     end
     nothing
 end
-function join(io::IO, strings, delim="")
+function join(io::IO, iterator, delim="") # no `last`: `delim` separates every pair of items
     # Specialization of the above code when delim==last,
     # which lets us emit (compile) less code
     first = true
-    for str in strings
+    for item in iterator
         first ? (first = false) : print(io, delim)
-        print(io, str)
+        print(io, item) # each item is written with `print`
     end
 end
 
-join(strings) = sprint(join, strings)
-join(strings, delim) = sprint(join, strings, delim)
-join(strings, delim, last) = sprint(join, strings, delim, last)
+join(iterator) = sprint(join, iterator)
+join(iterator, delim) = sprint(join, iterator, delim)
+join(iterator, delim, last) = sprint(join, iterator, delim, last)
 
 ## string escaping & unescaping ##
 
@@ -323,6 +374,8 @@ escaped by a prepending backslash (`\"` is also escaped by default in the first
 The argument `keep` specifies a collection of characters which are to be kept as
 they are. Notice that `esc` has precedence here.
 
+See also [`unescape_string`](@ref) for the reverse operation.
+
 !!! compat "Julia 1.7"
     The `keep` argument is available as of Julia 1.7.
 
@@ -343,9 +396,6 @@ julia> escape_string(string('\\u2135','\\0')) # unambiguous
 julia> escape_string(string('\\u2135','\\0','0')) # \\0 would be ambiguous
 "ℵ\\\\x000"
 ```
-
-## See also
-[`unescape_string`](@ref) for the reverse operation.
 """
 function escape_string(io::IO, s::AbstractString, esc=""; keep = ())
     a = Iterators.Stateful(s)
@@ -406,6 +456,8 @@ The following escape sequences are recognised:
  - Hex bytes (`\\x` with 1-2 trailing hex digits)
  - Octal bytes (`\\` with 1-3 trailing octal digits)
 
+See also [`escape_string`](@ref).
+
 # Examples
 ```jldoctest
 julia> unescape_string("aaa\\\\nbbb") # C escape sequence
@@ -420,9 +472,6 @@ julia> unescape_string("\\\\101") # octal
 julia> unescape_string("aaa \\\\g \\\\n", ['g']) # using `keep` argument
 "aaa \\\\g \\n"
 ```
-
-## See also
-[`escape_string`](@ref).
 """
 function unescape_string(io::IO, s::AbstractString, keep = ())
     a = Iterators.Stateful(s)
@@ -540,7 +589,7 @@ macro raw_str(s); s; end
 
 Escape a string in the manner used for parsing raw string literals.
 For each double-quote (`"`) character in input string `s`, this
-function counts the number _n_ of preceeding backslash (`\\`) characters,
+function counts the number _n_ of preceding backslash (`\\`) characters,
 and then increases there the number of backslashes from _n_ to 2_n_+1
 (even for _n_ = 0). It also doubles a sequence of backslashes at the end
 of the string.
@@ -550,7 +599,7 @@ string literals. (It also happens to be the escaping convention
 expected by the Microsoft C/C++ compiler runtime when it parses a
 command-line string into the argv[] array.)
 
-See also: [`escape_string`](@ref)
+See also [`escape_string`](@ref).
 """
 function escape_raw_string(io, str::AbstractString)
     escapes = 0
@@ -622,6 +671,8 @@ end
 
 Remove leading indentation from string.
 
+See also `indent` from the [`MultilineStrings` package](https://github.com/invenia/MultilineStrings.jl).
+
 # Examples
 ```jldoctest
 julia> Base.unindent("   a\\n   b", 2)
@@ -630,8 +681,6 @@ julia> Base.unindent("   a\\n   b", 2)
 julia> Base.unindent("\\ta\\n\\tb", 2, tabwidth=8)
 "      a\\n      b"
 ```
-
-See also `indent` from the [`MultilineStrings` package](https://github.com/invenia/MultilineStrings.jl).
 """
 function unindent(str::AbstractString, indent::Int; tabwidth=8)
     indent == 0 && return str
diff --git a/base/strings/search.jl b/base/strings/search.jl
index 51b23705671f7a..938ed8d527d997 100644
--- a/base/strings/search.jl
+++ b/base/strings/search.jl
@@ -25,6 +25,9 @@ findfirst(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a
 findnext(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a::ByteArray, i::Integer) =
     nothing_sentinel(_search(a, pred.x, i))
 
+findfirst(::typeof(iszero), a::ByteArray) = nothing_sentinel(_search(a, zero(UInt8))) # `iszero` predicate: search for the zero byte
+findnext(::typeof(iszero), a::ByteArray, i::Integer) = nothing_sentinel(_search(a, zero(UInt8), i)) # same, passing start index `i` to `_search`
+
 function _search(a::Union{String,ByteArray}, b::Union{Int8,UInt8}, i::Integer = 1)
     if i < 1
         throw(BoundsError(a, i))
@@ -65,6 +68,9 @@ findlast(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a:
 findprev(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a::ByteArray, i::Integer) =
     nothing_sentinel(_rsearch(a, pred.x, i))
 
+findlast(::typeof(iszero), a::ByteArray) = nothing_sentinel(_rsearch(a, zero(UInt8))) # `iszero` predicate: reverse search for the zero byte
+findprev(::typeof(iszero), a::ByteArray, i::Integer) = nothing_sentinel(_rsearch(a, zero(UInt8), i)) # same, passing start index `i` to `_rsearch`
+
 function _rsearch(a::Union{String,ByteArray}, b::Union{Int8,UInt8}, i::Integer = sizeof(a))
     if i < 1
         return i == 0 ? 0 : throw(BoundsError(a, i))
@@ -162,11 +168,12 @@ in(c::AbstractChar, s::AbstractString) = (findfirst(isequal(c),s)!==nothing)
 function _searchindex(s::Union{AbstractString,ByteArray},
                       t::Union{AbstractString,AbstractChar,Int8,UInt8},
                       i::Integer)
-    if isempty(t)
+    x = Iterators.peel(t)
+    if isnothing(x)
         return 1 <= i <= nextind(s,lastindex(s))::Int ? i :
                throw(BoundsError(s, i))
     end
-    t1, trest = Iterators.peel(t)
+    t1, trest = x
     while true
         i = findnext(isequal(t1),s,i)
         if i === nothing return 0 end
@@ -420,7 +427,7 @@ function _rsearchindex(s::AbstractString,
         return 1 <= i <= nextind(s, lastindex(s))::Int ? i :
                throw(BoundsError(s, i))
     end
-    t1, trest = Iterators.peel(Iterators.reverse(t))
+    t1, trest = Iterators.peel(Iterators.reverse(t))::NTuple{2,Any}
     while true
         i = findprev(isequal(t1), s, i)
         i === nothing && return 0
@@ -458,7 +465,7 @@ function _rsearchindex(s::AbstractVector{<:Union{Int8,UInt8}}, t::AbstractVector
     n = length(t)
     m = length(s)
     k = Int(_k) - sentinel
-    k < 1 && throw(BoundsError(s, _k))
+    k < 0 && throw(BoundsError(s, _k))
 
     if n == 0
         return 0 <= k <= m ? max(k, 1) : sentinel
@@ -616,7 +623,7 @@ julia> occursin(r"a.a", "abba")
 false
 ```
 
-See also: [`contains`](@ref).
+See also [`contains`](@ref).
 """
 occursin(needle::Union{AbstractString,AbstractChar}, haystack::AbstractString) =
     _searchindex(haystack, needle, firstindex(haystack)) != 0
@@ -634,4 +641,4 @@ The returned function is of type `Base.Fix2{typeof(occursin)}`.
 """
 occursin(haystack) = Base.Fix2(occursin, haystack)
 
-in(::AbstractString, ::AbstractString) = error("use occursin(x, y) for string containment")
+in(::AbstractString, ::AbstractString) = error("use occursin(needle, haystack) for string containment")
diff --git a/base/strings/string.jl b/base/strings/string.jl
index 99b706fc27ced7..c818e2e1844fbe 100644
--- a/base/strings/string.jl
+++ b/base/strings/string.jl
@@ -25,7 +25,7 @@ function Base.showerror(io::IO, exc::StringIndexError)
     end
 end
 
-const ByteArray = Union{Vector{UInt8},Vector{Int8}}
+const ByteArray = Union{CodeUnits{UInt8,String}, Vector{UInt8},Vector{Int8}, FastContiguousSubArray{UInt8,1,CodeUnits{UInt8,String}}, FastContiguousSubArray{UInt8,1,Vector{UInt8}}, FastContiguousSubArray{Int8,1,Vector{Int8}}}
 
 @inline between(b::T, lo::T, hi::T) where {T<:Integer} = (lo ≤ b) & (b ≤ hi)
 
@@ -252,7 +252,7 @@ function getindex_continued(s::String, i::Int, u::UInt32)
     return reinterpret(Char, u)
 end
 
-getindex(s::String, r::UnitRange{<:Integer}) = s[Int(first(r)):Int(last(r))]
+getindex(s::String, r::AbstractUnitRange{<:Integer}) = s[Int(first(r)):Int(last(r))]
 
 @inline function getindex(s::String, r::UnitRange{Int})
     isempty(r) && return ""
diff --git a/base/strings/substring.jl b/base/strings/substring.jl
index 3e99cc7477446d..7cc4c53a6b6619 100644
--- a/base/strings/substring.jl
+++ b/base/strings/substring.jl
@@ -205,7 +205,13 @@ end
     return n
 end
 
-function string(a::Union{Char, String, SubString{String}}...)
+@inline function __unsafe_string!(out, s::Symbol, offs::Integer) # Symbol method: copy the symbol's bytes into `out`
+    n = sizeof(s)
+    GC.@preserve s out unsafe_copyto!(pointer(out, offs), unsafe_convert(Ptr{UInt8},s), n) # raw copy of `n` bytes to offset `offs`
+    return n # number of bytes copied
+end
+
+function string(a::Union{Char, String, SubString{String}, Symbol}...)
     n = 0
     for v in a
         if v isa Char
@@ -252,4 +258,4 @@ function filter(f, s::Union{String, SubString{String}})
     return String(out)
 end
 
-getindex(s::AbstractString, r::UnitRange{<:Integer}) = SubString(s, r)
+getindex(s::AbstractString, r::AbstractUnitRange{<:Integer}) = SubString(s, r)
diff --git a/base/strings/unicode.jl b/base/strings/unicode.jl
index 38ffacd8aa5724..cf215849ab08ca 100644
--- a/base/strings/unicode.jl
+++ b/base/strings/unicode.jl
@@ -283,7 +283,7 @@ isassigned(c) = UTF8PROC_CATEGORY_CN < category_code(c) <= UTF8PROC_CATEGORY_CO
 Tests whether a character is a lowercase letter (according to the Unicode
 standard's `Lowercase` derived property).
 
-See also: [`isuppercase`](@ref).
+See also [`isuppercase`](@ref).
 
 # Examples
 ```jldoctest
@@ -307,7 +307,7 @@ islowercase(c::AbstractChar) = ismalformed(c) ? false : Bool(ccall(:utf8proc_isl
 Tests whether a character is an uppercase letter (according to the Unicode
 standard's `Uppercase` derived property).
 
-See also: [`islowercase`](@ref).
+See also [`islowercase`](@ref).
 
 # Examples
 ```jldoctest
@@ -328,7 +328,7 @@ isuppercase(c::AbstractChar) = ismalformed(c) ? false : Bool(ccall(:utf8proc_isu
 
 Tests whether a character is cased, i.e. is lower-, upper- or title-cased.
 
-See also: [`islowercase`](@ref), [`isuppercase`](@ref).
+See also [`islowercase`](@ref), [`isuppercase`](@ref).
 """
 function iscased(c::AbstractChar)
     cat = category_code(c)
@@ -514,7 +514,7 @@ isxdigit(c::AbstractChar) = '0'<=c<='9' || 'a'<=c<='f' || 'A'<=c<='F'
 
 Return `s` with all characters converted to uppercase.
 
-See also: [`lowercase`](@ref), [`titlecase`](@ref), [`uppercasefirst`](@ref).
+See also [`lowercase`](@ref), [`titlecase`](@ref), [`uppercasefirst`](@ref).
 
 # Examples
 ```jldoctest
@@ -529,7 +529,7 @@ uppercase(s::AbstractString) = map(uppercase, s)
 
 Return `s` with all characters converted to lowercase.
 
-See also: [`uppercase`](@ref), [`titlecase`](@ref), [`lowercasefirst`](@ref).
+See also [`uppercase`](@ref), [`titlecase`](@ref), [`lowercasefirst`](@ref).
 
 # Examples
 ```jldoctest
@@ -551,7 +551,7 @@ which characters should be considered as word separators.
 See also [`uppercasefirst`](@ref) to capitalize only the first
 character in `s`.
 
-See also: [`uppercase`](@ref), [`lowercase`](@ref), [`uppercasefirst`](@ref).
+See also [`uppercase`](@ref), [`lowercase`](@ref), [`uppercasefirst`](@ref).
 
 # Examples
 ```jldoctest
@@ -593,8 +593,8 @@ Return `s` with the first character converted to uppercase (technically "title
 case" for Unicode). See also [`titlecase`](@ref) to capitalize the first
 character of every word in `s`.
 
-See also: [`lowercasefirst`](@ref), [`uppercase`](@ref), [`lowercase`](@ref),
-[`titlecase`](@ref)
+See also [`lowercasefirst`](@ref), [`uppercase`](@ref), [`lowercase`](@ref),
+[`titlecase`](@ref).
 
 # Examples
 ```jldoctest
@@ -615,8 +615,8 @@ end
 
 Return `s` with the first character converted to lowercase.
 
-See also: [`uppercasefirst`](@ref), [`uppercase`](@ref), [`lowercase`](@ref),
-[`titlecase`](@ref)
+See also [`uppercasefirst`](@ref), [`uppercase`](@ref), [`lowercase`](@ref),
+[`titlecase`](@ref).
 
 # Examples
 ```jldoctest
diff --git a/base/strings/util.jl b/base/strings/util.jl
index 140c5a31194f1f..b2e9e2e4588f5e 100644
--- a/base/strings/util.jl
+++ b/base/strings/util.jl
@@ -10,7 +10,7 @@ const Chars = Union{AbstractChar,Tuple{Vararg{AbstractChar}},AbstractVector{<:Ab
 Return `true` if `s` starts with `prefix`. If `prefix` is a vector or set
 of characters, test whether the first character of `s` belongs to that set.
 
-See also [`endswith`](@ref).
+See also [`endswith`](@ref), [`contains`](@ref).
 
 # Examples
 ```jldoctest
@@ -30,7 +30,7 @@ startswith(str::AbstractString, chars::Chars) = !isempty(str) && first(str)::Abs
 Return `true` if `s` ends with `suffix`. If `suffix` is a vector or set of
 characters, test whether the last character of `s` belongs to that set.
 
-See also [`startswith`](@ref).
+See also [`startswith`](@ref), [`contains`](@ref).
 
 # Examples
 ```jldoctest
@@ -77,6 +77,8 @@ Return `true` if `haystack` contains `needle`.
 This is the same as `occursin(needle, haystack)`, but is provided for consistency with
 `startswith(haystack, needle)` and `endswith(haystack, needle)`.
 
+See also [`occursin`](@ref), [`in`](@ref), [`issubset`](@ref).
+
 # Examples
 ```jldoctest
 julia> contains("JuliaLang is pretty cool!", "Julia")
@@ -166,6 +168,8 @@ The call `chop(s)` removes the last character from `s`.
 If it is requested to remove more characters than `length(s)`
 then an empty string is returned.
 
+See also [`chomp`](@ref), [`startswith`](@ref), [`first`](@ref).
+
 # Examples
 ```jldoctest
 julia> a = "March"
@@ -196,6 +200,8 @@ end
 
 Remove a single trailing newline from a string.
 
+See also [`chop`](@ref).
+
 # Examples
 ```jldoctest
 julia> chomp("Hello\\n")
@@ -233,6 +239,8 @@ The default behaviour is to remove leading whitespace and delimiters: see
 The optional `chars` argument specifies which characters to remove: it can be a single
 character, or a vector or set of characters.
 
+See also [`strip`](@ref) and [`rstrip`](@ref).
+
 # Examples
 ```jldoctest
 julia> a = lpad("March", 20)
@@ -265,6 +273,8 @@ The default behaviour is to remove trailing whitespace and delimiters: see
 The optional `chars` argument specifies which characters to remove: it can be a single
 character, or a vector or set of characters.
 
+See also [`strip`](@ref) and [`lstrip`](@ref).
+
 # Examples
 ```jldoctest
 julia> a = rpad("March", 20)
@@ -296,6 +306,8 @@ The default behaviour is to remove leading and trailing whitespace and delimiter
 The optional `chars` argument specifies which characters to remove: it can be a single
 character, vector or set of characters.
 
+See also [`lstrip`](@ref) and [`rstrip`](@ref).
+
 !!! compat "Julia 1.2"
     The method which accepts a predicate function requires Julia 1.2 or later.
 
@@ -315,7 +327,7 @@ strip(f, s::AbstractString) = lstrip(f, rstrip(f, s))
     lpad(s, n::Integer, p::Union{AbstractChar,AbstractString}=' ') -> String
 
 Stringify `s` and pad the resulting string on the left with `p` to make it `n`
-characters (code points) long. If `s` is already `n` characters long, an equal
+characters (in [`textwidth`](@ref)) long. If `s` is already `n` characters long, an equal
 string is returned. Pad with spaces by default.
 
 # Examples
@@ -323,6 +335,8 @@ string is returned. Pad with spaces by default.
 julia> lpad("March", 10)
 "     March"
 ```
+!!! compat "Julia 1.7"
+    In Julia 1.7, this function was changed to use `textwidth` rather than a raw character (codepoint) count.
 """
 lpad(s, n::Integer, p::Union{AbstractChar,AbstractString}=' ') = lpad(string(s)::AbstractString, n, string(p))
 
@@ -332,9 +346,9 @@ function lpad(
     p::Union{AbstractChar,AbstractString}=' ',
 ) :: String
     n = Int(n)::Int
-    m = signed(n) - Int(length(s))::Int
+    m = signed(n) - Int(textwidth(s))::Int
     m ≤ 0 && return string(s)
-    l = length(p)
+    l = textwidth(p)
     q, r = divrem(m, l)
     r == 0 ? string(p^q, s) : string(p^q, first(p, r), s)
 end
@@ -343,7 +357,7 @@ end
     rpad(s, n::Integer, p::Union{AbstractChar,AbstractString}=' ') -> String
 
 Stringify `s` and pad the resulting string on the right with `p` to make it `n`
-characters (code points) long. If `s` is already `n` characters long, an equal
+characters (in [`textwidth`](@ref)) long. If `s` is already `n` characters long, an equal
 string is returned. Pad with spaces by default.
 
 # Examples
@@ -351,6 +365,8 @@ string is returned. Pad with spaces by default.
 julia> rpad("March", 20)
 "March               "
 ```
+!!! compat "Julia 1.7"
+    In Julia 1.7, this function was changed to use `textwidth` rather than a raw character (codepoint) count.
 """
 rpad(s, n::Integer, p::Union{AbstractChar,AbstractString}=' ') = rpad(string(s)::AbstractString, n, string(p))
 
@@ -360,9 +376,9 @@ function rpad(
     p::Union{AbstractChar,AbstractString}=' ',
 ) :: String
     n = Int(n)::Int
-    m = signed(n) - Int(length(s))::Int
+    m = signed(n) - Int(textwidth(s))::Int
     m ≤ 0 && return string(s)
-    l = length(p)
+    l = textwidth(p)
     q, r = divrem(m, l)
     r == 0 ? string(s, p^q) : string(s, p^q, first(p, r))
 end
@@ -510,56 +526,74 @@ _replace(io, repl::Function, str, r, pattern) =
 _replace(io, repl::Function, str, r, pattern::Function) =
     print(io, repl(str[first(r)]))
 
-replace(str::String, pat_repl::Pair{<:AbstractChar}; count::Integer=typemax(Int)) =
-    replace(str, isequal(first(pat_repl)) => last(pat_repl); count=count)
-
-replace(str::String, pat_repl::Pair{<:Union{Tuple{Vararg{AbstractChar}},
-                                            AbstractVector{<:AbstractChar},Set{<:AbstractChar}}};
-        count::Integer=typemax(Int)) =
-    replace(str, in(first(pat_repl)) => last(pat_repl), count=count)
-
 _pat_replacer(x) = x
 _free_pat_replacer(x) = nothing
 
-function replace(str::String, pat_repl::Pair; count::Integer=typemax(Int))
-    pattern, repl = pat_repl
+_pat_replacer(x::AbstractChar) = isequal(x)
+_pat_replacer(x::Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}}) = in(x)
+
+function replace(str::String, pat_repl::Vararg{Pair,N}; count::Integer=typemax(Int)) where N
     count == 0 && return str
     count < 0 && throw(DomainError(count, "`count` must be non-negative."))
     n = 1
-    e = lastindex(str)
+    e1 = nextind(str, lastindex(str)) # one past the last index, i.e. ncodeunits(str) + 1
     i = a = firstindex(str)
-    pattern = _pat_replacer(pattern)
-    r = something(findnext(pattern,str,i), 0)
-    j, k = first(r), last(r)
-    if j == 0
-        _free_pat_replacer(pattern)
+    patterns = map(p -> _pat_replacer(first(p)), pat_repl) # normalized search patterns
+    replaces = map(last, pat_repl) # replacement for each pattern, same order
+    rs = map(patterns) do p # first match of every pattern, searching from the start
+        r = findnext(p, str, a)
+        if r === nothing || first(r) == 0
+            return e1+1:0 # "no match" sentinel: empty range starting past e1
+        end
+        r isa Int && (r = r:r) # findnext / performance fix
+        return r
+    end
+    if all(>(e1), map(first, rs)) # no pattern matches anywhere: return the input unchanged
+        foreach(_free_pat_replacer, patterns)
         return str
     end
     out = IOBuffer(sizehint=floor(Int, 1.2sizeof(str)))
-    while j != 0
+    while true
+        p = argmin(map(first, rs)) # TODO: or argmin(rs), to pick the shortest first match ?
+        r = rs[p]
+        j, k = first(r), last(r)
+        j > e1 && break # earliest remaining match is the sentinel: nothing left to replace
         if i == a || i <= k
+            # copy out preserved portion
             GC.@preserve str unsafe_write(out, pointer(str, i), UInt(j-i))
-            _replace(out, repl, str, r, pattern)
+            # copy out replacement string
+            _replace(out, replaces[p], str, r, patterns[p])
         end
         if k < j
             i = j
-            j > e && break
+            j == e1 && break # empty match at the very end of the string
             k = nextind(str, j)
         else
             i = k = nextind(str, k)
         end
-        r = something(findnext(pattern,str,k), 0)
-        r === 0:-1 || n == count && break
-        j, k = first(r), last(r)
+        n == count && break # replacement limit reached
+        let k = k # fresh binding of `k` for the closure below (presumably to avoid boxing; confirm)
+            rs = map(patterns, rs) do p, r
+                if first(r) < k # cached match starts before the resume point `k`: search again
+                    r = findnext(p, str, k)
+                    if r === nothing || first(r) == 0
+                        return e1+1:0
+                    end
+                    r isa Int && (r = r:r) # findnext / performance fix
+                end
+                return r
+            end
+        end
         n += 1
     end
-    _free_pat_replacer(pattern)
-    write(out, SubString(str,i))
-    String(take!(out))
+    foreach(_free_pat_replacer, patterns)
+    write(out, SubString(str, i))
+    return String(take!(out))
 end
 
+
 """
-    replace(s::AbstractString, pat=>r; [count::Integer])
+    replace(s::AbstractString, pat=>r, [pat2=>r2, ...]; [count::Integer])
 
 Search for the given pattern `pat` in `s`, and replace each occurrence with `r`.
 If `count` is provided, replace at most `count` occurrences.
@@ -572,6 +606,13 @@ If `pat` is a regular expression and `r` is a [`SubstitutionString`](@ref), then
 references in `r` are replaced with the corresponding matched text.
 To remove instances of `pat` from `string`, set `r` to the empty `String` (`""`).
 
+Multiple patterns can be specified; they are matched simultaneously in a single
+left-to-right pass, so at most one pattern is applied to any character, and the
+patterns are applied only to the input text, not to the replacements.
+
+!!! compat "Julia 1.7"
+    Support for multiple patterns requires Julia 1.7 or later.
+
 # Examples
 ```jldoctest
 julia> replace("Python is a programming language.", "Python" => "Julia")
@@ -585,25 +626,33 @@ julia> replace("The quick foxes run quickly.", "quick" => "", count=1)
 
 julia> replace("The quick foxes run quickly.", r"fox(es)?" => s"bus\\1")
 "The quick buses run quickly."
+
+julia> replace("abcabc", "a" => "b", "b" => "c", r".+" => "a")
+"bca"
 ```
 """
-replace(s::AbstractString, pat_f::Pair; count=typemax(Int)) =
-    replace(String(s), pat_f, count=count)
+replace(s::AbstractString, pat_f::Pair...; count=typemax(Int)) =
+    replace(String(s), pat_f..., count=count)
 
 # TODO: allow transform as the first argument to replace?
 
 # hex <-> bytes conversion
 
 """
-    hex2bytes(s::Union{AbstractString,AbstractVector{UInt8}})
+    hex2bytes(itr)
 
-Given a string or array `s` of ASCII codes for a sequence of hexadecimal digits, returns a
+Given an iterable `itr` of ASCII codes for a sequence of hexadecimal digits, returns a
 `Vector{UInt8}` of bytes  corresponding to the binary representation: each successive pair
-of hexadecimal digits in `s` gives the value of one byte in the return vector.
+of hexadecimal digits in `itr` gives the value of one byte in the return vector.
 
-The length of `s` must be even, and the returned array has half of the length of `s`.
+The length of `itr` must be even, and the returned array has half of the length of `itr`.
 See also [`hex2bytes!`](@ref) for an in-place version, and [`bytes2hex`](@ref) for the inverse.
 
+!!! compat "Julia 1.7"
+    Calling `hex2bytes` with iterables producing `UInt8` requires
+    Julia 1.7 or later. In earlier versions, you can `collect` the iterable
+    before calling instead.
+
 # Examples
 ```jldoctest
 julia> s = string(12345, base = 16)
@@ -632,46 +681,64 @@ julia> hex2bytes(a)
 """
 function hex2bytes end
 
-hex2bytes(s::AbstractString) = hex2bytes(String(s))
-hex2bytes(s::Union{String,AbstractVector{UInt8}}) = hex2bytes!(Vector{UInt8}(undef, length(s) >> 1), s)
+hex2bytes(s) = hex2bytes!(Vector{UInt8}(undef, length(s) >> 1), s)
 
-_firstbyteidx(s::String) = 1
-_firstbyteidx(s::AbstractVector{UInt8}) = first(eachindex(s))
-_lastbyteidx(s::String) = sizeof(s)
-_lastbyteidx(s::AbstractVector{UInt8}) = lastindex(s)
+# special case - valid bytes are checked in the generic implementation
+function hex2bytes!(dest::AbstractArray{UInt8}, s::String)
+    sizeof(s) != length(s) && throw(ArgumentError("input string must consist of hexadecimal characters only"))
+
+    hex2bytes!(dest, transcode(UInt8, s))
+end
 
 """
-    hex2bytes!(d::AbstractVector{UInt8}, s::Union{String,AbstractVector{UInt8}})
+    hex2bytes!(dest::AbstractVector{UInt8}, itr)
 
-Convert an array `s` of bytes representing a hexadecimal string to its binary
+Convert an iterable `itr` of bytes representing a hexadecimal string to its binary
 representation, similar to [`hex2bytes`](@ref) except that the output is written in-place
-in `d`.   The length of `s` must be exactly twice the length of `d`.
-"""
-function hex2bytes!(d::AbstractVector{UInt8}, s::Union{String,AbstractVector{UInt8}})
-    if 2length(d) != sizeof(s)
-        isodd(sizeof(s)) && throw(ArgumentError("input hex array must have even length"))
-        throw(ArgumentError("output array must be half length of input array"))
+to `dest`. The length of `dest` must be half the length of `itr`.
+
+!!! compat "Julia 1.7"
+    Calling hex2bytes! with iterables producing UInt8 requires
+    version 1.7. In earlier versions, you can collect the iterable
+    before calling instead.
+"""
+function hex2bytes!(dest::AbstractArray{UInt8}, itr)
+    isodd(length(itr)) && throw(ArgumentError("length of iterable must be even"))
+    @boundscheck 2*length(dest) != length(itr) && throw(ArgumentError("length of output array must be half of the length of input iterable"))
+    iszero(length(itr)) && return dest
+
+    next = iterate(itr)
+    @inbounds for i in eachindex(dest)
+        x,state = next::NTuple{2,Any}
+        y,state = iterate(itr, state)::NTuple{2,Any}
+        next = iterate(itr, state)
+        dest[i] = number_from_hex(x) << 4 + number_from_hex(y)
     end
-    j = first(eachindex(d)) - 1
-    for i = _firstbyteidx(s):2:_lastbyteidx(s)
-        @inbounds d[j += 1] = number_from_hex(_nthbyte(s,i)) << 4 + number_from_hex(_nthbyte(s,i+1))
-    end
-    return d
+
+    return dest
 end
 
-@inline number_from_hex(c) =
-    (UInt8('0') <= c <= UInt8('9')) ? c - UInt8('0') :
-    (UInt8('A') <= c <= UInt8('F')) ? c - (UInt8('A') - 0x0a) :
-    (UInt8('a') <= c <= UInt8('f')) ? c - (UInt8('a') - 0x0a) :
+@inline number_from_hex(c::AbstractChar) = number_from_hex(Char(c))
+@inline number_from_hex(c::Char) = number_from_hex(UInt8(c))
+@inline function number_from_hex(c::UInt8)
+    UInt8('0') <= c <= UInt8('9') && return c - UInt8('0')
+    c |= 0b0100000
+    UInt8('a') <= c <= UInt8('f') && return c - UInt8('a') + 0x0a
     throw(ArgumentError("byte is not an ASCII hexadecimal digit"))
+end
 
 """
-    bytes2hex(a::AbstractArray{UInt8}) -> String
-    bytes2hex(io::IO, a::AbstractArray{UInt8})
+    bytes2hex(itr) -> String
+    bytes2hex(io::IO, itr)
+
+Convert an iterator `itr` of bytes to its hexadecimal string representation, either
+returning a `String` via `bytes2hex(itr)` or writing the string to an `io` stream
+via `bytes2hex(io, itr)`.  The hexadecimal characters are all lowercase.
 
-Convert an array `a` of bytes to its hexadecimal string representation, either
-returning a `String` via `bytes2hex(a)` or writing the string to an `io` stream
-via `bytes2hex(io, a)`.  The hexadecimal characters are all lowercase.
+!!! compat "Julia 1.7"
+    Calling bytes2hex with iterators producing UInt8 requires
+    version 1.7. In earlier versions, you can collect the iterable
+    before calling instead.
 
 # Examples
 ```jldoctest
@@ -689,17 +756,19 @@ julia> bytes2hex(b)
 """
 function bytes2hex end
 
-function bytes2hex(a::Union{Tuple{Vararg{UInt8}}, AbstractArray{UInt8}})
-    b = Base.StringVector(2*length(a))
-    @inbounds for (i, x) in enumerate(a)
+function bytes2hex(itr)
+    eltype(itr) === UInt8 || throw(ArgumentError("eltype of iterator not UInt8"))
+    b = Base.StringVector(2*length(itr))
+    @inbounds for (i, x) in enumerate(itr)
         b[2i - 1] = hex_chars[1 + x >> 4]
         b[2i    ] = hex_chars[1 + x & 0xf]
     end
     return String(b)
 end
 
-function bytes2hex(io::IO, a::Union{Tuple{Vararg{UInt8}}, AbstractArray{UInt8}})
-    for x in a
+function bytes2hex(io::IO, itr)
+    eltype(itr) === UInt8 || throw(ArgumentError("eltype of iterator not UInt8"))
+    for x in itr
         print(io, Char(hex_chars[1 + x >> 4]), Char(hex_chars[1 + x & 0xf]))
     end
 end
@@ -719,6 +788,8 @@ end
 Convert a string to `String` type and check that it contains only ASCII data, otherwise
 throwing an `ArgumentError` indicating the position of the first non-ASCII byte.
 
+See also the [`isascii`](@ref) predicate to filter or replace non-ASCII characters.
+
 # Examples
 ```jldoctest
 julia> ascii("abcdeγfgh")
diff --git a/base/subarray.jl b/base/subarray.jl
index 32262058cb55b2..ff2408bb48534a 100644
--- a/base/subarray.jl
+++ b/base/subarray.jl
@@ -17,22 +17,22 @@ struct SubArray{T,N,P,I,L} <: AbstractArray{T,N}
     offset1::Int       # for linear indexing and pointer, only valid when L==true
     stride1::Int       # used only for linear indexing
     function SubArray{T,N,P,I,L}(parent, indices, offset1, stride1) where {T,N,P,I,L}
-        @_inline_meta
+        @inline
         check_parent_index_match(parent, indices)
         new(parent, indices, offset1, stride1)
     end
 end
 # Compute the linear indexability of the indices, and combine it with the linear indexing of the parent
 function SubArray(parent::AbstractArray, indices::Tuple)
-    @_inline_meta
+    @inline
     SubArray(IndexStyle(viewindexing(indices), IndexStyle(parent)), parent, ensure_indexable(indices), index_dimsum(indices...))
 end
 function SubArray(::IndexCartesian, parent::P, indices::I, ::NTuple{N,Any}) where {P,I,N}
-    @_inline_meta
+    @inline
     SubArray{eltype(P), N, P, I, false}(parent, indices, 0, 0)
 end
 function SubArray(::IndexLinear, parent::P, indices::I, ::NTuple{N,Any}) where {P,I,N}
-    @_inline_meta
+    @inline
     # Compute the stride and offset
     stride1 = compute_stride1(parent, indices)
     SubArray{eltype(P), N, P, I, true}(parent, indices, compute_offset1(parent, stride1, indices), stride1)
@@ -46,9 +46,9 @@ check_parent_index_match(parent, ::NTuple{N, Bool}) where {N} =
 # This computes the linear indexing compatibility for a given tuple of indices
 viewindexing(I::Tuple{}) = IndexLinear()
 # Leading scalar indices simply increase the stride
-viewindexing(I::Tuple{ScalarIndex, Vararg{Any}}) = (@_inline_meta; viewindexing(tail(I)))
+viewindexing(I::Tuple{ScalarIndex, Vararg{Any}}) = (@inline; viewindexing(tail(I)))
 # Slices may begin a section which may be followed by any number of Slices
-viewindexing(I::Tuple{Slice, Slice, Vararg{Any}}) = (@_inline_meta; viewindexing(tail(I)))
+viewindexing(I::Tuple{Slice, Slice, Vararg{Any}}) = (@inline; viewindexing(tail(I)))
 # A UnitRange can follow Slices, but only if all other indices are scalar
 viewindexing(I::Tuple{Slice, AbstractUnitRange, Vararg{ScalarIndex}}) = IndexLinear()
 viewindexing(I::Tuple{Slice, Slice, Vararg{ScalarIndex}}) = IndexLinear() # disambiguate
@@ -60,7 +60,7 @@ viewindexing(I::Tuple{Vararg{Any}}) = IndexCartesian()
 viewindexing(I::Tuple{AbstractArray, Vararg{Any}}) = IndexCartesian()
 
 # Simple utilities
-size(V::SubArray) = (@_inline_meta; map(unsafe_length, axes(V)))
+size(V::SubArray) = (@inline; map(length, axes(V)))
 
 similar(V::SubArray, T::Type, dims::Dims) = similar(V.parent, T, dims)
 
@@ -172,7 +172,7 @@ julia> view(2:5, 2:3) # returns a range as type is immutable
 ```
 """
 function view(A::AbstractArray, I::Vararg{Any,N}) where {N}
-    @_inline_meta
+    @inline
     J = map(i->unalias(A,i), to_indices(A, I))
     @boundscheck checkbounds(A, J...)
     unsafe_view(_maybe_reshape_parent(A, index_ndims(J...)), J...)
@@ -204,8 +204,14 @@ function view(r1::LinRange, r2::OrdinalRange{<:Integer})
     getindex(r1, r2)
 end
 
+# getindex(r::AbstractRange, ::Colon) returns a copy of the range, and we may do the same for a view
+function view(r1::AbstractRange, c::Colon)
+    @_propagate_inbounds_meta
+    getindex(r1, c)
+end
+
 function unsafe_view(A::AbstractArray, I::Vararg{ViewIndex,N}) where {N}
-    @_inline_meta
+    @inline
     SubArray(A, I)
 end
 # When we take the view of a view, it's often possible to "reindex" the parent
@@ -215,16 +221,16 @@ end
 # So we use _maybe_reindex to figure out if there are any arrays of
 # `CartesianIndex`, and if so, we punt and keep two layers of indirection.
 unsafe_view(V::SubArray, I::Vararg{ViewIndex,N}) where {N} =
-    (@_inline_meta; _maybe_reindex(V, I))
-_maybe_reindex(V, I) = (@_inline_meta; _maybe_reindex(V, I, I))
+    (@inline; _maybe_reindex(V, I))
+_maybe_reindex(V, I) = (@inline; _maybe_reindex(V, I, I))
 _maybe_reindex(V, I, ::Tuple{AbstractArray{<:AbstractCartesianIndex}, Vararg{Any}}) =
-    (@_inline_meta; SubArray(V, I))
+    (@inline; SubArray(V, I))
 # But allow arrays of CartesianIndex{1}; they behave just like arrays of Ints
 _maybe_reindex(V, I, A::Tuple{AbstractArray{<:AbstractCartesianIndex{1}}, Vararg{Any}}) =
-    (@_inline_meta; _maybe_reindex(V, I, tail(A)))
-_maybe_reindex(V, I, A::Tuple{Any, Vararg{Any}}) = (@_inline_meta; _maybe_reindex(V, I, tail(A)))
+    (@inline; _maybe_reindex(V, I, tail(A)))
+_maybe_reindex(V, I, A::Tuple{Any, Vararg{Any}}) = (@inline; _maybe_reindex(V, I, tail(A)))
 function _maybe_reindex(V, I, ::Tuple{})
-    @_inline_meta
+    @inline
     @inbounds idxs = to_indices(V.parent, reindex(V.indices, I))
     SubArray(V.parent, idxs)
 end
@@ -271,7 +277,7 @@ end
 # In general, we simply re-index the parent indices by the provided ones
 SlowSubArray{T,N,P,I} = SubArray{T,N,P,I,false}
 function getindex(V::SubArray{T,N}, I::Vararg{Int,N}) where {T,N}
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(V, I...)
     @inbounds r = V.parent[reindex(V.indices, I)...]
     r
@@ -280,7 +286,7 @@ end
 # But SubArrays with fast linear indexing pre-compute a stride and offset
 FastSubArray{T,N,P,I} = SubArray{T,N,P,I,true}
 function getindex(V::FastSubArray, i::Int)
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(V, i)
     @inbounds r = V.parent[V.offset1 + V.stride1*i]
     r
@@ -290,7 +296,7 @@ end
 FastContiguousSubArray{T,N,P,I<:Union{Tuple{Union{Slice, AbstractUnitRange}, Vararg{Any}},
                                       Tuple{Vararg{ScalarIndex}}}} = SubArray{T,N,P,I,true}
 function getindex(V::FastContiguousSubArray, i::Int)
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(V, i)
     @inbounds r = V.parent[V.offset1 + i]
     r
@@ -298,13 +304,13 @@ end
 # For vector views with linear indexing, we disambiguate to favor the stride/offset
 # computation as that'll generally be faster than (or just as fast as) re-indexing into a range.
 function getindex(V::FastSubArray{<:Any, 1}, i::Int)
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(V, i)
     @inbounds r = V.parent[V.offset1 + V.stride1*i]
     r
 end
 function getindex(V::FastContiguousSubArray{<:Any, 1}, i::Int)
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(V, i)
     @inbounds r = V.parent[V.offset1 + i]
     r
@@ -312,31 +318,31 @@ end
 
 # Indexed assignment follows the same pattern as `getindex` above
 function setindex!(V::SubArray{T,N}, x, I::Vararg{Int,N}) where {T,N}
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(V, I...)
     @inbounds V.parent[reindex(V.indices, I)...] = x
     V
 end
 function setindex!(V::FastSubArray, x, i::Int)
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(V, i)
     @inbounds V.parent[V.offset1 + V.stride1*i] = x
     V
 end
 function setindex!(V::FastContiguousSubArray, x, i::Int)
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(V, i)
     @inbounds V.parent[V.offset1 + i] = x
     V
 end
 function setindex!(V::FastSubArray{<:Any, 1}, x, i::Int)
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(V, i)
     @inbounds V.parent[V.offset1 + V.stride1*i] = x
     V
 end
 function setindex!(V::FastContiguousSubArray{<:Any, 1}, x, i::Int)
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(V, i)
     @inbounds V.parent[V.offset1 + i] = x
     V
@@ -358,11 +364,11 @@ substrides(strds, I::Tuple{Any, Vararg{Any}}) = throw(ArgumentError("strides is
 stride(V::SubArray, d::Integer) = d <= ndims(V) ? strides(V)[d] : strides(V)[end] * size(V)[end]
 
 compute_stride1(parent::AbstractArray, I::NTuple{N,Any}) where {N} =
-    (@_inline_meta; compute_stride1(1, fill_to_length(axes(parent), OneTo(1), Val(N)), I))
+    (@inline; compute_stride1(1, fill_to_length(axes(parent), OneTo(1), Val(N)), I))
 compute_stride1(s, inds, I::Tuple{}) = s
 compute_stride1(s, inds, I::Tuple{Vararg{ScalarIndex}}) = s
 compute_stride1(s, inds, I::Tuple{ScalarIndex, Vararg{Any}}) =
-    (@_inline_meta; compute_stride1(s*unsafe_length(inds[1]), tail(inds), tail(I)))
+    (@inline; compute_stride1(s*length(inds[1]), tail(inds), tail(I)))
 compute_stride1(s, inds, I::Tuple{AbstractRange, Vararg{Any}}) = s*step(I[1])
 compute_stride1(s, inds, I::Tuple{Slice, Vararg{Any}}) = s
 compute_stride1(s, inds, I::Tuple{Any, Vararg{Any}}) = throw(ArgumentError("invalid strided index type $(typeof(I[1]))"))
@@ -385,42 +391,42 @@ end
 # The running sum is `f`; the cumulative stride product is `s`.
 # If the parent is a vector, then we offset the parent's own indices with parameters of I
 compute_offset1(parent::AbstractVector, stride1::Integer, I::Tuple{AbstractRange}) =
-    (@_inline_meta; first(I[1]) - stride1*first(axes1(I[1])))
+    (@inline; first(I[1]) - stride1*first(axes1(I[1])))
 # If the result is one-dimensional and it's a Colon, then linear
 # indexing uses the indices along the given dimension.
 # If the result is one-dimensional and it's a range, then linear
 # indexing might be offset if the index itself is offset
 # Otherwise linear indexing always matches the parent.
 compute_offset1(parent, stride1::Integer, I::Tuple) =
-    (@_inline_meta; compute_offset1(parent, stride1, find_extended_dims(1, I...), find_extended_inds(I...), I))
+    (@inline; compute_offset1(parent, stride1, find_extended_dims(1, I...), find_extended_inds(I...), I))
 compute_offset1(parent, stride1::Integer, dims::Tuple{Int}, inds::Tuple{Slice}, I::Tuple) =
-    (@_inline_meta; compute_linindex(parent, I) - stride1*first(axes(parent, dims[1])))  # index-preserving case
+    (@inline; compute_linindex(parent, I) - stride1*first(axes(parent, dims[1])))  # index-preserving case
 compute_offset1(parent, stride1::Integer, dims, inds::Tuple{AbstractRange}, I::Tuple) =
-    (@_inline_meta; compute_linindex(parent, I) - stride1*first(axes1(inds[1]))) # potentially index-offsetting case
+    (@inline; compute_linindex(parent, I) - stride1*first(axes1(inds[1]))) # potentially index-offsetting case
 compute_offset1(parent, stride1::Integer, dims, inds, I::Tuple) =
-    (@_inline_meta; compute_linindex(parent, I) - stride1)
+    (@inline; compute_linindex(parent, I) - stride1)
 function compute_linindex(parent, I::NTuple{N,Any}) where N
-    @_inline_meta
+    @inline
     IP = fill_to_length(axes(parent), OneTo(1), Val(N))
     compute_linindex(first(LinearIndices(parent)), 1, IP, I)
 end
 function compute_linindex(f, s, IP::Tuple, I::Tuple{ScalarIndex, Vararg{Any}})
-    @_inline_meta
+    @inline
     Δi = I[1]-first(IP[1])
-    compute_linindex(f + Δi*s, s*unsafe_length(IP[1]), tail(IP), tail(I))
+    compute_linindex(f + Δi*s, s*length(IP[1]), tail(IP), tail(I))
 end
 function compute_linindex(f, s, IP::Tuple, I::Tuple{Any, Vararg{Any}})
-    @_inline_meta
+    @inline
     Δi = first(I[1])-first(IP[1])
-    compute_linindex(f + Δi*s, s*unsafe_length(IP[1]), tail(IP), tail(I))
+    compute_linindex(f + Δi*s, s*length(IP[1]), tail(IP), tail(I))
 end
 compute_linindex(f, s, IP::Tuple, I::Tuple{}) = f
 
-find_extended_dims(dim, ::ScalarIndex, I...) = (@_inline_meta; find_extended_dims(dim + 1, I...))
-find_extended_dims(dim, i1, I...) = (@_inline_meta; (dim, find_extended_dims(dim + 1, I...)...))
+find_extended_dims(dim, ::ScalarIndex, I...) = (@inline; find_extended_dims(dim + 1, I...))
+find_extended_dims(dim, i1, I...) = (@inline; (dim, find_extended_dims(dim + 1, I...)...))
 find_extended_dims(dim) = ()
-find_extended_inds(::ScalarIndex, I...) = (@_inline_meta; find_extended_inds(I...))
-find_extended_inds(i1, I...) = (@_inline_meta; (i1, find_extended_inds(I...)...))
+find_extended_inds(::ScalarIndex, I...) = (@inline; find_extended_inds(I...))
+find_extended_inds(i1, I...) = (@inline; (i1, find_extended_inds(I...)...))
 find_extended_inds() = ()
 
 function unsafe_convert(::Type{Ptr{T}}, V::SubArray{T,N,P,<:Tuple{Vararg{RangeIndex}}}) where {T,N,P}
@@ -442,10 +448,10 @@ end
 # indices are taken from the range/vector
 # Since bounds-checking is performance-critical and uses
 # indices, it's worth optimizing these implementations thoroughly
-axes(S::SubArray) = (@_inline_meta; _indices_sub(S.indices...))
-_indices_sub(::Real, I...) = (@_inline_meta; _indices_sub(I...))
+axes(S::SubArray) = (@inline; _indices_sub(S.indices...))
+_indices_sub(::Real, I...) = (@inline; _indices_sub(I...))
 _indices_sub() = ()
 function _indices_sub(i1::AbstractArray, I...)
-    @_inline_meta
-    (unsafe_indices(i1)..., _indices_sub(I...)...)
+    @inline
+    (axes(i1)..., _indices_sub(I...)...)
 end
diff --git a/base/summarysize.jl b/base/summarysize.jl
index 916214a69f88f6..4baa0e0c941b15 100644
--- a/base/summarysize.jl
+++ b/base/summarysize.jl
@@ -17,6 +17,20 @@ Compute the amount of memory, in bytes, used by all unique objects reachable fro
 - `exclude`: specifies the types of objects to exclude from the traversal.
 - `chargeall`: specifies the types of objects to always charge the size of all of their
   fields, even if those fields would normally be excluded.
+
+See also [`sizeof`](@ref).
+
+# Examples
+```jldoctest
+julia> Base.summarysize(1.0)
+8
+
+julia> Base.summarysize(Ref(rand(100)))
+848
+
+julia> sizeof(Ref(rand(100)))
+8
+```
 """
 function summarysize(obj;
                      exclude = Union{DataType, Core.TypeName, Core.MethodInstance},
@@ -120,12 +134,13 @@ function (ss::SummarySize)(obj::Array)
     if !haskey(ss.seen, datakey)
         ss.seen[datakey] = true
         dsize = Core.sizeof(obj)
-        if isbitsunion(eltype(obj))
+        T = eltype(obj)
+        if isbitsunion(T)
             # add 1 union selector byte for each element
             dsize += length(obj)
         end
         size += dsize
-        if !isempty(obj) && !Base.allocatedinline(eltype(obj))
+        if !isempty(obj) && T !== Symbol && (!Base.allocatedinline(T) || (T isa DataType && !Base.datatype_pointerfree(T)))
             push!(ss.frontier_x, obj)
             push!(ss.frontier_i, 1)
         end
diff --git a/base/sysinfo.jl b/base/sysinfo.jl
index 4c113971266d75..cdcb304271b5df 100644
--- a/base/sysinfo.jl
+++ b/base/sysinfo.jl
@@ -286,8 +286,8 @@ end
 
 Get the maximum resident set size utilized in bytes.
 See also:
-    - man page of getrusage(2) on Linux and FreeBSD.
-    - windows api `GetProcessMemoryInfo`
+    - man page of `getrusage`(2) on Linux and FreeBSD.
+    - Windows API `GetProcessMemoryInfo`.
 """
 maxrss() = ccall(:jl_maxrss, Csize_t, ())
 
@@ -516,7 +516,7 @@ function which(program_name::String)
             program_path = joinpath(path_dir, pname)
             # If we find something that matches our name and we can execute
             if isfile(program_path) && isexecutable(program_path)
-                return realpath(program_path)
+                return program_path
             end
         end
     end
diff --git a/base/task.jl b/base/task.jl
index 8c9516fb397665..0d4e5da4ccfd4a 100644
--- a/base/task.jl
+++ b/base/task.jl
@@ -77,9 +77,14 @@ function showerror(io::IO, ex::TaskFailedException, bt = nothing; backtrace=true
 end
 
 function show_task_exception(io::IO, t::Task; indent = true)
-    stack = catch_stack(t)
+    stack = current_exceptions(t)
     b = IOBuffer()
-    show_exception_stack(IOContext(b, io), stack)
+    if isempty(stack)
+        # exception stack buffer not available; probably a serialized task
+        showerror(IOContext(b, io), t.result)
+    else
+        show_exception_stack(IOContext(b, io), stack)
+    end
     str = String(take!(b))
     if indent
         str = replace(str, "\n" => "\n    ")
@@ -157,7 +162,7 @@ end
         end
     elseif field === :backtrace
         # TODO: this field name should be deprecated in 2.0
-        return catch_stack(t)[end][2]
+        return current_exceptions(t)[end][2]
     elseif field === :exception
         # TODO: this field name should be deprecated in 2.0
         return t._isexception ? t.result : nothing
@@ -417,6 +422,43 @@ macro async(expr)
     end
 end
 
+"""
+    errormonitor(t::Task)
+
+Print an error log to `stderr` if task `t` fails.
+"""
+function errormonitor(t::Task)
+    t2 = Task() do
+        if istaskfailed(t)
+            local errs = stderr
+            try # try to display the failure atomically
+                errio = IOContext(PipeBuffer(), errs::IO)
+                emphasize(errio, "Unhandled Task ")
+                display_error(errio, current_exceptions(t))
+                write(errs, errio)
+            catch
+                try # try to display the secondary error atomically
+                    errio = IOContext(PipeBuffer(), errs::IO)
+                    print(errio, "\nSYSTEM: caught exception while trying to print a failed Task notice: ")
+                    display_error(errio, current_exceptions())
+                    write(errs, errio)
+                    flush(errs)
+                    # and then the actual error, as best we can
+                    Core.print(Core.stderr, "while handling: ")
+                    Core.println(Core.stderr, current_exceptions(t)[end][1])
+                catch e
+                    # give up
+                    Core.print(Core.stderr, "\nSYSTEM: caught exception of type ", typeof(e).name.name,
+                            " while trying to print a failed Task notice; giving up\n")
+                end
+            end
+        end
+        nothing
+    end
+    _wait2(t, t2)
+    return t
+end
+
 # Capture interpolated variables in $() and move them to let-block
 function _lift_one_interp!(e)
     letargs = Any[]  # store the new gensymed arguments
@@ -577,19 +619,28 @@ function enq_work(t::Task)
     # 1. The Task's stack is currently being used by the scheduler for a certain thread.
     # 2. There is only 1 thread.
     # 3. The multiq is full (can be fixed by making it growable).
-    if t.sticky || tid != 0 || Threads.nthreads() == 1
+    if t.sticky || Threads.nthreads() == 1
         if tid == 0
+            # Issue #41324
+            # t.sticky && tid == 0 is a task that needs to be co-scheduled with
+            # the parent task. If the parent (current_task) is not sticky we must
+            # set it to be sticky.
+            # XXX: Ideally we would be able to unset this
+            current_task().sticky = true
             tid = Threads.threadid()
             ccall(:jl_set_task_tid, Cvoid, (Any, Cint), t, tid-1)
         end
         push!(Workqueues[tid], t)
     else
-        tid = 0
         if ccall(:jl_enqueue_task, Cint, (Any,), t) != 0
             # if multiq is full, give to a random thread (TODO fix)
-            tid = mod(time_ns() % Int, Threads.nthreads()) + 1
-            ccall(:jl_set_task_tid, Cvoid, (Any, Cint), t, tid-1)
+            if tid == 0
+                tid = mod(time_ns() % Int, Threads.nthreads()) + 1
+                ccall(:jl_set_task_tid, Cvoid, (Any, Cint), t, tid-1)
+            end
             push!(Workqueues[tid], t)
+        else
+            tid = 0
         end
     end
     ccall(:jl_wakeup_thread, Cvoid, (Int16,), (tid - 1) % Int16)
@@ -670,6 +721,7 @@ A fast, unfair-scheduling version of `schedule(t, arg); yield()` which
 immediately yields to `t` before calling the scheduler.
 """
 function yield(t::Task, @nospecialize(x=nothing))
+    (t._state === task_state_runnable && t.queue === nothing) || error("yield: Task not runnable")
     t.result = x
     enq_work(current_task())
     set_next_task(t)
@@ -685,6 +737,13 @@ call to `yieldto`. This is a low-level call that only switches tasks, not consid
 or scheduling in any way. Its use is discouraged.
 """
 function yieldto(t::Task, @nospecialize(x=nothing))
+    # TODO: these are legacy behaviors; these should perhaps be a scheduler
+    # state error instead.
+    if t._state === task_state_done
+        return x
+    elseif t._state === task_state_failed
+        throw(t.result)
+    end
     t.result = x
     set_next_task(t)
     return try_yieldto(identity)
@@ -759,6 +818,7 @@ end
 end
 
 function wait()
+    GC.safepoint()
     W = Workqueues[Threads.threadid()]
     poptask(W)
     result = try_yieldto(ensure_rescheduled)
diff --git a/base/threadcall.jl b/base/threadcall.jl
index 2267e4ea2228c3..f0e5f336ec0ca0 100644
--- a/base/threadcall.jl
+++ b/base/threadcall.jl
@@ -30,7 +30,7 @@ macro threadcall(f, rettype, argtypes, argvals...)
     argvals = map(esc, argvals)
 
     # construct non-allocating wrapper to call C function
-    wrapper = :(function (args_ptr::Ptr{Cvoid}, retval_ptr::Ptr{Cvoid})
+    wrapper = :(function (fptr::Ptr{Cvoid}, args_ptr::Ptr{Cvoid}, retval_ptr::Ptr{Cvoid})
         p = args_ptr
         # the rest of the body is created below
     end)
@@ -42,18 +42,19 @@ macro threadcall(f, rettype, argtypes, argvals...)
         push!(body, :(p += Core.sizeof($T)))
         push!(args, arg)
     end
-    push!(body, :(ret = ccall($f, $rettype, ($(argtypes...),), $(args...))))
+    push!(body, :(ret = ccall(fptr, $rettype, ($(argtypes...),), $(args...))))
     push!(body, :(unsafe_store!(convert(Ptr{$rettype}, retval_ptr), ret)))
     push!(body, :(return Int(Core.sizeof($rettype))))
 
     # return code to generate wrapper function and send work request thread queue
     wrapper = Expr(Symbol("hygienic-scope"), wrapper, @__MODULE__)
-    return :(let fun_ptr = @cfunction($wrapper, Int, (Ptr{Cvoid}, Ptr{Cvoid}))
-        do_threadcall(fun_ptr, $rettype, Any[$(argtypes...)], Any[$(argvals...)])
+    return :(let fun_ptr = @cfunction($wrapper, Int, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}))
+        # use cglobal to look up the function on the calling thread
+        do_threadcall(fun_ptr, cglobal($f), $rettype, Any[$(argtypes...)], Any[$(argvals...)])
     end)
 end
 
-function do_threadcall(fun_ptr::Ptr{Cvoid}, rettype::Type, argtypes::Vector, argvals::Vector)
+function do_threadcall(fun_ptr::Ptr{Cvoid}, cfptr::Ptr{Cvoid}, rettype::Type, argtypes::Vector, argvals::Vector)
     # generate function pointer
     c_notify_fun = @cfunction(
         function notify_fun(idx)
@@ -86,8 +87,8 @@ function do_threadcall(fun_ptr::Ptr{Cvoid}, rettype::Type, argtypes::Vector, arg
     GC.@preserve args_arr ret_arr roots begin
         # queue up the work to be done
         ccall(:jl_queue_work, Cvoid,
-            (Ptr{Cvoid}, Ptr{UInt8}, Ptr{UInt8}, Ptr{Cvoid}, Cint),
-            fun_ptr, args_arr, ret_arr, c_notify_fun, idx)
+            (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{UInt8}, Ptr{UInt8}, Ptr{Cvoid}, Cint),
+            fun_ptr, cfptr, args_arr, ret_arr, c_notify_fun, idx)
 
         # wait for a result & return it
         wait(thread_notifiers[idx])
diff --git a/base/threadingconstructs.jl b/base/threadingconstructs.jl
index f35cb37da990b1..e66af69b3e82f0 100644
--- a/base/threadingconstructs.jl
+++ b/base/threadingconstructs.jl
@@ -1,6 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-export threadid, nthreads, @threads
+export threadid, nthreads, @threads, @spawn
 
 """
     Threads.threadid()
@@ -15,6 +15,10 @@ threadid() = Int(ccall(:jl_threadid, Int16, ())+1)
 
 Get the number of threads available to the Julia process. This is the inclusive upper bound
 on [`threadid()`](@ref).
+
+See also: `BLAS.get_num_threads` and `BLAS.set_num_threads` in the
+[`LinearAlgebra`](@ref man-linalg) standard library, and `nprocs()` in the
+[`Distributed`](@ref man-distributed) standard library.
 """
 nthreads() = Int(unsafe_load(cglobal(:jl_n_threads, Cint)))
 
@@ -114,6 +118,10 @@ The default schedule (used when no `schedule` argument is present) is subject to
 
 !!! compat "Julia 1.5"
     The `schedule` argument is available as of Julia 1.5.
+
+See also: [`@spawn`](@ref Threads.@spawn), [`nthreads()`](@ref Threads.nthreads),
+[`threadid()`](@ref Threads.threadid), `pmap` in [`Distributed`](@ref man-distributed),
+`BLAS.set_num_threads` in [`LinearAlgebra`](@ref man-linalg).
 """
 macro threads(args...)
     na = length(args)
diff --git a/base/threads_overloads.jl b/base/threads_overloads.jl
index 3e6ad06760747e..a0d4bbeda22888 100644
--- a/base/threads_overloads.jl
+++ b/base/threads_overloads.jl
@@ -35,7 +35,7 @@ function Threads.foreach(f, channel::Channel;
                 # do `stop[] && break` after `f(item)` to avoid losing `item`.
                 # this isn't super comprehensive since a task could still get
                 # stuck on `take!` at `for item in channel`. We should think
-                # about a more robust mechanism to avoid dropping items. See also:
+                # about a more robust mechanism to avoid dropping items. See also
                 # https://github.com/JuliaLang/julia/pull/34543#discussion_r422695217
                 stop[] && break
             end
diff --git a/base/timing.jl b/base/timing.jl
index 7af6f038ba6ea0..45a27e33789773 100644
--- a/base/timing.jl
+++ b/base/timing.jl
@@ -55,7 +55,7 @@ function gc_alloc_count(diff::GC_Diff)
     diff.malloc + diff.realloc + diff.poolalloc + diff.bigalloc
 end
 
-# cumulative total time spent on compilation
+# cumulative total time spent on compilation, in nanoseconds
 cumulative_compile_time_ns_before() = ccall(:jl_cumulative_compile_time_ns_before, UInt64, ())
 cumulative_compile_time_ns_after() = ccall(:jl_cumulative_compile_time_ns_after, UInt64, ())
 
@@ -205,11 +205,11 @@ macro time(ex)
     quote
         while false; end # compiler heuristic: compile this block (alter this if the heuristic changes)
         local stats = gc_num()
-        local compile_elapsedtime = cumulative_compile_time_ns_before()
         local elapsedtime = time_ns()
+        local compile_elapsedtime = cumulative_compile_time_ns_before()
         local val = $(esc(ex))
-        elapsedtime = time_ns() - elapsedtime
         compile_elapsedtime = cumulative_compile_time_ns_after() - compile_elapsedtime
+        elapsedtime = time_ns() - elapsedtime
         local diff = GC_Diff(gc_num(), stats)
         time_print(elapsedtime, diff.allocd, diff.total_time, gc_alloc_count(diff), compile_elapsedtime, true)
         val
@@ -251,11 +251,11 @@ macro timev(ex)
     quote
         while false; end # compiler heuristic: compile this block (alter this if the heuristic changes)
         local stats = gc_num()
-        local compile_elapsedtime = cumulative_compile_time_ns_before()
         local elapsedtime = time_ns()
+        local compile_elapsedtime = cumulative_compile_time_ns_before()
         local val = $(esc(ex))
-        elapsedtime = time_ns() - elapsedtime
         compile_elapsedtime = cumulative_compile_time_ns_after() - compile_elapsedtime
+        elapsedtime = time_ns() - elapsedtime
         local diff = GC_Diff(gc_num(), stats)
         timev_print(elapsedtime, diff, compile_elapsedtime)
         val
diff --git a/base/toml_parser.jl b/base/toml_parser.jl
index 956c62196d6b80..4b2af426429a09 100644
--- a/base/toml_parser.jl
+++ b/base/toml_parser.jl
@@ -242,7 +242,7 @@ const err_message = Dict(
     ErrExpectedEqualAfterKey                => "expected equal sign after key",
     ErrNoTrailingDigitAfterDot              => "expected digit after dot",
     ErrOverflowError                        => "overflowed when parsing integer",
-    ErrInvalidUnicodeScalar                 => "invalid uncidode scalar",
+    ErrInvalidUnicodeScalar                 => "invalid unicode scalar",
     ErrInvalidEscapeCharacter               => "invalid escape character",
     ErrUnexpectedEofExpectedValue           => "unexpected end of file, expected a value"
 )
@@ -321,9 +321,9 @@ function Base.showerror(io::IO, err::ParserError)
     printstyled(io, " error: "; color=Base.error_color())
     println(io, format_error_message_for_err_type(err))
     # In this case we want the arrow to point one character
-    pos = err.pos
+    pos = err.pos::Int
     err.type == ErrUnexpectedEofExpectedValue && (pos += 1)
-    str1, err1 = point_to_line(err.str, pos, pos, io)
+    str1, err1 = point_to_line(err.str::String, pos, pos, io)
     @static if VERSION <= v"1.6.0-DEV.121"
         # See https://github.com/JuliaLang/julia/issues/36015
         format_fixer = get(io, :color, false) == true ? "\e[0m" : ""
@@ -751,7 +751,7 @@ isvalid_binary(c::Char) = '0' <= c <= '1'
 
 const ValidSigs = Union{typeof.([isvalid_hex, isvalid_oct, isvalid_binary, isdigit])...}
 # This function eats things accepted by `f` but also allows eating `_` in between
-# digits. Retruns if it ate at lest one character and if it ate an underscore
+# digits. Returns whether it ate at least one character and whether it ate an underscore
 function accept_batch_underscore(l::Parser, f::ValidSigs, fail_if_underscore=true)::Err{Tuple{Bool, Bool}}
     contains_underscore = false
     at_least_one = false
diff --git a/base/tuple.jl b/base/tuple.jl
index d0b849f3a98030..77fa6ba0ea1a37 100644
--- a/base/tuple.jl
+++ b/base/tuple.jl
@@ -32,6 +32,9 @@ getindex(t::Tuple, r::AbstractArray{<:Any,1}) = (eltype(t)[t[ri] for ri in r]...
 getindex(t::Tuple, b::AbstractArray{Bool,1}) = length(b) == length(t) ? getindex(t, findall(b)) : throw(BoundsError(t, b))
 getindex(t::Tuple, c::Colon) = t
 
+get(t::Tuple, i::Integer, default) = i in 1:length(t) ? getindex(t, i) : default
+get(f::Callable, t::Tuple, i::Integer) = i in 1:length(t) ? getindex(t, i) : f()
+
 # returns new tuple; N.B.: becomes no-op if i is out-of-bounds
 
 """
@@ -48,21 +51,20 @@ true
 """
 function setindex(x::Tuple, v, i::Integer)
     @boundscheck 1 <= i <= length(x) || throw(BoundsError(x, i))
-    @_inline_meta
+    @inline
     _setindex(v, i, x...)
 end
 
-function _setindex(v, i::Integer, first, tail...)
-    @_inline_meta
-    return (ifelse(i == 1, v, first), _setindex(v, i - 1, tail...)...)
+function _setindex(v, i::Integer, args...)
+    @inline
+    return ntuple(j -> ifelse(j == i, v, args[j]), length(args))
 end
-_setindex(v, i::Integer) = ()
 
 
 ## iterating ##
 
 function iterate(@nospecialize(t::Tuple), i::Int=1)
-    @_inline_meta
+    @inline
     return (1 <= i <= length(t)) ? (@inbounds t[i], i + 1) : nothing
 end
 
@@ -72,19 +74,19 @@ prevind(@nospecialize(t::Tuple), i::Integer) = Int(i)-1
 nextind(@nospecialize(t::Tuple), i::Integer) = Int(i)+1
 
 function keys(t::Tuple, t2::Tuple...)
-    @_inline_meta
+    @inline
     OneTo(_maxlength(t, t2...))
 end
 _maxlength(t::Tuple) = length(t)
 function _maxlength(t::Tuple, t2::Tuple, t3::Tuple...)
-    @_inline_meta
+    @inline
     max(length(t), _maxlength(t2, t3...))
 end
 
 # this allows partial evaluation of bounded sequences of next() calls on tuples,
 # while reducing to plain next() for arbitrary iterables.
-indexed_iterate(t::Tuple, i::Int, state=1) = (@_inline_meta; (getfield(t, i), i+1))
-indexed_iterate(a::Array, i::Int, state=1) = (@_inline_meta; (a[i], i+1))
+indexed_iterate(t::Tuple, i::Int, state=1) = (@inline; (getfield(t, i), i+1))
+indexed_iterate(a::Array, i::Int, state=1) = (@inline; (a[i], i+1))
 function indexed_iterate(I, i)
     x = iterate(I)
     x === nothing && throw(BoundsError(I, i))
@@ -104,12 +106,15 @@ state `itr_state`. Return a `Tuple`, if `collection` itself is a `Tuple`, a subt
 `AbstractVector`, if `collection` is an `AbstractArray`, a subtype of `AbstractString`
 if `collection` is an `AbstractString`, and an arbitrary iterator, falling back to
 `Iterators.rest(collection[, itr_state])`, otherwise.
-Can be overloaded for user-defined collection types to customize the behavior of slurping
-in assignments, like `a, b... = collection`.
+
+Can be overloaded for user-defined collection types to customize the behavior of [slurping
+in assignments](@ref destructuring-assignment), like `a, b... = collection`.
 
 !!! compat "Julia 1.6"
     `Base.rest` requires at least Julia 1.6.
 
+See also: [`first`](@ref first), [`Iterators.rest`](@ref).
+
 # Examples
 ```jldoctest
 julia> a = [1 2; 3 4]
@@ -186,6 +191,8 @@ safe_tail(t::Tuple{}) = ()
 
 Return a `Tuple` consisting of all but the last component of `x`.
 
+See also: [`first`](@ref), [`tail`](@ref Base.tail).
+
 # Examples
 ```jldoctest
 julia> Base.front((1,2,3))
@@ -196,13 +203,13 @@ ERROR: ArgumentError: Cannot call front on an empty tuple.
 ```
 """
 function front(t::Tuple)
-    @_inline_meta
+    @inline
     _front(t...)
 end
 _front() = throw(ArgumentError("Cannot call front on an empty tuple."))
 _front(v) = ()
 function _front(v, t...)
-    @_inline_meta
+    @inline
     (v, _front(t...)...)
 end
 
@@ -210,16 +217,22 @@ end
 
 # 1 argument function
 map(f, t::Tuple{})              = ()
-map(f, t::Tuple{Any,})          = (@_inline_meta; (f(t[1]),))
-map(f, t::Tuple{Any, Any})      = (@_inline_meta; (f(t[1]), f(t[2])))
-map(f, t::Tuple{Any, Any, Any}) = (@_inline_meta; (f(t[1]), f(t[2]), f(t[3])))
-map(f, t::Tuple)                = (@_inline_meta; (f(t[1]), map(f,tail(t))...))
+map(f, t::Tuple{Any,})          = (@inline; (f(t[1]),))
+map(f, t::Tuple{Any, Any})      = (@inline; (f(t[1]), f(t[2])))
+map(f, t::Tuple{Any, Any, Any}) = (@inline; (f(t[1]), f(t[2]), f(t[3])))
+map(f, t::Tuple)                = (@inline; (f(t[1]), map(f,tail(t))...))
 # stop inlining after some number of arguments to avoid code blowup
-const Any16{N} = Tuple{Any,Any,Any,Any,Any,Any,Any,Any,
-                       Any,Any,Any,Any,Any,Any,Any,Any,Vararg{Any,N}}
-const All16{T,N} = Tuple{T,T,T,T,T,T,T,T,
-                         T,T,T,T,T,T,T,T,Vararg{T,N}}
-function map(f, t::Any16)
+const Any32{N} = Tuple{Any,Any,Any,Any,Any,Any,Any,Any,
+                       Any,Any,Any,Any,Any,Any,Any,Any,
+                       Any,Any,Any,Any,Any,Any,Any,Any,
+                       Any,Any,Any,Any,Any,Any,Any,Any,
+                       Vararg{Any,N}}
+const All32{T,N} = Tuple{T,T,T,T,T,T,T,T,
+                         T,T,T,T,T,T,T,T,
+                         T,T,T,T,T,T,T,T,
+                         T,T,T,T,T,T,T,T,
+                         Vararg{T,N}}
+function map(f, t::Any32)
     n = length(t)
     A = Vector{Any}(undef, n)
     for i=1:n
@@ -229,13 +242,13 @@ function map(f, t::Any16)
 end
 # 2 argument function
 map(f, t::Tuple{},        s::Tuple{})        = ()
-map(f, t::Tuple{Any,},    s::Tuple{Any,})    = (@_inline_meta; (f(t[1],s[1]),))
-map(f, t::Tuple{Any,Any}, s::Tuple{Any,Any}) = (@_inline_meta; (f(t[1],s[1]), f(t[2],s[2])))
+map(f, t::Tuple{Any,},    s::Tuple{Any,})    = (@inline; (f(t[1],s[1]),))
+map(f, t::Tuple{Any,Any}, s::Tuple{Any,Any}) = (@inline; (f(t[1],s[1]), f(t[2],s[2])))
 function map(f, t::Tuple, s::Tuple)
-    @_inline_meta
+    @inline
     (f(t[1],s[1]), map(f, tail(t), tail(s))...)
 end
-function map(f, t::Any16, s::Any16)
+function map(f, t::Any32, s::Any32)
     n = length(t)
     A = Vector{Any}(undef, n)
     for i = 1:n
@@ -248,10 +261,10 @@ heads(ts::Tuple...) = map(t -> t[1], ts)
 tails(ts::Tuple...) = map(tail, ts)
 map(f, ::Tuple{}...) = ()
 function map(f, t1::Tuple, t2::Tuple, ts::Tuple...)
-    @_inline_meta
+    @inline
     (f(heads(t1, t2, ts...)...), map(f, tails(t1, t2, ts...)...)...)
 end
-function map(f, t1::Any16, t2::Any16, ts::Any16...)
+function map(f, t1::Any32, t2::Any32, ts::Any32...)
     n = length(t1)
     A = Vector{Any}(undef, n)
     for i = 1:n
@@ -268,7 +281,7 @@ fill_to_length(t::Tuple{}, val, ::Val{1}) = (val,)
 fill_to_length(t::Tuple{Any}, val, ::Val{2}) = (t..., val)
 fill_to_length(t::Tuple{}, val, ::Val{2}) = (val, val)
 #function fill_to_length(t::Tuple, val, ::Val{N}) where {N}
-#    @_inline_meta
+#    @inline
 #    return (t..., ntuple(i -> val, N - length(t))...)
 #end
 
@@ -305,20 +318,24 @@ Tuple(x::Array{T,0}) where {T} = tuple(getindex(x))
 _totuple(::Type{Tuple{}}, itr, s...) = ()
 
 function _totuple_err(@nospecialize T)
-    @_noinline_meta
+    @noinline
     throw(ArgumentError("too few elements for tuple type $T"))
 end
 
-function _totuple(T, itr, s...)
-    @_inline_meta
+function _totuple(::Type{T}, itr, s::Vararg{Any,N}) where {T,N}
+    @inline
     y = iterate(itr, s...)
     y === nothing && _totuple_err(T)
-    return (convert(fieldtype(T, 1), y[1]), _totuple(tuple_type_tail(T), itr, y[2])...)
+    t1 = convert(fieldtype(T, 1), y[1])
+    # inference may give up in recursive calls, so annotate here to force accurate return type to be propagated
+    rT = tuple_type_tail(T)
+    ts = _totuple(rT, itr, y[2])::rT
+    return (t1, ts...)
 end
 
 # use iterative algorithm for long tuples
-function _totuple(T::Type{All16{E,N}}, itr) where {E,N}
-    len = N+16
+function _totuple(T::Type{All32{E,N}}, itr) where {E,N}
+    len = N+32
     elts = collect(E, Iterators.take(itr,len))
     if length(elts) != len
         _totuple_err(T)
@@ -342,15 +359,19 @@ end
 filter(f, xs::Tuple) = afoldl((ys, x) -> f(x) ? (ys..., x) : ys, (), xs...)
 
 # use Array for long tuples
-filter(f, t::Any16) = Tuple(filter(f, collect(t)))
+filter(f, t::Any32) = Tuple(filter(f, collect(t)))
 
 ## comparison ##
 
-isequal(t1::Tuple, t2::Tuple) = (length(t1) == length(t2)) && _isequal(t1, t2)
-_isequal(t1::Tuple{}, t2::Tuple{}) = true
-_isequal(t1::Tuple{Any}, t2::Tuple{Any}) = isequal(t1[1], t2[1])
-_isequal(t1::Tuple, t2::Tuple) = isequal(t1[1], t2[1]) && _isequal(tail(t1), tail(t2))
-function _isequal(t1::Any16, t2::Any16)
+isequal(t1::Tuple, t2::Tuple) = length(t1) == length(t2) && _isequal(t1, t2)
+_isequal(::Tuple{}, ::Tuple{}) = true
+function _isequal(t1::Tuple{Any,Vararg{Any,N}}, t2::Tuple{Any,Vararg{Any,N}}) where {N}
+    isequal(t1[1], t2[1]) || return false
+    t1, t2 = tail(t1), tail(t2)
+    # avoid dynamic dispatch by telling the compiler relational invariants
+    return isa(t1, Tuple{}) ? true : _isequal(t1, t2::Tuple{Any,Vararg{Any}})
+end
+function _isequal(t1::Any32, t2::Any32)
     for i = 1:length(t1)
         if !isequal(t1[i], t2[i])
             return false
@@ -380,7 +401,7 @@ function _eq_missing(t1::Tuple, t2::Tuple)
         return _eq_missing(tail(t1), tail(t2))
     end
 end
-function _eq(t1::Any16, t2::Any16)
+function _eq(t1::Any32, t2::Any32)
     anymissing = false
     for i = 1:length(t1)
         eq = (t1[i] == t2[i])
@@ -396,7 +417,7 @@ end
 const tuplehash_seed = UInt === UInt64 ? 0x77cfa1eef01bca90 : 0xf01bca90
 hash(::Tuple{}, h::UInt) = h + tuplehash_seed
 hash(t::Tuple, h::UInt) = hash(t[1], hash(tail(t), h))
-function hash(t::Any16, h::UInt)
+function hash(t::Any32, h::UInt)
     out = h + tuplehash_seed
     for i = length(t):-1:1
         out = hash(t[i], out)
@@ -417,7 +438,7 @@ function <(t1::Tuple, t2::Tuple)
     end
     return tail(t1) < tail(t2)
 end
-function <(t1::Any16, t2::Any16)
+function <(t1::Any32, t2::Any32)
     n1, n2 = length(t1), length(t2)
     for i = 1:min(n1, n2)
         a, b = t1[i], t2[i]
@@ -444,7 +465,7 @@ function isless(t1::Tuple, t2::Tuple)
     a, b = t1[1], t2[1]
     isless(a, b) || (isequal(a, b) && isless(tail(t1), tail(t2)))
 end
-function isless(t1::Any16, t2::Any16)
+function isless(t1::Any32, t2::Any32)
     n1, n2 = length(t1), length(t2)
     for i = 1:min(n1, n2)
         a, b = t1[i], t2[i]
@@ -467,17 +488,12 @@ reverse(t::Tuple) = revargs(t...)
 
 ## specialized reduction ##
 
-# TODO: these definitions cannot yet be combined, since +(x...)
-# where x might be any tuple matches too many methods.
-# TODO: this is inconsistent with the regular sum in cases where the arguments
-# require size promotion to system size.
-sum(x::Tuple{Any, Vararg{Any}}) = +(x...)
-
-# NOTE: should remove, but often used on array sizes
-# TODO: this is inconsistent with the regular prod in cases where the arguments
-# require size promotion to system size.
 prod(x::Tuple{}) = 1
-prod(x::Tuple{Any, Vararg{Any}}) = *(x...)
+# This is consistent with the regular prod because there is no need for size promotion
+# if all elements in the tuple are of system size.
+# It is defined here separately in order to support bootstrap, because it's needed earlier
+# than the general prod definition is available.
+prod(x::Tuple{Int, Vararg{Int}}) = *(x...)
 
 all(x::Tuple{}) = true
 all(x::Tuple{Bool}) = x[1]
@@ -493,7 +509,7 @@ any(x::Tuple{Bool, Bool, Bool}) = x[1]|x[2]|x[3]
 # equivalent to any(f, t), to be used only in bootstrap
 _tuple_any(f::Function, t::Tuple) = _tuple_any(f, false, t...)
 function _tuple_any(f::Function, tf::Bool, a, b...)
-    @_inline_meta
+    @inline
     _tuple_any(f, tf | f(a), b...)
 end
 _tuple_any(f::Function, tf::Bool) = tf
diff --git a/base/twiceprecision.jl b/base/twiceprecision.jl
index 4df9c072f78eed..7f338ce98a1f5d 100644
--- a/base/twiceprecision.jl
+++ b/base/twiceprecision.jl
@@ -194,6 +194,10 @@ function TwicePrecision{T}(x) where {T}
     TwicePrecision{T}(xT, T(Δx))
 end
 
+function TwicePrecision{T}(x::TwicePrecision) where {T}
+    TwicePrecision{T}(x.hi, x.lo)
+end
+
 TwicePrecision{T}(i::Integer) where {T<:AbstractFloat} =
     TwicePrecision{T}(canonicalize2(splitprec(T, i)...)...)
 
@@ -207,13 +211,21 @@ end
 
 function TwicePrecision{T}(nd::Tuple{Any,Any}) where {T}
     n, d = nd
-    TwicePrecision{T}(n) / d
+    TwicePrecision{T}(TwicePrecision{T}(n) / d)
 end
 
 function TwicePrecision{T}(nd::Tuple{I,I}, nb::Integer) where {T,I}
     twiceprecision(TwicePrecision{T}(nd), nb)
 end
 
+# Fix #39798
+# See steprangelen_hp(::Type{Float64}, ref::Tuple{Integer,Integer},
+#                         step::Tuple{Integer,Integer}, nb::Integer,
+#                         len::Integer, offset::Integer)
+function TwicePrecision{T}(nd::Tuple{Integer,Integer}, nb::Integer) where T
+    twiceprecision(TwicePrecision{T}(nd), nb)
+end
+
 # Truncating constructors. Useful for generating values that can be
 # exactly multiplied by small integers.
 function twiceprecision(val::T, nb::Integer) where {T<:IEEEFloat}
@@ -321,13 +333,13 @@ function steprangelen_hp(::Type{Float64}, ref::Tuple{Integer,Integer},
                          step::Tuple{Integer,Integer}, nb::Integer,
                          len::Integer, offset::Integer)
     StepRangeLen(TwicePrecision{Float64}(ref),
-                 TwicePrecision{Float64}(step, nb), Int(len), offset)
+                 TwicePrecision{Float64}(step, nb), len, offset)
 end
 
 function steprangelen_hp(::Type{T}, ref::Tuple{Integer,Integer},
                          step::Tuple{Integer,Integer}, nb::Integer,
                          len::Integer, offset::Integer) where {T<:IEEEFloat}
-    StepRangeLen{T}(ref[1]/ref[2], step[1]/step[2], Int(len), offset)
+    StepRangeLen{T}(ref[1]/ref[2], step[1]/step[2], len, offset)
 end
 
 # AbstractFloat constructors (can supply a single number or a 2-tuple
@@ -339,14 +351,13 @@ function steprangelen_hp(::Type{Float64}, ref::F_or_FF,
                          step::F_or_FF, nb::Integer,
                          len::Integer, offset::Integer)
     StepRangeLen(TwicePrecision{Float64}(ref...),
-                 twiceprecision(TwicePrecision{Float64}(step...), nb), Int(len), offset)
+                 twiceprecision(TwicePrecision{Float64}(step...), nb), len, offset)
 end
 
 function steprangelen_hp(::Type{T}, ref::F_or_FF,
                          step::F_or_FF, nb::Integer,
                          len::Integer, offset::Integer) where {T<:IEEEFloat}
-    StepRangeLen{T}(asF64(ref),
-                    asF64(step), Int(len), offset)
+    StepRangeLen{T}(asF64(ref), asF64(step), len, offset)
 end
 
 
@@ -357,30 +368,33 @@ StepRangeLen(ref::TwicePrecision{T}, step::TwicePrecision{T},
 
 # Construct range for rational start=start_n/den, step=step_n/den
 function floatrange(::Type{T}, start_n::Integer, step_n::Integer, len::Integer, den::Integer) where T
+    len = len + 0 # promote with Int
     if len < 2 || step_n == 0
-        return steprangelen_hp(T, (start_n, den), (step_n, den), 0, Int(len), 1)
+        return steprangelen_hp(T, (start_n, den), (step_n, den), 0, len, oneunit(len))
     end
     # index of smallest-magnitude value
-    imin = clamp(round(Int, -start_n/step_n+1), 1, Int(len))
+    L = typeof(len)
+    imin = clamp(round(typeof(len), -start_n/step_n+1), oneunit(L), len)
     # Compute smallest-magnitude element to 2x precision
     ref_n = start_n+(imin-1)*step_n  # this shouldn't overflow, so don't check
     nb = nbitslen(T, len, imin)
-    steprangelen_hp(T, (ref_n, den), (step_n, den), nb, Int(len), imin)
+    steprangelen_hp(T, (ref_n, den), (step_n, den), nb, len, imin)
 end
 
 function floatrange(a::AbstractFloat, st::AbstractFloat, len::Real, divisor::AbstractFloat)
+    len = len + 0 # promote with Int
     T = promote_type(typeof(a), typeof(st), typeof(divisor))
     m = maxintfloat(T, Int)
     if abs(a) <= m && abs(st) <= m && abs(divisor) <= m
         ia, ist, idivisor = round(Int, a), round(Int, st), round(Int, divisor)
         if ia == a && ist == st && idivisor == divisor
             # We can return the high-precision range
-            return floatrange(T, ia, ist, Int(len), idivisor)
+            return floatrange(T, ia, ist, len, idivisor)
         end
     end
     # Fallback (misses the opportunity to set offset different from 1,
     # but otherwise this is still high-precision)
-    steprangelen_hp(T, (a,divisor), (st,divisor), nbitslen(T, len, 1), Int(len), 1)
+    steprangelen_hp(T, (a,divisor), (st,divisor), nbitslen(T, len, 1), len, oneunit(len))
 end
 
 function (:)(start::T, step::T, stop::T) where T<:Union{Float16,Float32,Float64}
@@ -399,7 +413,7 @@ function (:)(start::T, step::T, stop::T) where T<:Union{Float16,Float32,Float64}
                     rem(den, start_d) == 0 && rem(den, step_d) == 0      # check lcm overflow
                 start_n = round(Int, start*den)
                 step_n = round(Int, step*den)
-                len = max(0, div(den*stop_n - stop_d*start_n + step_n*stop_d, step_n*stop_d))
+                len = max(0, Int(div(den*stop_n - stop_d*start_n + step_n*stop_d, step_n*stop_d)))
                 # Integer ops could overflow, so check that this makes sense
                 if isbetween(start, start + (len-1)*step, stop + step/2) &&
                         !isbetween(start, start + len*step, stop)
@@ -410,6 +424,7 @@ function (:)(start::T, step::T, stop::T) where T<:Union{Float16,Float32,Float64}
         end
     end
     # Fallback, taking start and step literally
+    # n.b. we use Int as the default length type for IEEEFloats
     lf = (stop-start)/step
     if lf < 0
         len = 0
@@ -428,6 +443,7 @@ step(r::StepRangeLen{T,TwicePrecision{T},TwicePrecision{T}}) where {T<:AbstractF
 step(r::StepRangeLen{T,TwicePrecision{T},TwicePrecision{T}}) where {T} = T(r.step)
 
 function range_start_step_length(a::T, st::T, len::Integer) where T<:Union{Float16,Float32,Float64}
+    len = len + 0 # promote with Int
     start_n, start_d = rat(a)
     step_n, step_d = rat(st)
     if start_d != 0 && step_d != 0 &&
@@ -447,7 +463,7 @@ end
 # This assumes that r.step has already been split so that (0:len-1)*r.step.hi is exact
 function unsafe_getindex(r::StepRangeLen{T,<:TwicePrecision,<:TwicePrecision}, i::Integer) where T
     # Very similar to _getindex_hiprec, but optimized to avoid a 2nd call to add12
-    @_inline_meta
+    @inline
     i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
     u = i - r.offset
     shift_hi, shift_lo = u*r.step.hi, u*r.step.lo
@@ -466,31 +482,38 @@ end
 
 function getindex(r::StepRangeLen{T,<:TwicePrecision,<:TwicePrecision}, s::OrdinalRange{S}) where {T, S<:Integer}
     @boundscheck checkbounds(r, s)
+    len = length(s)
+    L = typeof(len)
+    sstep = step_hp(s)
+    rstep = step_hp(r)
     if S === Bool
-        if length(s) == 0
-            return StepRangeLen(r.ref, r.step, 0, 1)
-        elseif length(s) == 1
+        #rstep *= one(sstep)
+        if len == 0
+            return StepRangeLen{T}(first(r), rstep, zero(L), oneunit(L))
+        elseif len == 1
             if first(s)
-                return StepRangeLen(r.ref, r.step, 1, 1)
+                return StepRangeLen{T}(first(r), rstep, oneunit(L), oneunit(L))
             else
-                return StepRangeLen(r.ref, r.step, 0, 1)
+                return StepRangeLen{T}(first(r), rstep, zero(L), oneunit(L))
             end
-        else # length(s) == 2
-            return StepRangeLen(r[2], step(r), 1, 1)
+        else # len == 2
+            return StepRangeLen{T}(last(r), step(r), oneunit(L), oneunit(L))
         end
     else
-        soffset = 1 + round(Int, (r.offset - first(s))/step(s))
-        soffset = clamp(soffset, 1, length(s))
-        ioffset = first(s) + (soffset-1)*step(s)
-        if step(s) == 1 || length(s) < 2
-            newstep = r.step
+        soffset = round(L, (r.offset - first(s))/sstep + 1)
+        soffset = clamp(soffset, oneunit(L), len)
+        ioffset = L(first(s) + (soffset - oneunit(L)) * sstep)
+        if sstep == 1 || len < 2
+            newstep = rstep #* one(sstep)
         else
-            newstep = twiceprecision(r.step*step(s), nbitslen(T, length(s), soffset))
+            newstep = rstep * sstep
+            newstep = twiceprecision(newstep, nbitslen(T, len, soffset))
         end
+        soffset = max(oneunit(L), soffset)
         if ioffset == r.offset
-            return StepRangeLen(r.ref, newstep, length(s), max(1,soffset))
+            return StepRangeLen{T}(r.ref, newstep, len, soffset)
         else
-            return StepRangeLen(r.ref + (ioffset-r.offset)*r.step, newstep, length(s), max(1,soffset))
+            return StepRangeLen{T}(r.ref + (ioffset-r.offset)*rstep, newstep, len, soffset)
         end
     end
 end
@@ -501,30 +524,30 @@ end
 /(r::StepRangeLen{<:Real,<:TwicePrecision}, x::Real) =
     StepRangeLen(r.ref/x, twiceprecision(r.step/x, nbitslen(r)), length(r), r.offset)
 
-StepRangeLen{T,R,S}(r::StepRangeLen{T,R,S}) where {T<:AbstractFloat,R<:TwicePrecision,S<:TwicePrecision} = r
+StepRangeLen{T,R,S,L}(r::StepRangeLen{T,R,S,L}) where {T<:AbstractFloat,R<:TwicePrecision,S<:TwicePrecision,L} = r
 
-StepRangeLen{T,R,S}(r::StepRangeLen) where {T<:AbstractFloat,R<:TwicePrecision,S<:TwicePrecision} =
-    _convertSRL(StepRangeLen{T,R,S}, r)
+StepRangeLen{T,R,S,L}(r::StepRangeLen) where {T<:AbstractFloat,R<:TwicePrecision,S<:TwicePrecision,L} =
+    _convertSRL(StepRangeLen{T,R,S,L}, r)
 
 StepRangeLen{Float64}(r::StepRangeLen) =
-    _convertSRL(StepRangeLen{Float64,TwicePrecision{Float64},TwicePrecision{Float64}}, r)
+    _convertSRL(StepRangeLen{Float64,TwicePrecision{Float64},TwicePrecision{Float64},Int}, r)
 StepRangeLen{T}(r::StepRangeLen) where {T<:IEEEFloat} =
-    _convertSRL(StepRangeLen{T,Float64,Float64}, r)
+    _convertSRL(StepRangeLen{T,Float64,Float64,Int}, r)
 
 StepRangeLen{Float64}(r::AbstractRange) =
-    _convertSRL(StepRangeLen{Float64,TwicePrecision{Float64},TwicePrecision{Float64}}, r)
+    _convertSRL(StepRangeLen{Float64,TwicePrecision{Float64},TwicePrecision{Float64},Int}, r)
 StepRangeLen{T}(r::AbstractRange) where {T<:IEEEFloat} =
-    _convertSRL(StepRangeLen{T,Float64,Float64}, r)
+    _convertSRL(StepRangeLen{T,Float64,Float64,Int}, r)
 
-function _convertSRL(::Type{StepRangeLen{T,R,S}}, r::StepRangeLen{<:Integer}) where {T,R,S}
-    StepRangeLen{T,R,S}(R(r.ref), S(r.step), length(r), r.offset)
+function _convertSRL(::Type{StepRangeLen{T,R,S,L}}, r::StepRangeLen{<:Integer}) where {T,R,S,L}
+    StepRangeLen{T,R,S,L}(R(r.ref), S(r.step), L(length(r)), L(r.offset))
 end
 
-function _convertSRL(::Type{StepRangeLen{T,R,S}}, r::AbstractRange{<:Integer}) where {T,R,S}
-    StepRangeLen{T,R,S}(R(first(r)), S(step(r)), length(r))
+function _convertSRL(::Type{StepRangeLen{T,R,S,L}}, r::AbstractRange{<:Integer}) where {T,R,S,L}
+    StepRangeLen{T,R,S,L}(R(first(r)), S(step(r)), L(length(r)))
 end
 
-function _convertSRL(::Type{StepRangeLen{T,R,S}}, r::AbstractRange{U}) where {T,R,S,U}
+function _convertSRL(::Type{StepRangeLen{T,R,S,L}}, r::AbstractRange{U}) where {T,R,S,L,U}
     # if start and step have a rational approximation in the old type,
     # then we transfer that rational approximation to the new type
     f, s = first(r), step(r)
@@ -538,17 +561,17 @@ function _convertSRL(::Type{StepRangeLen{T,R,S}}, r::AbstractRange{U}) where {T,
                 rem(den, start_d) == 0 && rem(den, step_d) == 0
             start_n = round(Int, f*den)
             step_n = round(Int, s*den)
-            return floatrange(T, start_n, step_n, length(r), den)
+            return floatrange(T, start_n, step_n, L(length(r)), den)
         end
     end
-    __convertSRL(StepRangeLen{T,R,S}, r)
+    return __convertSRL(StepRangeLen{T,R,S,L}, r)
 end
 
-function __convertSRL(::Type{StepRangeLen{T,R,S}}, r::StepRangeLen{U}) where {T,R,S,U}
-    StepRangeLen{T,R,S}(R(r.ref), S(r.step), length(r), r.offset)
+function __convertSRL(::Type{StepRangeLen{T,R,S,L}}, r::StepRangeLen{U}) where {T,R,S,L,U}
+    StepRangeLen{T,R,S,L}(R(r.ref), S(r.step), L(length(r)), L(r.offset))
 end
-function __convertSRL(::Type{StepRangeLen{T,R,S}}, r::AbstractRange{U}) where {T,R,S,U}
-    StepRangeLen{T,R,S}(R(first(r)), S(step(r)), length(r))
+function __convertSRL(::Type{StepRangeLen{T,R,S,L}}, r::AbstractRange{U}) where {T,R,S,L,U}
+    StepRangeLen{T,R,S,L}(R(first(r)), S(step(r)), L(length(r)))
 end
 
 function sum(r::StepRangeLen)
@@ -559,7 +582,7 @@ function sum(r::StepRangeLen)
     np, nn = l - r.offset, r.offset - 1  # positive, negative
     # To prevent overflow in sum(1:n), multiply its factors by the step
     sp, sn = sumpair(np), sumpair(nn)
-    W = widen(Int)
+    W = widen(typeof(l))
     Δn = W(sp[1]) * W(sp[2]) - W(sn[1]) * W(sn[2])
     s = r.step * Δn
     # Add in contributions of ref
@@ -595,19 +618,20 @@ function +(r1::StepRangeLen{T,R}, r2::StepRangeLen{T,R}) where T where R<:TwiceP
         imid = r1.offset
         ref = r1.ref + r2.ref
     else
-        imid = round(Int, (r1.offset+r2.offset)/2)
+        imid = round(typeof(len), (r1.offset+r2.offset)/2)
         ref1mid = _getindex_hiprec(r1, imid)
         ref2mid = _getindex_hiprec(r2, imid)
         ref = ref1mid + ref2mid
     end
     step = twiceprecision(r1.step + r2.step, nbitslen(T, len, imid))
-    StepRangeLen{T,typeof(ref),typeof(step)}(ref, step, len, imid)
+    StepRangeLen{T,typeof(ref),typeof(step),typeof(len)}(ref, step, len, imid)
 end
 
 ## LinRange
 
 # For Float16, Float32, and Float64, this returns a StepRangeLen
 function range_start_stop_length(start::T, stop::T, len::Integer) where {T<:IEEEFloat}
+    len = len + 0 # promote with Int
     len < 2 && return _linspace1(T, start, stop, len)
     if start == stop
         return steprangelen_hp(T, start, zero(T), 0, len, 1)
@@ -630,32 +654,35 @@ function range_start_stop_length(start::T, stop::T, len::Integer) where {T<:IEEE
 end
 
 function _linspace(start::T, stop::T, len::Integer) where {T<:IEEEFloat}
+    len = len + 0 # promote with Int
     (isfinite(start) && isfinite(stop)) || throw(ArgumentError("start and stop must be finite, got $start and $stop"))
     # Find the index that returns the smallest-magnitude element
     Δ, Δfac = stop-start, 1
     if !isfinite(Δ)   # handle overflow for large endpoints
-        Δ, Δfac = stop/len - start/len, Int(len)
+        Δ, Δfac = stop/len - start/len, len
     end
     tmin = -(start/Δ)/Δfac            # t such that (1-t)*start + t*stop == 0
-    imin = round(Int, tmin*(len-1)+1) # index approximately corresponding to t
+    L = typeof(len)
+    lenn1 = len - oneunit(L)
+    imin = round(L, tmin*lenn1 + 1) # index approximately corresponding to t
     if 1 < imin < len
         # The smallest-magnitude element is in the interior
-        t = (imin-1)/(len-1)
+        t = (imin - 1)/lenn1
         ref = T((1-t)*start + t*stop)
         step = imin-1 < len-imin ? (ref-start)/(imin-1) : (stop-ref)/(len-imin)
     elseif imin <= 1
-        imin = 1
+        imin = oneunit(L)
         ref = start
-        step = (Δ/(len-1))*Δfac
+        step = (Δ/(lenn1))*Δfac
     else
-        imin = Int(len)
+        imin = len
         ref = stop
-        step = (Δ/(len-1))*Δfac
+        step = (Δ/(lenn1))*Δfac
     end
     if len == 2 && !isfinite(step)
         # For very large endpoints where step overflows, exploit the
         # split-representation to handle the overflow
-        return steprangelen_hp(T, start, (-start, stop), 0, 2, 1)
+        return steprangelen_hp(T, start, (-start, stop), 0, len, oneunit(L))
     end
     # 2x calculations to get high precision endpoint matching while also
     # preventing overflow in ref_hi+(i-offset)*step_hi
@@ -668,23 +695,28 @@ function _linspace(start::T, stop::T, len::Integer) where {T<:IEEEFloat}
     a, b = (start - x1_hi) - x1_lo, (stop - x2_hi) - x2_lo
     step_lo = (b - a)/(len - 1)
     ref_lo = a - (1 - imin)*step_lo
-    steprangelen_hp(T, (ref, ref_lo), (step_hi, step_lo), 0, Int(len), imin)
+    steprangelen_hp(T, (ref, ref_lo), (step_hi, step_lo), 0, len, imin)
 end
 
 # range for rational numbers, start = start_n/den, stop = stop_n/den
 # Note this returns a StepRangeLen
-_linspace(::Type{T}, start::Integer, stop::Integer, len::Integer) where {T<:IEEEFloat} = _linspace(T, start, stop, len, 1)
+_linspace(::Type{T}, start::Integer, stop::Integer, len::Integer) where {T<:IEEEFloat} = _linspace(T, start, stop, len, one(start))
 function _linspace(::Type{T}, start_n::Integer, stop_n::Integer, len::Integer, den::Integer) where T<:IEEEFloat
+    len = len + 0 # promote with Int
     len < 2 && return _linspace1(T, start_n/den, stop_n/den, len)
-    start_n == stop_n && return steprangelen_hp(T, (start_n, den), (zero(start_n), den), 0, len, 1)
+    L = typeof(len)
+    start_n == stop_n && return steprangelen_hp(T, (start_n, den), (zero(start_n), den), 0, len, oneunit(L))
     tmin = -start_n/(Float64(stop_n) - Float64(start_n))
-    imin = round(Int, tmin*(len-1)+1)
-    imin = clamp(imin, 1, Int(len))
-    ref_num = Int128(len-imin) * start_n + Int128(imin-1) * stop_n
-    ref_denom = Int128(len-1) * den
+    imin = round(typeof(len), tmin*(len-1)+1)
+    imin = clamp(imin, oneunit(L), len)
+    W = widen(L)
+    start_n = W(start_n)
+    stop_n = W(stop_n)
+    ref_num = W(len-imin) * start_n + W(imin-1) * stop_n
+    ref_denom = W(len-1) * den
     ref = (ref_num, ref_denom)
-    step_full = (Int128(stop_n) - Int128(start_n), ref_denom)
-    steprangelen_hp(T, ref, step_full,  nbitslen(T, len, imin), Int(len), imin)
+    step_full = (stop_n - start_n, ref_denom)
+    steprangelen_hp(T, ref, step_full, nbitslen(T, len, imin), len, imin)
 end
 
 # For len < 2
@@ -696,7 +728,7 @@ function _linspace1(::Type{T}, start, stop, len::Integer) where T<:IEEEFloat
         # The output type must be consistent with steprangelen_hp
         if T<:Union{Float32,Float16}
             return StepRangeLen{T}(Float64(start), Float64(start) - Float64(stop), len, 1)
-        else
+        else # T == Float64
             return StepRangeLen(TwicePrecision(start, zero(T)), TwicePrecision(start, -stop), len, 1)
         end
     end
@@ -705,8 +737,8 @@ end
 
 ### Numeric utilities
 
-# Approximate x with a rational representation. Guaranteed to return,
-# but not guaranteed to return a precise answer.
+# Approximate x with a rational representation as a pair of Int values.
+# Guaranteed to return, but not guaranteed to return a precise answer.
 # https://en.wikipedia.org/wiki/Continued_fraction#Best_rational_approximations
 function rat(x)
     y = x
@@ -714,7 +746,7 @@ function rat(x)
     b = c = 0
     m = maxintfloat(narrow(typeof(x)), Int)
     while abs(y) <= m
-        f = trunc(Int,y)
+        f = trunc(Int, y)
         y -= f
         a, c = f*a + c, a
         b, d = f*b + d, b
@@ -734,7 +766,7 @@ narrow(::Type{Float32}) = Float16
 narrow(::Type{Float16}) = Float16
 
 function _tp_prod(t::TwicePrecision, x, y...)
-    @_inline_meta
+    @inline
     _tp_prod(t * x, y...)
 end
 _tp_prod(t::TwicePrecision) = t
diff --git a/base/util.jl b/base/util.jl
index e91ab3780824f0..f26823cc69ad81 100644
--- a/base/util.jl
+++ b/base/util.jl
@@ -67,7 +67,9 @@ Printing with the color `:nothing` will print the string without modifications.
 """
 text_colors
 
-function with_output_color(@nospecialize(f::Function), color::Union{Int, Symbol}, io::IO, args...; bold::Bool = false)
+function with_output_color(@nospecialize(f::Function), color::Union{Int, Symbol}, io::IO, args...;
+        bold::Bool = false, underline::Bool = false, blink::Bool = false,
+        reverse::Bool = false, hidden::Bool = false)
     buf = IOBuffer()
     iscolor = get(io, :color, false)::Bool
     try f(IOContext(buf, io), args...)
@@ -77,9 +79,22 @@ function with_output_color(@nospecialize(f::Function), color::Union{Int, Symbol}
             print(io, str)
         else
             bold && color === :bold && (color = :nothing)
+            underline && color === :underline && (color = :nothing)
+            blink && color === :blink && (color = :nothing)
+            reverse && color === :reverse && (color = :nothing)
+            hidden && color === :hidden && (color = :nothing)
             enable_ansi  = get(text_colors, color, text_colors[:default]) *
-                               (bold ? text_colors[:bold] : "")
-            disable_ansi = (bold ? disable_text_style[:bold] : "") *
+                               (bold ? text_colors[:bold] : "") *
+                               (underline ? text_colors[:underline] : "") *
+                               (blink ? text_colors[:blink] : "") *
+                               (reverse ? text_colors[:reverse] : "") *
+                               (hidden ? text_colors[:hidden] : "")
+
+            disable_ansi = (hidden ? disable_text_style[:hidden] : "") *
+                           (reverse ? disable_text_style[:reverse] : "") *
+                           (blink ? disable_text_style[:blink] : "") *
+                           (underline ? disable_text_style[:underline] : "") *
+                           (bold ? disable_text_style[:bold] : "") *
                                get(disable_text_style, color, text_colors[:default])
             first = true
             for line in split(str, '\n')
@@ -94,18 +109,23 @@ function with_output_color(@nospecialize(f::Function), color::Union{Int, Symbol}
 end
 
 """
-    printstyled([io], xs...; bold::Bool=false, color::Union{Symbol,Int}=:normal)
+    printstyled([io], xs...; bold::Bool=false, underline::Bool=false, blink::Bool=false, reverse::Bool=false, hidden::Bool=false, color::Union{Symbol,Int}=:normal)
 
 Print `xs` in a color specified as a symbol or integer, optionally in bold.
 
 `color` may take any of the values $(Base.available_text_colors_docstring)
 or an integer between 0 and 255 inclusive. Note that not all terminals support 256 colors.
 If the keyword `bold` is given as `true`, the result will be printed in bold.
+If the keyword `underline` is given as `true`, the result will be printed underlined.
+If the keyword `blink` is given as `true`, the result will blink.
+If the keyword `reverse` is given as `true`, the result will have foreground and background colors inverted.
+If the keyword `hidden` is given as `true`, the result will be hidden.
+Keywords can be given in any combination.
 """
-printstyled(io::IO, msg...; bold::Bool=false, color::Union{Int,Symbol}=:normal) =
-    with_output_color(print, color, io, msg...; bold=bold)
-printstyled(msg...; bold::Bool=false, color::Union{Int,Symbol}=:normal) =
-    printstyled(stdout, msg...; bold=bold, color=color)
+printstyled(io::IO, msg...; bold::Bool=false, underline::Bool=false, blink::Bool=false, reverse::Bool=false, hidden::Bool=false, color::Union{Int,Symbol}=:normal) =
+    with_output_color(print, color, io, msg...; bold=bold, underline=underline, blink=blink, reverse=reverse, hidden=hidden)
+printstyled(msg...; bold::Bool=false, underline::Bool=false, blink::Bool=false, reverse::Bool=false, hidden::Bool=false, color::Union{Int,Symbol}=:normal) =
+    printstyled(stdout, msg...; bold=bold, underline=underline, blink=blink, reverse=reverse, hidden=hidden, color=color)
 
 """
     Base.julia_cmd(juliapath=joinpath(Sys.BINDIR::String, julia_exename()))
@@ -154,13 +174,14 @@ function julia_cmd(julia=joinpath(Sys.BINDIR::String, julia_exename()))
                   elseif opts.check_bounds == 2
                       "no" # off
                   else
-                      "" # "default"
+                      "" # default = "auto"
                   end
         isempty(check_bounds) || push!(addflags, "--check-bounds=$check_bounds")
     end
     opts.can_inline == 0 && push!(addflags, "--inline=no")
     opts.use_compiled_modules == 0 && push!(addflags, "--compiled-modules=no")
     opts.opt_level == 2 || push!(addflags, "-O$(opts.opt_level)")
+    opts.opt_level_min == 0 || push!(addflags, "--min-optlevel=$(opts.opt_level_min)")
     push!(addflags, "-g$(opts.debug_level)")
     if opts.code_coverage != 0
         # Forward the code-coverage flag only if applicable (if the filename is pid-dependent)
@@ -515,54 +536,6 @@ function _kwdef!(blk, params_args, call_args)
     blk
 end
 
-"""
-    @invoke f(arg::T, ...; kwargs...)
-
-Provides a convenient way to call [`invoke`](@ref);
-`@invoke f(arg1::T1, arg2::T2; kwargs...)` will be expanded into `invoke(f, Tuple{T1,T2}, arg1, arg2; kwargs...)`.
-When an argument's type annotation is omitted, it's specified as `Any` argument, e.g.
-`@invoke f(arg1::T, arg2)` will be expanded into `invoke(f, Tuple{T,Any}, arg1, arg2)`.
-"""
-macro invoke(ex)
-    f, args, kwargs = destructure_callex(ex)
-    arg2typs = map(args) do x
-        is_expr(x, :(::)) ? (x.args...,) : (x, GlobalRef(Core, :Any))
-    end
-    args, argtypes = first.(arg2typs), last.(arg2typs)
-    return esc(:($(GlobalRef(Core, :invoke))($(f), Tuple{$(argtypes...)}, $(args...); $(kwargs...))))
-end
-
-"""
-    @invokelatest f(args...; kwargs...)
-
-Provides a convenient way to call [`Base.invokelatest`](@ref).
-`@invokelatest f(args...; kwargs...)` will simply be expanded into
-`Base.invokelatest(f, args...; kwargs...)`.
-"""
-macro invokelatest(ex)
-    f, args, kwargs = destructure_callex(ex)
-    return esc(:($(GlobalRef(Base, :invokelatest))($(f), $(args...); $(kwargs...))))
-end
-
-function destructure_callex(ex)
-    is_expr(ex, :call) || throw(ArgumentError("a call expression f(args...; kwargs...) should be given"))
-
-    f = first(ex.args)
-    args = []
-    kwargs = []
-    for x in ex.args[2:end]
-        if is_expr(x, :parameters)
-            append!(kwargs, x.args)
-        elseif is_expr(x, :kw)
-            push!(kwargs, x)
-        else
-            push!(args, x)
-        end
-    end
-
-    return f, args, kwargs
-end
-
 # testing
 
 """
diff --git a/base/uuid.jl b/base/uuid.jl
index 16ffdcefccc8d2..ff4df68ddb7c8c 100644
--- a/base/uuid.jl
+++ b/base/uuid.jl
@@ -31,6 +31,11 @@ end
 
 UInt128(u::UUID) = u.value
 
+let
+    uuid_hash_seed = UInt === UInt64 ? 0xd06fa04f86f11b53 : 0x96a1f36d
+    Base.hash(uuid::UUID, h::UInt) = hash(uuid_hash_seed, hash(convert(NTuple{2, UInt64}, uuid), h))
+end
+
 let
 @inline function uuid_kernel(s, i, u)
     _c = UInt32(@inbounds codeunit(s, i))
diff --git a/base/version.jl b/base/version.jl
index 86343dcddabf4f..77676f80e36764 100644
--- a/base/version.jl
+++ b/base/version.jl
@@ -107,10 +107,10 @@ end
 
 function tryparse(::Type{VersionNumber}, v::AbstractString)
     v == "∞" && return typemax(VersionNumber)
-    m = match(VERSION_REGEX, v)
+    m = match(VERSION_REGEX, String(v)::String)
     m === nothing && return nothing
     major, minor, patch, minus, prerl, plus, build = m.captures
-    major = parse(VInt, major)
+    major = parse(VInt, major::AbstractString)
     minor = minor !== nothing ? parse(VInt, minor) : VInt(0)
     patch = patch !== nothing ? parse(VInt, patch) : VInt(0)
     if prerl !== nothing && !isempty(prerl) && prerl[1] == '-'
diff --git a/base/version_git.sh b/base/version_git.sh
index d2ac9cb6058a70..c46021097995ec 100644
--- a/base/version_git.sh
+++ b/base/version_git.sh
@@ -5,11 +5,11 @@
 
 echo "# This file was autogenerated in base/version_git.sh"
 echo "struct GitVersionInfo"
-echo "    commit::AbstractString"
-echo "    commit_short::AbstractString"
-echo "    branch::AbstractString"
+echo "    commit::String"
+echo "    commit_short::String"
+echo "    branch::String"
 echo "    build_number::Int"
-echo "    date_string::AbstractString"
+echo "    date_string::String"
 echo "    tagged_commit::Bool"
 echo "    fork_master_distance::Int"
 echo "    fork_master_timestamp::Float64"
diff --git a/base/views.jl b/base/views.jl
index f60dc04094a430..e26359a5c9fd76 100644
--- a/base/views.jl
+++ b/base/views.jl
@@ -42,7 +42,7 @@ function replace_ref_begin_end_!(ex, withex)
                 n = 1
                 J = lastindex(ex.args)
                 for j = 2:J
-                    exj, used = replace_ref_begin_end_!(ex.args[j], (:($firstindex($S)),:($lastindex($S,$n))))
+                    exj, used = replace_ref_begin_end_!(ex.args[j], (:($firstindex($S,$n)),:($lastindex($S,$n))))
                     used_S |= used
                     ex.args[j] = exj
                     if isa(exj,Expr) && exj.head === :...
diff --git a/base/weakkeydict.jl b/base/weakkeydict.jl
index c7ec172af46aa8..8e1a3d480b9951 100644
--- a/base/weakkeydict.jl
+++ b/base/weakkeydict.jl
@@ -84,6 +84,8 @@ empty(d::WeakKeyDict, ::Type{K}, ::Type{V}) where {K, V} = WeakKeyDict{K, V}()
 IteratorSize(::Type{<:WeakKeyDict}) = SizeUnknown()
 
 islocked(wkh::WeakKeyDict) = islocked(wkh.lock)
+lock(wkh::WeakKeyDict) = lock(wkh.lock)
+unlock(wkh::WeakKeyDict) = unlock(wkh.lock)
 lock(f, wkh::WeakKeyDict) = lock(f, wkh.lock)
 trylock(f, wkh::WeakKeyDict) = trylock(f, wkh.lock)
 
diff --git a/cli/Makefile b/cli/Makefile
index 03261c386d2d05..d4a1b2472c24d3 100644
--- a/cli/Makefile
+++ b/cli/Makefile
@@ -6,7 +6,7 @@ include $(JULIAHOME)/Make.inc
 include $(JULIAHOME)/deps/llvm-ver.make
 
 
-HEADERS := $(addprefix $(SRCDIR)/,jl_exports.h loader.h) $(addprefix $(JULIAHOME)/src/,support/platform.h support/dirpath.h jl_exported_data.inc jl_exported_funcs.inc)
+HEADERS := $(addprefix $(SRCDIR)/,jl_exports.h loader.h) $(addprefix $(JULIAHOME)/src/,julia_fasttls.h support/platform.h support/dirpath.h jl_exported_data.inc jl_exported_funcs.inc)
 
 LOADER_CFLAGS = $(JCFLAGS) -I$(BUILDROOT)/src -I$(JULIAHOME)/src -I$(JULIAHOME)/src/support -I$(build_includedir) -ffreestanding
 LOADER_LDFLAGS = $(JLDFLAGS) -ffreestanding -L$(build_shlibdir) -L$(build_libdir)
@@ -70,7 +70,7 @@ $(BUILDDIR)/julia_res.o: $(JULIAHOME)/contrib/windows/julia.rc $(JULIAHOME)/VERS
 	JLVER=`cat $(JULIAHOME)/VERSION` && \
 	JLVERi=`echo $$JLVER | perl -nle \
 		'/^(\d+)\.?(\d*)\.?(\d*)/ && \
-		print int $$1,",",int $$2,",0,",int $$3'` && \
+		print int $$1,",",int $$2,",",int $$3,",0"'` && \
 	$(CROSS_COMPILE)windres $< -O coff -o $@ -DJLVER=$$JLVERi -DJLVER_STR=\\\"$$JLVER\\\"
 EXE_OBJS += $(BUILDDIR)/julia_res.o
 EXE_DOBJS += $(BUILDDIR)/julia_res.o
@@ -100,17 +100,10 @@ libjulia-debug: $(build_shlibdir)/libjulia-debug.$(SHLIB_EXT)
 
 ifeq ($(OS),WINNT)
 # On Windows we need to strip out exported functions from the generated import library.
-# On i686, there's an extra underscore at the beginning
-ifeq ($(ARCH),i686)
-ABI_UNDERSCORE := _\#\#
-else
-ABI_UNDERSCORE :=
-endif
-EXPORTED_FUNCS := $(shell echo -e "#include \"jl_exported_funcs.inc\"\n#define XX(x) $(ABI_UNDERSCORE)x\nJL_EXPORTED_FUNCS(XX)" | $(CPP) -I$(JULIAHOME)/src - | tail -n 1)
-STRIP_EXPORTED_FUNCS := $(patsubst %,--strip-symbol=%,$(EXPORTED_FUNCS))
+STRIP_EXPORTED_FUNCS := $(shell $(CPP_STDOUT) -I$(JULIAHOME)/src $(SRCDIR)/list_strip_symbols.h)
 endif
 
-$(build_shlibdir)/libjulia.$(JL_MAJOR_MINOR_SHLIB_EXT): $(LIB_OBJS) | $(build_shlibdir) $(build_libdir)
+$(build_shlibdir)/libjulia.$(JL_MAJOR_MINOR_SHLIB_EXT): $(LIB_OBJS) $(SRCDIR)/list_strip_symbols.h | $(build_shlibdir) $(build_libdir)
 	@$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -DLIBRARY_EXPORTS -shared $(SHIPFLAGS) $(LIB_OBJS) -o $@ \
 		$(JLIBLDFLAGS) $(LOADER_LDFLAGS) $(RPATH_LIB) $(call SONAME_FLAGS,libjulia.$(JL_MAJOR_SHLIB_EXT)))
 	@$(INSTALL_NAME_CMD)libjulia.$(SHLIB_EXT) $@
@@ -120,10 +113,10 @@ ifeq ($(OS), WINNT)
 	@$(call PRINT_ANALYZE, $(OBJCOPY) $(build_libdir)/$(notdir $@).tmp.a $(STRIP_EXPORTED_FUNCS) $(build_libdir)/$(notdir $@).a && rm $(build_libdir)/$(notdir $@).tmp.a)
 endif
 
-$(build_shlibdir)/libjulia-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(LIB_DOBJS) | $(build_shlibdir) $(build_libdir)
+$(build_shlibdir)/libjulia-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(LIB_DOBJS) $(SRCDIR)/list_strip_symbols.h | $(build_shlibdir) $(build_libdir)
 	@$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -DLIBRARY_EXPORTS -shared $(DEBUGFLAGS) $(LIB_DOBJS) -o $@ \
 		$(JLIBLDFLAGS) $(LOADER_LDFLAGS) $(RPATH_LIB) $(call SONAME_FLAGS,libjulia-debug.$(JL_MAJOR_SHLIB_EXT)))
-	@$(INSTALL_NAME_CMD)libjulia-debug.$(SHLIB_EXT) $@.tmp
+	@$(INSTALL_NAME_CMD)libjulia-debug.$(SHLIB_EXT) $@
 ifeq ($(OS), WINNT)
 	@$(call PRINT_ANALYZE, $(OBJCOPY) $(build_libdir)/$(notdir $@).tmp.a $(STRIP_EXPORTED_FUNCS) $(build_libdir)/$(notdir $@).a && rm $(build_libdir)/$(notdir $@).tmp.a)
 endif
diff --git a/cli/README.md b/cli/README.md
new file mode 100644
index 00000000000000..4021aceb7d8398
--- /dev/null
+++ b/cli/README.md
@@ -0,0 +1,31 @@
+# cli and loader
+
+This directory contains the code used by the Julia loader, implementing the pieces necessary to isolate ourselves from the native dynamic loader enough to reimplement useful features such as RPATH across all platforms.
+This loader comprises the `julia` executable and the `libjulia` library, which are responsible for setting things up such that `libjulia-internal` and any other internal dependencies can be reliably loaded.
+The code is organized in three pieces:
+
+* `loader_exe.c` gets built into the main `julia` executable.  It immediately loads `libjulia`.
+* `loader_lib.c` gets built into the main `libjulia` shared library.  This is the main entrypoint for the Julia runtime loading process, which occurs within `jl_load_repl()`.
+* `trampolines/*.S` contains assembly definitions for symbol-forwarding trampolines.  These are used to allow `libjulia` to re-export symbols such that a C linker can use `libjulia` directly for embedding use cases.
+
+The main requirements of the loader are as follows:
+
+- **Isolation**: We need to be able to load our own copy of `libgcc_s.so`, etc...
+  On Linux/macOS, proper application of `RPATH` can influence the linker's decisions, however errant `LD_LIBRARY_PATH` entries or system libraries inserted into the build process can still interfere, not to mention Windows' lack of `RPATH`-like capabilities.
+  To address this, the loader is built as a stand-alone binary that does not depend on the large set of dependencies that `libjulia-internal` itself does, and manually `dlopen()`'s a list of dependencies using logic similar to that of an `RPATH`.
+- **Compatibility**: We need to support embedding use cases without forcing embedders to care about all of these things.
+  To allow linking against the Julia runtime by simply providing `-ljulia` on the link line, we must ensure that all public interfaces, whether function symbols or data symbols, are exported from `libjulia`.
+  This motivates our usage of function trampolines to re-export functions from `libjulia-internal`, and the reason why all public data symbols are defined within `libjulia`, then imported into `libjulia-internal` for initialization.
+- **Flexibility**: We need to be able to make use of system libraries when requested to do so by the user at build time.
+  Currently, we embed the list of libraries to be `dlopen()`'ed within `libjulia` as a string (See the definition of `DEP_LIBS` in `Make.inc` and its usage in `loader_lib.c`).
+  This is flexible enough as we do not support changing this configuration at runtime, however in the future, we may need to add some simple parsing logic in `loader_lib.c` to inspect a `LocalPreferences.toml` and construct the list of libraries to load from that.
+- **Speed**: This whole process should be fast, especially function trampolines.
+  To this end, we write everything in low-overhead assembly, borrowing inspiration from the PLT trampolines that the linker already generates when using dynamic libraries.
+
+## Public interface definition
+
+The public interface exported by `libjulia` is contained within `.inc` files stored in `src`; one for exported data symbols, [`src/jl_exported_data.inc`](../src/jl_exported_data.inc) and one for exported functions, [`src/jl_exported_funcs.inc`](../src/jl_exported_funcs.inc).
+Adding entries to the data list will cause `libjulia` to generate a placeholder variable declaration.
+Most symbols are declared to be of type `void *`, however for symbols that are of a different size, they are declared along with their type.
+Adding entries to the function list will cause `libjulia` to generate a trampoline definition (using a trampoline according to the architecture of the target processor) and then at runtime, when `libjulia` has successfully loaded `libjulia-internal`, it will `dlsym()` that symbol from within `libjulia-internal` and set it as the target of the trampoline.
+All initialization will occur automatically upon successful load of `libjulia`, so there is no need for user code to call an initialization before invoking typical `libjulia-internal` functions (although initialization of the runtime itself is still necessary, e.g. calling `jl_init()`).
diff --git a/cli/jl_exports.h b/cli/jl_exports.h
index 0d467a6528b754..35d2767726865a 100644
--- a/cli/jl_exports.h
+++ b/cli/jl_exports.h
@@ -10,13 +10,19 @@
 JL_EXPORTED_DATA_POINTERS(XX)
 #undef XX
 
-// Define symbol data as `$type) $(name);`
+// Define symbol data as `$(type) $(name);`
 #define XX(name, type)    JL_DLLEXPORT type name;
 JL_EXPORTED_DATA_SYMBOLS(XX)
 #undef XX
 
-// Define holder locations for function addresses as `const void * $(name)_addr`
-#define XX(name)    JL_HIDDEN const void * name##_addr;
+// Declare list of exported functions (sans type)
+#define XX(name)    JL_DLLEXPORT void name(void);
+typedef void (anonfunc)(void);
+JL_EXPORTED_FUNCS(XX)
+#undef XX
+
+// Define holder locations for function addresses as `anonfunc * $(name)_addr = (anonfunc*)&$(name);`
+#define XX(name)    JL_HIDDEN anonfunc * name##_addr = (anonfunc*)&name;
 JL_EXPORTED_FUNCS(XX)
 #undef XX
 
@@ -29,7 +35,7 @@ static const char *const jl_exported_func_names[] = {
 #undef XX
 
 #define XX(name)    &name##_addr,
-static const void ** jl_exported_func_addrs[] = {
+static anonfunc **const jl_exported_func_addrs[] = {
     JL_EXPORTED_FUNCS(XX)
     NULL
 };
diff --git a/cli/list_strip_symbols.h b/cli/list_strip_symbols.h
new file mode 100644
index 00000000000000..e1a96261fe05ac
--- /dev/null
+++ b/cli/list_strip_symbols.h
@@ -0,0 +1,7 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include "jl_exported_funcs.inc"
+#include "trampolines/common.h"
+#define XX(x) --strip-symbol=CNAME(x)
+JL_EXPORTED_FUNCS(XX)
+#undef XX
diff --git a/cli/loader.h b/cli/loader.h
index 5b1c10abc99982..6df1557ec2c26b 100644
--- a/cli/loader.h
+++ b/cli/loader.h
@@ -3,6 +3,7 @@
 /* Bring in definitions for `_OS_X_`, `PATH_MAX` and `PATHSEPSTRING`, `jl_ptls_t`, etc... */
 #include "../src/support/platform.h"
 #include "../src/support/dirpath.h"
+#include "../src/julia_fasttls.h"
 
 #ifdef _OS_WINDOWS_
 /* We need to reimplement a bunch of standard library stuff on windows,
@@ -43,15 +44,6 @@
 #include <dlfcn.h>
 #endif
 
-// Borrow definitions from `julia.h`
-#if defined(__GNUC__)
-#  define JL_CONST_FUNC __attribute__((const))
-#elif defined(_COMPILER_MICROSOFT_)
-#  define JL_CONST_FUNC __declspec(noalias)
-#else
-#  define JL_CONST_FUNC
-#endif
-
 // Borrow definition from `support/dtypes.h`
 #ifdef _OS_WINDOWS_
 # ifdef LIBRARY_EXPORTS
@@ -68,12 +60,6 @@
 # endif
 #define JL_HIDDEN    __attribute__ ((visibility("hidden")))
 #endif
-#ifdef JL_DEBUG_BUILD
-#define JL_NAKED     __attribute__ ((naked,no_stack_protector))
-#else
-#define JL_NAKED     __attribute__ ((naked))
-#endif
-
 /*
  * DEP_LIBS is our list of dependent libraries that must be loaded before `libjulia`.
  * Note that order matters, as each entry will be opened in-order.  We define here a
diff --git a/cli/loader_exe.c b/cli/loader_exe.c
index e0cfdd93fbee71..e5bb9d1a5fbe7d 100644
--- a/cli/loader_exe.c
+++ b/cli/loader_exe.c
@@ -11,14 +11,15 @@ extern "C" {
 #include "loader_win_utils.c"
 #endif
 
-/* Define ptls getter, as this cannot be defined within a shared library. */
-#if !defined(_OS_WINDOWS_) && !defined(_OS_DARWIN_)
-JL_DLLEXPORT JL_CONST_FUNC void * jl_get_ptls_states_static(void)
+JULIA_DEFINE_FAST_TLS
+
+#ifdef _COMPILER_ASAN_ENABLED_
+JL_DLLEXPORT const char* __asan_default_options()
 {
-    /* Because we can't #include <julia.h> in this file, we define a TLS state object with
-     * hopefully enough room; at last check, the `jl_tls_states_t` struct was <16KB. */
-    static __attribute__((tls_model("local-exec"))) __thread char tls_states[32768];
-    return &tls_states;
+    return "allow_user_segv_handler=1:detect_leaks=0";
+    // FIXME: enable LSAN after fixing leaks & defining __lsan_default_suppressions(),
+    //        or defining __lsan_default_options = exitcode=0 once publicly available
+    //        (here and in flisp/flmain.c)
 }
 #endif
 
@@ -27,16 +28,22 @@ int mainCRTStartup(void)
 {
     int argc;
     LPWSTR * wargv = CommandLineToArgv(GetCommandLine(), &argc);
-    char ** argv = (char **)malloc(sizeof(char *)*(argc+ 1));
+    char ** argv = (char **)malloc(sizeof(char*) * (argc + 1));
     setup_stdio();
 #else
 int main(int argc, char * argv[])
 {
 #endif
 
+#ifdef _COMPILER_ASAN_ENABLED_
+    // ASAN does not support RTLD_DEEPBIND
+    // https://github.com/google/sanitizers/issues/611
+    putenv("LBT_USE_RTLD_DEEPBIND=0");
+#endif
+
     // Convert Windows wchar_t values to UTF8
 #ifdef _OS_WINDOWS_
-    for (int i=0; i<argc; i++) {
+    for (int i = 0; i < argc; i++) {
         size_t max_arg_len = 4*wcslen(wargv[i]);
         argv[i] = (char *)malloc(max_arg_len);
         if (!wchar_to_utf8(wargv[i], argv[i], max_arg_len)) {
diff --git a/cli/loader_lib.c b/cli/loader_lib.c
index 0249c11f3f6c3c..d921055f082212 100644
--- a/cli/loader_lib.c
+++ b/cli/loader_lib.c
@@ -31,12 +31,27 @@ void jl_loader_print_stderr3(const char * msg1, const char * msg2, const char *
 
 /* Wrapper around dlopen(), with extra relative pathing thrown in*/
 static void * load_library(const char * rel_path, const char * src_dir) {
+    void * handle = NULL;
+
+    // See if a handle is already open to the basename (a narrow string, so use the ANSI API on Windows)
+    const char *basename = rel_path + strlen(rel_path);
+    while (basename-- > rel_path)
+        if (*basename == PATHSEPSTRING[0] || *basename == '/')
+            break;
+    basename++;
+#if defined(_OS_WINDOWS_)
+    if ((handle = GetModuleHandleA(basename)))
+        return handle;
+#else
+    if ((handle = dlopen(basename, RTLD_NOLOAD | RTLD_NOW | RTLD_GLOBAL)))
+        return handle;
+#endif
+
     char path[2*PATH_MAX + 1] = {0};
     strncat(path, src_dir, sizeof(path) - 1);
     strncat(path, PATHSEPSTRING, sizeof(path) - 1);
     strncat(path, rel_path, sizeof(path) - 1);
 
-    void * handle = NULL;
 #if defined(_OS_WINDOWS_)
     wchar_t wpath[2*PATH_MAX + 1] = {0};
     if (!utf8_to_wchar(path, wpath, 2*PATH_MAX)) {
@@ -130,7 +145,7 @@ JL_DLLEXPORT const char * jl_get_libdir()
 
 void * libjulia_internal = NULL;
 __attribute__((constructor)) void jl_load_libjulia_internal(void) {
-    // Only initalize this once
+    // Only initialize this once
     if (libjulia_internal != NULL) {
         return;
     }
@@ -160,7 +175,12 @@ __attribute__((constructor)) void jl_load_libjulia_internal(void) {
 
     // Once we have libjulia-internal loaded, re-export its symbols:
     for (unsigned int symbol_idx=0; jl_exported_func_names[symbol_idx] != NULL; ++symbol_idx) {
-        (*jl_exported_func_addrs[symbol_idx]) = lookup_symbol(libjulia_internal, jl_exported_func_names[symbol_idx]);
+        void *addr = lookup_symbol(libjulia_internal, jl_exported_func_names[symbol_idx]);
+        if (addr == NULL || addr == *jl_exported_func_addrs[symbol_idx]) {
+            jl_loader_print_stderr3("ERROR: Unable to load ", jl_exported_func_names[symbol_idx], " from libjulia-internal");
+            exit(1);
+        }
+        (*jl_exported_func_addrs[symbol_idx]) = addr;
     }
 }
 
@@ -177,23 +197,24 @@ JL_DLLEXPORT int jl_load_repl(int argc, char * argv[]) {
     }
     // Next, if we're on Linux/FreeBSD, set up fast TLS.
 #if !defined(_OS_WINDOWS_) && !defined(_OS_DARWIN_)
-    void (*jl_set_ptls_states_getter)(void *) = lookup_symbol(libjulia_internal, "jl_set_ptls_states_getter");
-    if (jl_set_ptls_states_getter == NULL) {
-        jl_loader_print_stderr("ERROR: Cannot find jl_set_ptls_states_getter() function within libjulia-internal!\n");
+    void (*jl_pgcstack_setkey)(void*, void*(*)(void)) = lookup_symbol(libjulia_internal, "jl_pgcstack_setkey");
+    if (jl_pgcstack_setkey == NULL) {
+        jl_loader_print_stderr("ERROR: Cannot find jl_pgcstack_setkey() function within libjulia-internal!\n");
         exit(1);
     }
-    void * (*fptr)(void) = lookup_symbol(RTLD_DEFAULT, "jl_get_ptls_states_static");
-    if (fptr == NULL) {
-        jl_loader_print_stderr("ERROR: Cannot find jl_get_ptls_states_static(), must define this symbol within calling executable!\n");
+    void *fptr = lookup_symbol(RTLD_DEFAULT, "jl_get_pgcstack_static");
+    void *(*key)(void) = lookup_symbol(RTLD_DEFAULT, "jl_pgcstack_addr_static");
+    if (fptr == NULL || key == NULL) {
+        jl_loader_print_stderr("ERROR: Cannot find jl_get_pgcstack_static(), must define this symbol within calling executable!\n");
         exit(1);
     }
-    jl_set_ptls_states_getter((void *)fptr);
+    jl_pgcstack_setkey(fptr, key);
 #endif
 
     // Load the repl entrypoint symbol and jump into it!
-    int (*entrypoint)(int, char **) = (int (*)(int, char **))lookup_symbol(libjulia_internal, "repl_entrypoint");
+    int (*entrypoint)(int, char **) = (int (*)(int, char **))lookup_symbol(libjulia_internal, "jl_repl_entrypoint");
     if (entrypoint == NULL) {
-        jl_loader_print_stderr("ERROR: Unable to find `repl_entrypoint()` within libjulia-internal!\n");
+        jl_loader_print_stderr("ERROR: Unable to find `jl_repl_entrypoint()` within libjulia-internal!\n");
         exit(1);
     }
     return entrypoint(argc, (char **)argv);
diff --git a/cli/trampolines/common.h b/cli/trampolines/common.h
index 743d697d2467b6..06d7b9e236971d 100644
--- a/cli/trampolines/common.h
+++ b/cli/trampolines/common.h
@@ -23,6 +23,9 @@
                             .ascii STR(-export:##I(name)); \
                             .ascii " "; \
                             .section .text
+#elif defined(__ELF__)
+#define DEBUGINFO(name)     .type CNAME(name),@function
+#define EXPORT(name)        .size CNAME(name), . - CNAME(name)
 #else
 #define DEBUGINFO(name)
 #define EXPORT(name)
diff --git a/contrib/add_license_to_files.jl b/contrib/add_license_to_files.jl
index eecca7dc3f4648..c5aa0f49d99d38 100644
--- a/contrib/add_license_to_files.jl
+++ b/contrib/add_license_to_files.jl
@@ -20,7 +20,7 @@ const rootdirs = [
     "../stdlib",
 ]
 
-# to exculde whole sub directories
+# to exclude whole sub directories
 const excludedirs = [
     # see: https://github.com/JuliaLang/julia/pull/11073#issuecomment-98090053
     "../base/ryu",
diff --git a/contrib/asan/Make.user.asan b/contrib/asan/Make.user.asan
new file mode 100644
index 00000000000000..095f5e548bc530
--- /dev/null
+++ b/contrib/asan/Make.user.asan
@@ -0,0 +1,23 @@
+TOOLCHAIN=$(BUILDROOT)/../toolchain
+BINDIR=$(TOOLCHAIN)/usr/bin
+TOOLDIR=$(TOOLCHAIN)/usr/tools
+
+# use our new toolchain
+USECLANG=1
+override CC=$(BINDIR)/clang
+override CXX=$(TOOLDIR)/clang++
+export ASAN_SYMBOLIZER_PATH=$(TOOLDIR)/llvm-symbolizer
+
+USE_BINARYBUILDER_LLVM=1
+
+override SANITIZE=1
+override SANITIZE_ADDRESS=1
+
+# make the GC use regular malloc/frees, which are hooked by ASAN
+override WITH_GC_DEBUG_ENV=1
+
+# default to a debug build for better line number reporting
+override JULIA_BUILD_MODE=debug
+
+# make ASAN consume less memory
+export ASAN_OPTIONS=detect_leaks=0:fast_unwind_on_malloc=0:allow_user_segv_handler=1:malloc_context_size=2
diff --git a/contrib/asan/Make.user.tools b/contrib/asan/Make.user.tools
new file mode 100644
index 00000000000000..1bd6f97e39111d
--- /dev/null
+++ b/contrib/asan/Make.user.tools
@@ -0,0 +1,2 @@
+USE_BINARYBUILDER_LLVM=1
+BUILD_LLVM_CLANG=1
diff --git a/contrib/asan/build.sh b/contrib/asan/build.sh
new file mode 100755
index 00000000000000..5ef75a78fa2b16
--- /dev/null
+++ b/contrib/asan/build.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+#
+# Usage:
+#     contrib/asan/build.sh <path> [<make_targets>...]
+#
+# Build ASAN-enabled julia.  Given a workspace directory <path>, build
+# ASAN-enabled julia in <path>/asan.  Required tools are installed under
+# <path>/toolchain.  This script also takes optional <make_targets> arguments
+# which are passed to `make`.  The default make target is `debug`.
+
+set -ue
+
+# `$WORKSPACE` holds the `toolchain` and `asan` sub-directories.  Use `${1:-}`
+# so that a missing argument reaches the check below despite `set -u`.
+WORKSPACE="${1:-}"
+if [ "$WORKSPACE" = "" ]; then
+    echo "Workspace directory must be specified as the first argument" >&2
+    exit 2
+fi
+shift
+
+mkdir -pv "$WORKSPACE"
+WORKSPACE="$(cd "$WORKSPACE" && pwd)"
+if [ "$WORKSPACE" = "" ]; then
+    echo "Failed to create the workspace directory." >&2
+    exit 2
+fi
+
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+JULIA_HOME="$HERE/../../"
+
+echo
+echo "Installing toolchain..."
+
+TOOLCHAIN="$WORKSPACE/toolchain"
+if [ ! -d "$TOOLCHAIN" ]; then
+    make -C "$JULIA_HOME" configure O="$TOOLCHAIN"
+    cp "$HERE/Make.user.tools"  "$TOOLCHAIN/Make.user"
+fi
+
+make -C "$TOOLCHAIN/deps" install-clang install-llvm-tools
+
+# TODO: https://github.com/JuliaPackaging/Yggdrasil/issues/3359
+rm -f "$TOOLCHAIN/usr/tools/clang++"
+ln -s "$TOOLCHAIN/usr/bin/clang" "$TOOLCHAIN/usr/tools/clang++"
+
+echo
+echo "Building Julia..."
+
+BUILD="$WORKSPACE/asan"
+if [ ! -d "$BUILD" ]; then
+    make -C "$JULIA_HOME" configure O="$BUILD"
+    cp "$HERE/Make.user.asan"  "$BUILD/Make.user"
+fi
+
+make -C "$BUILD" "$@"
diff --git a/contrib/asan/check.jl b/contrib/asan/check.jl
new file mode 100755
index 00000000000000..2933aaf3fb4e31
--- /dev/null
+++ b/contrib/asan/check.jl
@@ -0,0 +1,87 @@
+#!/bin/bash
+# -*- mode: julia -*-
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+#
+# Usage:
+#     contrib/asan/check.jl <julia>
+#
+# Check that <julia> is built with ASAN.
+#
+#=
+JULIA="${JULIA:-julia}"
+exec "$JULIA" --startup-file=no --compile=min "${BASH_SOURCE[0]}" "$@"
+=#
+# `main` returns 0 when the ASAN check passes, 2 on a usage error, and 1 on any other failure.
+function main(args = ARGS)::Int
+    if length(args) != 1
+        @error "Expect a single argument" args
+        return 2
+    end
+    julia, = args  # the single argument is the `julia` command to check
+
+    # It looks like double-free is easy to robustly trigger.
+    code = """
+    @info "Testing a pattern that would trigger ASAN"
+    write(ARGS[1], "started")
+
+    ptr = ccall(:malloc, Ptr{UInt}, (Csize_t,), 256)
+    ccall(:free, Cvoid, (Ptr{UInt},), ptr)
+    ccall(:free, Cvoid, (Ptr{UInt},), ptr)
+
+    @error "Failed to trigger ASAN"
+    """
+    # `proc`, `timeout` and `isstarted` are set inside the `mktemp` block and read after it.
+    local proc
+    timeout = Threads.Atomic{Bool}(false)
+    isstarted = false
+    mktemp() do tmppath, tmpio
+        cmd = `$julia -e $code $tmppath`  # `$tmppath` is what the snippet sees as `ARGS[1]`
+        # Note: Ideally, we set ASAN_SYMBOLIZER_PATH here. But there is no easy
+        # way to find out the path from just a Julia binary.
+
+        @debug "Starting a process" cmd
+        proc = run(pipeline(cmd; stdout, stderr); wait = false)  # do not block here
+        timer = Timer(10)  # timeout, in seconds, for the subprocess
+        @sync try
+            @async begin
+                try
+                    wait(timer)
+                    true  # the timer expired before the subprocess finished
+                catch err
+                    err isa EOFError || rethrow()  # presumably raised once `timer` is closed below
+                    false
+                end && begin
+                    timeout[] = true
+                    kill(proc)
+                end
+            end
+            wait(proc)
+        finally
+            close(timer)  # wakes the watcher task so that `@sync` can return
+        end
+
+        # At the very beginning of the process, the `julia` subprocess writes a
+        # marker showing that it started successfully. This lets us tell apart
+        # a non-functional `julia` binary (or even a non-`julia` command) from
+        # a correctly working `julia` with ASAN:
+        isstarted = read(tmpio, String) == "started"
+    end
+
+    if timeout[]
+        @error "Timeout waiting for the subprocess"
+        return 1
+    elseif success(proc)  # the snippet ran to completion, so the double-free went unnoticed
+        @error "ASAN was not triggered"
+        return 1
+    elseif !isstarted  # the process failed without ever writing the marker
+        @error "Failed to start the process"
+        return 1
+    else
+        @info "ASAN is functional in the Julia binary `$julia`"
+        return 0
+    end
+end
+
+if abspath(PROGRAM_FILE) == @__FILE__  # run `main` only when executed as a script
+    exit(main())
+end
diff --git a/contrib/bpftrace/gc_all.bt b/contrib/bpftrace/gc_all.bt
new file mode 100755
index 00000000000000..f78e8f3aa607d8
--- /dev/null
+++ b/contrib/bpftrace/gc_all.bt
@@ -0,0 +1,44 @@
+#!/usr/bin/env bpftrace
+// Per-pid histograms (in microseconds) of Julia GC phases, driven by the USDT probes below.
+BEGIN
+{
+    printf("Tracing Julia GC Times... Hit Ctrl-C to end.\n");
+}
+// gc__begin: remember when the collection (`@start`) and its current phase (`@time`) began.
+usdt:usr/lib/libjulia-internal.so:julia:gc__begin
+{
+    $now = nsecs;
+    @time[pid] = $now;
+    @start[pid] = $now;
+}
+// gc__stop_the_world: record the time elapsed since `@time[pid]` was last set.
+usdt:usr/lib/libjulia-internal.so:julia:gc__stop_the_world
+/@start[pid]/
+{
+    $now = nsecs;
+    @stop_the_world_usecs[pid] = hist(($now - @time[pid]) / 1000);
+    @time[pid] = $now;
+}
+// gc__end: record the whole collection and the time since `@time[pid]` was last set.
+usdt:usr/lib/libjulia-internal.so:julia:gc__end
+/@start[pid]/
+{
+    $now = nsecs;
+    @gc_total_usecs[pid] = hist(($now - @start[pid]) / 1000);
+    @gc_phase_usecs[pid] = hist(($now - @time[pid]) / 1000);
+    @time[pid] = $now;
+    delete(@start[pid]);
+}
+// gc__finalizer: record the time since `@time[pid]` was last set, then forget it.
+usdt:usr/lib/libjulia-internal.so:julia:gc__finalizer
+/@time[pid]/
+{
+    @finalizer[pid] = hist((nsecs - @time[pid]) / 1000);
+    delete(@time[pid]);
+}
+// On exit, empty the timestamp maps; the histogram maps are left untouched.
+END
+{
+    clear(@start);
+    clear(@time);
+}
diff --git a/contrib/bpftrace/gc_simple.bt b/contrib/bpftrace/gc_simple.bt
new file mode 100755
index 00000000000000..559f41c41cf72c
--- /dev/null
+++ b/contrib/bpftrace/gc_simple.bt
@@ -0,0 +1,23 @@
+#!/usr/bin/env bpftrace
+// Per-pid histogram (in microseconds) of the time between gc__begin and gc__end.
+BEGIN
+{
+    printf("Tracing Julia GC Times... Hit Ctrl-C to end.\n");
+}
+// gc__begin: remember when this pid's collection started.
+usdt:usr/lib/libjulia-internal.so:julia:gc__begin
+{
+    @start[pid] = nsecs;
+}
+// gc__end: record the elapsed time; the filter skips pids with no recorded start.
+usdt:usr/lib/libjulia-internal.so:julia:gc__end
+/@start[pid]/
+{
+    @usecs[pid] = hist((nsecs - @start[pid]) / 1000);
+    delete(@start[pid]);
+}
+// On exit, empty the timestamp map; `@usecs` is left untouched.
+END
+{
+    clear(@start);
+}
diff --git a/contrib/bpftrace/gc_stop_the_world_latency.bt b/contrib/bpftrace/gc_stop_the_world_latency.bt
new file mode 100755
index 00000000000000..8e541bcb421e2d
--- /dev/null
+++ b/contrib/bpftrace/gc_stop_the_world_latency.bt
@@ -0,0 +1,23 @@
+#!/usr/bin/env bpftrace
+// Per-pid histogram (in microseconds) of the time between gc__begin and gc__stop_the_world.
+BEGIN
+{
+    printf("Tracing Julia GC Stop-The-World Latency... Hit Ctrl-C to end.\n");
+}
+// gc__begin: remember when this pid's collection started.
+usdt:usr/lib/libjulia-internal.so:julia:gc__begin
+{
+    @start[pid] = nsecs;
+}
+// gc__stop_the_world: record the elapsed time; the filter skips pids with no recorded start.
+usdt:usr/lib/libjulia-internal.so:julia:gc__stop_the_world
+/@start[pid]/
+{
+    @usecs[pid] = hist((nsecs - @start[pid]) / 1000);
+    delete(@start[pid]);
+}
+// On exit, empty the timestamp map; `@usecs` is left untouched.
+END
+{
+    clear(@start);
+}
diff --git a/contrib/check-whitespace.sh b/contrib/check-whitespace.sh
index c380d7bdd29691..ff5bd24ab2cbe2 100755
--- a/contrib/check-whitespace.sh
+++ b/contrib/check-whitespace.sh
@@ -35,3 +35,5 @@ if git --no-pager grep --color -n --full-name -e ' $' -- $file_patterns; then
     echo "and then a forced push of the correct branch"
     exit 1
 fi
+
+echo "Whitespace check found no issues"
diff --git a/contrib/generate_precompile.jl b/contrib/generate_precompile.jl
index 049506fd464c26..cb9bc8fb019f36 100644
--- a/contrib/generate_precompile.jl
+++ b/contrib/generate_precompile.jl
@@ -1,6 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-if isempty(Base.ARGS) || Base.ARGS[1] !== "0"
+if Base.isempty(Base.ARGS) || Base.ARGS[1] !== "0"
 Sys.__init_build()
 # Prevent this from being put into the Main namespace
 @eval Module() begin
@@ -141,7 +141,10 @@ if Artifacts !== nothing
     precompile_script *= """
     using Artifacts, Base.BinaryPlatforms, Libdl
     artifacts_toml = abspath(joinpath(Sys.STDLIB, "Artifacts", "test", "Artifacts.toml"))
-    # cd(() -> (name = "HelloWorldC"; @artifact_str(name)), dirname(artifacts_toml))
+    artifact_hash("HelloWorldC", artifacts_toml)
+    oldpwd = pwd(); cd(dirname(artifacts_toml))
+    macroexpand(Main, :(@artifact_str("HelloWorldC")))
+    cd(oldpwd)
     artifacts = Artifacts.load_artifacts_toml(artifacts_toml)
     platforms = [Artifacts.unpack_platform(e, "HelloWorldC", artifacts_toml) for e in artifacts["HelloWorldC"]]
     best_platform = select_platform(Dict(p => triplet(p) for p in platforms))
@@ -219,7 +222,11 @@ Profile = get(Base.loaded_modules,
           nothing)
 if Profile !== nothing
     hardcoded_precompile_statements *= """
-    precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol})
+    precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Int, UInt})
+    precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Int, UnitRange{UInt}})
+    precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, UnitRange{Int}, UInt})
+    precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, UnitRange{Int}, UnitRange{UInt}})
+    precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Vector{Int}, Vector{UInt}})
     """
 end
 
@@ -247,16 +254,20 @@ function generate_precompile_statements()
               module $pkgname
               end
               """)
-        tmp = tempname()
+        tmp_prec = tempname()
+        tmp_proc = tempname()
         s = """
             pushfirst!(DEPOT_PATH, $(repr(prec_path)));
-            Base.PRECOMPILE_TRACE_COMPILE[] = $(repr(tmp));
+            Base.PRECOMPILE_TRACE_COMPILE[] = $(repr(tmp_prec));
             Base.compilecache(Base.PkgId($(repr(pkgname))), $(repr(path)))
             $precompile_script
             """
-        run(`$(julia_exepath()) -O0 --sysimage $sysimg --startup-file=no -Cnative -e $s`)
-        for statement in split(read(tmp, String), '\n')
-            push!(statements, statement)
+        run(`$(julia_exepath()) -O0 --sysimage $sysimg --trace-compile=$tmp_proc --startup-file=no -Cnative -e $s`)
+        for f in (tmp_prec, tmp_proc)
+            for statement in split(read(f, String), '\n')
+                occursin("Main.", statement) && continue
+                push!(statements, statement)
+            end
         end
     end
 
@@ -370,7 +381,7 @@ function generate_precompile_statements()
                 # XXX: precompile doesn't currently handle overloaded Vararg arguments very well.
                 # Replacing N with a large number works around it.
                 l = l.args[end]
-                if isexpr(l, :curly) && length(l.args) == 2 && l.args[1] == :Vararg # Vararg{T}
+                if isexpr(l, :curly) && length(l.args) == 2 && l.args[1] === :Vararg # Vararg{T}
                     push!(l.args, 100) # form Vararg{T, 100} instead
                 end
             end
diff --git a/contrib/mac/app/Entitlements.plist b/contrib/mac/app/Entitlements.plist
index b84dccb00f95cb..95c1a02d589585 100644
--- a/contrib/mac/app/Entitlements.plist
+++ b/contrib/mac/app/Entitlements.plist
@@ -4,7 +4,7 @@
 <dict>
 	<key>com.apple.security.automation.apple-events</key>
 	<true/>
-	<key>com.apple.security.cs.get-task-allow</key>
+	<key>com.apple.security.get-task-allow</key>
 	<true/>
 	<key>com.apple.security.cs.allow-dyld-environment-variables</key>
 	<true/>
diff --git a/contrib/mac/app/renotarize_dmg.sh b/contrib/mac/app/renotarize_dmg.sh
index 26a1258cd4682d..f0d6d0a197e5f5 100755
--- a/contrib/mac/app/renotarize_dmg.sh
+++ b/contrib/mac/app/renotarize_dmg.sh
@@ -13,28 +13,28 @@ if [[ -z "${APPLEID}" ]] || [[ -z "${APPLEID_PASSWORD}" ]]; then
     exit 1
 fi
 
-# Translate from `s3://` URL to `https://` url:
+# Use `aws` to download an `s3://` URL, otherwise use `curl`
 URL="$1"
 if [[ "$URL" == s3://* ]]; then
-    # Chop off `s3://`
-    URL="${URL:5}"
-    # Split into bucket.s3.aws.com/path
-    URL="https://${URL%%/*}.s3.amazonaws.com/${URL#*/}"
+    aws s3 cp "${URL}" .
+elif [[ "${URL}" == http* ]]; then
+    # Download .dmg
+    curl -L "${URL}" -O
+else
+    echo "Unknown URL format: '${URL}'" >&2
+    exit 1
 fi
 
-# Download .dmg
-curl -L "${URL}" -O
-
 # Unpack dmg into our `dmg` folder
 rm -rf dmg
+DMG_NAME=$(basename "${URL}")
 
 # Copy app over to our `dmg` folder
 for j in /Volumes/Julia-*; do hdiutil detach "${j}"; done
-hdiutil mount "$(basename "$1")"
+hdiutil mount "${DMG_NAME}"
 cp -Ra /Volumes/Julia-* dmg
 
-# Override some important Makefile variables
-DMG_NAME=$(basename "$1")
+# Autodetect APP_NAME and VOL_NAME
 APP_NAME=$(basename dmg/*.app)
 VOL_NAME=$(basename /Volumes/Julia-*)
 
@@ -47,3 +47,8 @@ for j in /Volumes/Julia-*; do hdiutil detach "${j}"; done
 
 # Run notarization
 make notarize "DMG_NAME=${DMG_NAME}" "APP_NAME=${APP_NAME}" "VOL_NAME=${VOL_NAME}"
+
+# If it was an s3 bucket, auto-upload it
+if [[ "${URL}" == s3://* ]]; then
+    aws s3 cp --acl public-read "${DMG_NAME}" "${URL}"
+fi
diff --git a/contrib/mac/frameworkapp/Makefile b/contrib/mac/frameworkapp/Makefile
index 93392cd4ec3d02..fbca4577df1bcf 100644
--- a/contrib/mac/frameworkapp/Makefile
+++ b/contrib/mac/frameworkapp/Makefile
@@ -76,7 +76,7 @@ $(BUILDROOT)/framework-component.plist: $(JULIAHOME)/contrib/mac/frameworkapp/fr
 # important properties.  Together, the properties allow one "Julia.framework"
 # to exist at a location with multiple versions of Julia within.
 #
-# 1. The component's identifer is versioned to match the bundled framework.
+# 1. The component's identifier is versioned to match the bundled framework.
 # This allows multiple versions of the component to be installed with the
 # Julia.framework/Versions directory.
 # 2. The component-plist identifies the Versions/x.y directory as an upgradable
diff --git a/contrib/refresh_checksums.mk b/contrib/refresh_checksums.mk
index da0c69eed55102..5b8a25ab79b917 100644
--- a/contrib/refresh_checksums.mk
+++ b/contrib/refresh_checksums.mk
@@ -8,6 +8,11 @@
 SRCDIR := $(abspath $(dir $(lastword $(MAKEFILE_LIST))))
 JULIAHOME := $(abspath $(SRCDIR)/..)
 
+# force a sane / stable configuration
+export LC_ALL=C
+export LANG=C
+.SUFFIXES:
+
 # Default target that will have everything else added to it as a dependency
 all: checksum pack-checksum
 
@@ -19,7 +24,7 @@ CLANG_TRIPLETS=$(filter %-darwin %-freebsd,$(TRIPLETS))
 NON_CLANG_TRIPLETS=$(filter-out %-darwin %-freebsd,$(TRIPLETS))
 
 # These are the projects currently using BinaryBuilder; both GCC-expanded and non-GCC-expanded:
-BB_PROJECTS=mbedtls libssh2 nghttp2 mpfr curl libgit2 pcre libuv unwind llvmunwind dsfmt objconv p7zip zlib suitesparse openlibm blastrampoline
+BB_PROJECTS=mbedtls libssh2 nghttp2 mpfr curl libgit2 pcre libuv unwind llvmunwind dsfmt objconv p7zip zlib libsuitesparse openlibm blastrampoline
 BB_GCC_EXPANDED_PROJECTS=openblas csl
 BB_CXX_EXPANDED_PROJECTS=gmp llvm clang llvm-tools
 # These are non-BB source-only deps
@@ -47,7 +52,7 @@ endef
 # if $(3) is "assert", we set BINARYBUILDER_LLVM_ASSERTS=1
 define checksum_dep
 checksum-$(1)-$(2)-$(3):
-	-$(MAKE) $(QUIET_MAKE) -C "$(JULIAHOME)/deps" $(call make_flags,$(1),$(2),$(3)) checksum-$(1)
+	-+$(MAKE) $(QUIET_MAKE) -C "$(JULIAHOME)/deps" $(call make_flags,$(1),$(2),$(3)) checksum-$(1)
 .PHONY: checksum-$(1)-$(2)-$(3)
 
 # Add this guy to his project target
@@ -73,31 +78,40 @@ $(foreach project,$(BB_GCC_EXPANDED_PROJECTS),$(foreach triplet,$(TRIPLETS),$(fo
 $(foreach project,$(BB_CXX_EXPANDED_PROJECTS),$(foreach triplet,$(NON_CLANG_TRIPLETS),$(foreach cxxstring_abi,cxx11 cxx03,$(eval $(call checksum_dep,$(project),$(triplet)-$(cxxstring_abi))))))
 $(foreach project,$(BB_CXX_EXPANDED_PROJECTS),$(foreach triplet,$(CLANG_TRIPLETS),$(eval $(call checksum_dep,$(project),$(triplet)))))
 
-# Special libLLVM_asserts_jll targets
+# Special libLLVM_asserts_jll/LLVM_assert_jll targets
 $(foreach triplet,$(NON_CLANG_TRIPLETS),$(foreach cxxstring_abi,cxx11 cxx03,$(eval $(call checksum_dep,llvm,$(triplet)-$(cxxstring_abi),assert))))
+$(foreach triplet,$(NON_CLANG_TRIPLETS),$(foreach cxxstring_abi,cxx11 cxx03,$(eval $(call checksum_dep,llvm-tools,$(triplet)-$(cxxstring_abi),assert))))
 $(foreach triplet,$(CLANG_TRIPLETS),$(eval $(call checksum_dep,llvm,$(triplet),assert)))
+$(foreach triplet,$(CLANG_TRIPLETS),$(eval $(call checksum_dep,llvm-tools,$(triplet),assert)))
 
 # External stdlibs
 checksum-stdlibs:
-	-$(MAKE) $(QUIET_MAKE) -C "$(JULIAHOME)/stdlib" checksumall
+	-+$(MAKE) $(QUIET_MAKE) -C "$(JULIAHOME)/stdlib" checksumall
 all: checksum-stdlibs
 .PHONY: checksum-stdlibs
 
 # doc unicode data
 checksum-doc-unicodedata:
-	-$(MAKE) $(QUIET_MAKE) -C "$(JULIAHOME)/doc" checksum-unicodedata
+	-+$(MAKE) $(QUIET_MAKE) -C "$(JULIAHOME)/doc" checksum-unicodedata
 all: checksum-doc-unicodedata
 .PHONY: checksum-doc-unicodedata
 
 # Special LLVM source hashes for optional targets
 checksum-llvm-special-src:
-	-$(MAKE) $(QUIET_MAKE) -C "$(JULIAHOME)/deps" USE_BINARYBUILDER_LLVM=0 DEPS_GIT=0 BUILD_LLDB=1 BUILD_LLVM_CLANG=1 BUILD_CUSTOM_LIBCXX=1 USECLANG=1 checksum-llvm
+	-+$(MAKE) $(QUIET_MAKE) -C "$(JULIAHOME)/deps" USE_BINARYBUILDER_LLVM=0 DEPS_GIT=0 BUILD_LLDB=1 BUILD_LLVM_CLANG=1 BUILD_CUSTOM_LIBCXX=1 USECLANG=1 checksum-llvm
 all: checksum-llvm-special-src
 .PHONY: checksum-llvm-special-src
 
 # merge substring project names to avoid races
 pack-checksum-llvm-tools: | pack-checksum-llvm
+pack-checksum-llvm: | checksum-llvm-tools
 pack-checksum-csl: | pack-checksum-compilersupportlibraries
+pack-checksum-compilersupportlibraries: | checksum-csl
+
+# We need to adjust to the fact that the checksum files are called `suitesparse`
+pack-checksum-libsuitesparse: | pack-checksum-suitesparse
+	@# nothing to do but disable the prefix rule
+pack-checksum-suitesparse: | checksum-libsuitesparse
 
 # define how to pack parallel checksums into a single file format
 pack-checksum-%: FORCE
diff --git a/contrib/windows/build-installer.iss b/contrib/windows/build-installer.iss
index 6648d9e1f528a9..4f5f0259d2f2cb 100644
--- a/contrib/windows/build-installer.iss
+++ b/contrib/windows/build-installer.iss
@@ -103,7 +103,8 @@ Name: "addtopath"; Description: "Add {#AppName} to PATH"; GroupDescription: "{cm
 
 
 [Files]
-Source: "{#SourceDir}\*"; DestDir: "{app}"; Flags: ignoreversion recursesubdirs createallsubdirs
+Source: "{#SourceDir}\*"; Excludes: "{#AppMainExeName}"; DestDir: "{app}"; Flags: ignoreversion recursesubdirs createallsubdirs;
+Source: "{#SourceDir}\{#AppMainExeName}"; DestDir: "{app}\bin"; Flags: ignoreversion sign;
 
 
 [Icons]
diff --git a/contrib/windows/julia.rc b/contrib/windows/julia.rc
index afef95ca4af0ba..9a82ee1083ad6e 100644
--- a/contrib/windows/julia.rc
+++ b/contrib/windows/julia.rc
@@ -1,13 +1,13 @@
 #include <winver.h>
 #include <winuser.h>
-1 VERSIONINFO
+VS_VERSION_INFO VERSIONINFO
 FILEVERSION     JLVER
 PRODUCTVERSION  JLVER
 /*
-FILEFLAGSMASK  	VS_FF_PRERELEASE
-FILEFLAGS      	VS_FF_PRERELEASE
+FILEFLAGSMASK  	VS_FFI_FILEFLAGSMASK
+FILEFLAGS      	VER_PRIVATEBUILD
 */
-FILEOS         	VOS_DOS_WINDOWS32
+FILEOS         	VOS__WINDOWS32
 FILETYPE       	VFT_APP
 BEGIN
   BLOCK "StringFileInfo"
@@ -18,7 +18,7 @@ BEGIN
       VALUE "FileDescription", "Julia Programming Language"
       VALUE "FileVersion", JLVER_STR
       VALUE "InternalName", "julia"
-      VALUE "LegalCopyright", "(c) 2009-2020 Julia Language"
+      VALUE "LegalCopyright", "(c) 2009-2021 Julia Language"
       VALUE "OriginalFilename", "julia.exe"
       VALUE "ProductName", "Julia"
       VALUE "ProductVersion", JLVER_STR
diff --git a/deps/Makefile b/deps/Makefile
index cc5d7a05f3902e..27b93f444580f4 100644
--- a/deps/Makefile
+++ b/deps/Makefile
@@ -24,7 +24,7 @@ BUILDDIR := $(BUILDDIR)$(MAYBE_HOST)
 # if you are adding a new target, it can help to copy an similar, existing target
 #
 # autoconf configure-driven scripts: pcre unwind gmp mpfr patchelf libuv curl
-# custom Makefile rules: openlibm dsfmt suitesparse-wrapper suitesparse lapack blastrampoline openblas utf8proc objconv libwhich
+# custom Makefile rules: openlibm dsfmt libsuitesparse lapack blastrampoline openblas utf8proc objconv libwhich
 # CMake libs: llvm llvmunwind libgit2 libssh2 mbedtls
 #
 # downloadable via git: llvm-svn, libuv, libopenlibm, utf8proc, libgit2, libssh2
@@ -46,10 +46,6 @@ ifeq ($(USE_SYSTEM_CSL), 0)
 DEP_LIBS += csl
 endif
 
-ifeq ($(USE_GPL_LIBS), 1)
-DEP_LIBS += suitesparse-wrapper
-endif
-
 ifeq ($(USE_SYSTEM_LIBUV), 0)
 DEP_LIBS += libuv
 endif
@@ -134,8 +130,8 @@ DEP_LIBS += mpfr
 endif
 
 ifeq ($(USE_GPL_LIBS), 1)
-ifeq ($(USE_SYSTEM_SUITESPARSE), 0)
-DEP_LIBS += suitesparse
+ifeq ($(USE_SYSTEM_LIBSUITESPARSE), 0)
+DEP_LIBS += libsuitesparse
 endif
 endif
 
@@ -166,12 +162,24 @@ ifneq ($(OS), WINNT)
 DEP_LIBS += libwhich
 endif
 
-# unlist targets that have not been converted to use the staged-install
-DEP_LIBS_STAGED := $(DEP_LIBS)
-DEP_LIBS_STAGED := $(filter-out csl,$(DEP_LIBS_STAGED))
-DEP_LIBS_STAGED := $(filter-out suitesparse,$(DEP_LIBS_STAGED))
-DEP_LIBS_STAGED := $(filter-out suitesparse-wrapper,$(DEP_LIBS_STAGED))
+# list all targets
+DEP_LIBS_STAGED_ALL := llvm llvm-tools clang llvmunwind unwind libuv pcre \
+	openlibm dsfmt blastrampoline openblas lapack gmp mpfr patchelf utf8proc \
+	objconv mbedtls libssh2 nghttp2 curl libgit2 libwhich zlib p7zip csl \
+	libsuitesparse
+DEP_LIBS_ALL := $(DEP_LIBS_STAGED_ALL)
 
+ifneq ($(USE_BINARYBUILDER_OPENBLAS),0)
+DEP_LIBS_ALL := $(filter-out lapack,$(DEP_LIBS_ALL))
+endif
+
+ifeq ($(USE_BINARYBUILDER_LLVM),0)
+DEP_LIBS_ALL := $(filter-out clang llvm-tools,$(DEP_LIBS_ALL))
+endif
+DEP_LIBS_STAGED := $(DEP_LIBS)
+ifeq ($(USE_BINARYBUILDER_LIBSUITESPARSE),0)
+DEP_LIBS_STAGED := $(filter-out libsuitesparse,$(DEP_LIBS_STAGED))
+endif # DEP_LIBS_STAGED feeds `stage` and `version-check`; it must start from DEP_LIBS
 
 ## Common build target prefixes
 
@@ -183,21 +191,14 @@ compile: $(addprefix compile-, $(DEP_LIBS))
 check: $(addprefix check-, $(DEP_LIBS))
 fastcheck: $(addprefix fastcheck-, $(DEP_LIBS))
 stage: $(addprefix stage-, $(DEP_LIBS_STAGED))
-install: $(addprefix install-, $(DEP_LIBS))
-cleanall: $(addprefix clean-, $(DEP_LIBS))
-distcleanall: $(addprefix distclean-, $(DEP_LIBS))
-	rm -rf $(build_prefix)
-getall: get-llvm get-libuv get-pcre get-openlibm get-dsfmt get-blastrampoline get-openblas get-lapack get-suitesparse get-unwind get-gmp get-mpfr get-patchelf get-utf8proc get-objconv get-mbedtls get-libssh2 get-nghttp2 get-curl get-libgit2 get-libwhich get-zlib get-p7zip get-csl
-
-# If we're building for MacOS, no matter what, `getall` should include `llvmunwind`
-ifeq ($(OS),Darwin)
-getall: get-llvmunwind
-endif
+install: version-check $(addprefix install-, $(DEP_LIBS))
+version-check: $(addprefix version-check-, $(DEP_LIBS_STAGED))
 
-# Same if we're building a purely-source archive, always include `llvmunwind`
-ifeq ($(USE_BINARYBUILDER_LLVMUNWIND),0)
-getall: get-llvmunwind
-endif
+uninstall: $(addprefix uninstall-, $(DEP_LIBS_STAGED_ALL))
+cleanall: $(addprefix clean-, $(DEP_LIBS_ALL))
+distcleanall: $(addprefix distclean-, $(DEP_LIBS_ALL))
+	rm -rf $(build_prefix)
+getall: $(addprefix get-, $(DEP_LIBS_ALL))
 
 include $(SRCDIR)/csl.mk
 include $(SRCDIR)/llvm.mk
@@ -209,7 +210,7 @@ include $(SRCDIR)/objconv.mk
 include $(SRCDIR)/blastrampoline.mk
 include $(SRCDIR)/openblas.mk
 include $(SRCDIR)/utf8proc.mk
-include $(SRCDIR)/suitesparse.mk
+include $(SRCDIR)/libsuitesparse.mk
 include $(SRCDIR)/unwind.mk
 include $(SRCDIR)/gmp.mk
 include $(SRCDIR)/mpfr.mk
diff --git a/deps/SuiteSparse_wrapper.c b/deps/SuiteSparse_wrapper.c
deleted file mode 100644
index fc8b612d7671bb..00000000000000
--- a/deps/SuiteSparse_wrapper.c
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
-  SuiteSparse_wrapper.c: Changes made to this file in the Julia repo
-  in deps/SuiteSparse_wrapper.c should be also made in
-  Yggdrasil/S/SuiteSparse and vice versa.
-*/
-
-#include <string.h>
-#include <cholmod.h>
-
-extern size_t jl_cholmod_common_size(void) {
-    return sizeof(cholmod_common);
-}
-
-extern size_t jl_cholmod_sizeof_long(void) {
-    return sizeof(SuiteSparse_long);
-}
-
-extern int jl_cholmod_version(int *ver) {
-    if (ver != (int*) NULL) {
-        ver[0] = CHOLMOD_MAIN_VERSION;
-        ver[1] = CHOLMOD_SUB_VERSION;
-        ver[2] = CHOLMOD_SUBSUB_VERSION;
-    }
-    return CHOLMOD_VERSION;
-}
-
-extern void jl_cholmod_common_offsets(size_t *vv) {
-    vv[0] = offsetof(cholmod_common, dbound);
-    vv[1] = offsetof(cholmod_common, maxrank);
-    vv[2] = offsetof(cholmod_common, supernodal_switch);
-    vv[3] = offsetof(cholmod_common, supernodal);
-    vv[4] = offsetof(cholmod_common, final_asis);
-    vv[5] = offsetof(cholmod_common, final_super);
-    vv[6] = offsetof(cholmod_common, final_ll);
-    vv[7] = offsetof(cholmod_common, final_pack);
-    vv[8] = offsetof(cholmod_common, final_monotonic);
-    vv[9] = offsetof(cholmod_common, final_resymbol);
-    vv[10] = offsetof(cholmod_common, prefer_zomplex);
-    vv[11] = offsetof(cholmod_common, prefer_upper);
-    vv[12] = offsetof(cholmod_common, print);
-    vv[13] = offsetof(cholmod_common, precise);
-    vv[14] = offsetof(cholmod_common, nmethods);
-    vv[15] = offsetof(cholmod_common, selected);
-    vv[16] = offsetof(cholmod_common, postorder);
-    vv[17] = offsetof(cholmod_common, itype);
-    vv[18] = offsetof(cholmod_common, dtype);
-}
diff --git a/deps/Versions.make b/deps/Versions.make
index 9d19790e945e64..43c9f4b3e01c31 100644
--- a/deps/Versions.make
+++ b/deps/Versions.make
@@ -15,7 +15,7 @@ CSL_JLL_NAME := CompilerSupportLibraries
 
 # Clang (paired with LLVM, only here as a JLL download)
 CLANG_JLL_NAME := Clang
-CLANG_JLL_VER  := 11.0.1+3
+CLANG_JLL_VER  := 12.0.1+0
 
 # DSFMT
 DSFMT_VER := 2.2.4
@@ -44,16 +44,17 @@ LIBUV_VER := 2
 LIBUV_JLL_NAME := LibUV
 
 # LLVM
-LLVM_VER := 11.0.1
-LLVM_ASSERT_JLL_VER := 11.0.1+3
+LLVM_VER := 12.0.1
+LLVM_ASSERT_JLL_VER := 12.0.1+0
 LLVM_JLL_NAME := libLLVM
 
 # LLVM_tools (downloads LLVM_jll to get things like `lit` and `opt`)
 LLVM_TOOLS_JLL_NAME := LLVM
-LLVM_TOOLS_JLL_VER := 11.0.1+3
+LLVM_TOOLS_JLL_VER := 12.0.1+0
+LLVM_TOOLS_ASSERT_JLL_VER := 12.0.1+0
 
 # LLVM libunwind
-LLVMUNWIND_VER := 11.0.1
+LLVMUNWIND_VER := 12.0.1
 LLVMUNWIND_JLL_NAME := LLVMLibUnwind
 
 # MbedTLS
@@ -74,11 +75,11 @@ OBJCONV_JLL_NAME := Objconv
 OBJCONV_JLL_VER  := 2.49.1+0
 
 # blastrampoline
-BLASTRAMPOLINE_VER := 3.0.2
+BLASTRAMPOLINE_VER := 3.0.4
 BLASTRAMPOLINE_JLL_NAME := libblastrampoline
 
 # OpenBLAS
-OPENBLAS_VER := 0.3.13
+OPENBLAS_VER := 0.3.17
 OPENBLAS_JLL_NAME := OpenBLAS
 
 # OpenLibm
@@ -97,8 +98,8 @@ PCRE_VER := 10.36
 PCRE_JLL_NAME := PCRE2
 
 # SuiteSparse
-SUITESPARSE_VER := 5.8.1
-SUITESPARSE_JLL_NAME := SuiteSparse
+LIBSUITESPARSE_VER := 5.10.1
+LIBSUITESPARSE_JLL_NAME := SuiteSparse
 
 # unwind
 UNWIND_VER := 1.3.2
diff --git a/deps/blastrampoline.version b/deps/blastrampoline.version
index 905164db9e272c..86d77ab5bf2939 100644
--- a/deps/blastrampoline.version
+++ b/deps/blastrampoline.version
@@ -1,2 +1,2 @@
-BLASTRAMPOLINE_BRANCH=main
-BLASTRAMPOLINE_SHA1=5882fdf6395afb1ed01a8a10db94b7b3cbd39e16
+BLASTRAMPOLINE_BRANCH=v3.0.4
+BLASTRAMPOLINE_SHA1=23de7a09bf354fe6f655c457bab5bf47fdd2486d
diff --git a/deps/checksums/Downloads-6bb83068bd796c4890baaeb39628ff79a4979374.tar.gz/md5 b/deps/checksums/Downloads-6bb83068bd796c4890baaeb39628ff79a4979374.tar.gz/md5
deleted file mode 100644
index 58efffdca88d22..00000000000000
--- a/deps/checksums/Downloads-6bb83068bd796c4890baaeb39628ff79a4979374.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-7e0ab65c9c6f9413a458ee77ad5e0b29
diff --git a/deps/checksums/Downloads-6bb83068bd796c4890baaeb39628ff79a4979374.tar.gz/sha512 b/deps/checksums/Downloads-6bb83068bd796c4890baaeb39628ff79a4979374.tar.gz/sha512
deleted file mode 100644
index c01000751dc6b3..00000000000000
--- a/deps/checksums/Downloads-6bb83068bd796c4890baaeb39628ff79a4979374.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-b7961928dffd19fb4e5220da694260ae822e80b5ae39ad503f493dcd225f54bc50d4df1b2e833473a164dd42b89c710528d6c58c60f0997492f2551ae1eebaf4
diff --git a/deps/checksums/Downloads-848d374fc563fa9dc6b4d5e6e5be5ad2022652a7.tar.gz/md5 b/deps/checksums/Downloads-848d374fc563fa9dc6b4d5e6e5be5ad2022652a7.tar.gz/md5
new file mode 100644
index 00000000000000..f15a75d37a9199
--- /dev/null
+++ b/deps/checksums/Downloads-848d374fc563fa9dc6b4d5e6e5be5ad2022652a7.tar.gz/md5
@@ -0,0 +1 @@
+1e360a7f928fdf69dc847cbbd28010f0
diff --git a/deps/checksums/Downloads-848d374fc563fa9dc6b4d5e6e5be5ad2022652a7.tar.gz/sha512 b/deps/checksums/Downloads-848d374fc563fa9dc6b4d5e6e5be5ad2022652a7.tar.gz/sha512
new file mode 100644
index 00000000000000..603b2e4be35f2d
--- /dev/null
+++ b/deps/checksums/Downloads-848d374fc563fa9dc6b4d5e6e5be5ad2022652a7.tar.gz/sha512
@@ -0,0 +1 @@
+b9df90fc9d5bce2b8d1ed2c800fd0666e3f2ac85f802da4fcbab93ed2894310f90d0ea10ebe1c859c8faa8ddb4e1cef58aa72c1e0decf03f44fa3f3eabb1bcce
diff --git a/deps/checksums/NetworkOptions-42a0b5fcb7edb8ed5b0ae699f15ca6aedc0098ca.tar.gz/md5 b/deps/checksums/NetworkOptions-42a0b5fcb7edb8ed5b0ae699f15ca6aedc0098ca.tar.gz/md5
new file mode 100644
index 00000000000000..f4cb70703e6e2c
--- /dev/null
+++ b/deps/checksums/NetworkOptions-42a0b5fcb7edb8ed5b0ae699f15ca6aedc0098ca.tar.gz/md5
@@ -0,0 +1 @@
+71511cd9b2192b86e23d39e58aca489b
diff --git a/deps/checksums/NetworkOptions-42a0b5fcb7edb8ed5b0ae699f15ca6aedc0098ca.tar.gz/sha512 b/deps/checksums/NetworkOptions-42a0b5fcb7edb8ed5b0ae699f15ca6aedc0098ca.tar.gz/sha512
new file mode 100644
index 00000000000000..22a61934ed190a
--- /dev/null
+++ b/deps/checksums/NetworkOptions-42a0b5fcb7edb8ed5b0ae699f15ca6aedc0098ca.tar.gz/sha512
@@ -0,0 +1 @@
+fb70d3cb0c305929ab8223c5b16aa95cb17785da6c9b2f3dc0f8d34c26ddadd88abc7bb3e9ec53a2cdfeca2396fd698393b768a08b2692e6689327509d836620
diff --git a/deps/checksums/NetworkOptions-a251de1e1c8ce4edc351d0f05233ba7fe7d2c27a.tar.gz/md5 b/deps/checksums/NetworkOptions-a251de1e1c8ce4edc351d0f05233ba7fe7d2c27a.tar.gz/md5
deleted file mode 100644
index db691d691401ec..00000000000000
--- a/deps/checksums/NetworkOptions-a251de1e1c8ce4edc351d0f05233ba7fe7d2c27a.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-2d682cc42392b71ccf65da74e5450c7a
diff --git a/deps/checksums/NetworkOptions-a251de1e1c8ce4edc351d0f05233ba7fe7d2c27a.tar.gz/sha512 b/deps/checksums/NetworkOptions-a251de1e1c8ce4edc351d0f05233ba7fe7d2c27a.tar.gz/sha512
deleted file mode 100644
index 9a2dc447adc830..00000000000000
--- a/deps/checksums/NetworkOptions-a251de1e1c8ce4edc351d0f05233ba7fe7d2c27a.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-684c10e6fcd6eb24408cdf2d741972a64526e022c17f05c329618844dc328f71d2dd2999c42a769ec4d40d714a6a97c9d540bd4a26b78366d6dc9649eb3b2b33
diff --git a/deps/checksums/Pkg-252e895056b17490bfeabd81f52743bad947e997.tar.gz/md5 b/deps/checksums/Pkg-252e895056b17490bfeabd81f52743bad947e997.tar.gz/md5
new file mode 100644
index 00000000000000..21bbb58e5a91ed
--- /dev/null
+++ b/deps/checksums/Pkg-252e895056b17490bfeabd81f52743bad947e997.tar.gz/md5
@@ -0,0 +1 @@
+743007182ff00d0907bcc6045767e72a
diff --git a/deps/checksums/Pkg-252e895056b17490bfeabd81f52743bad947e997.tar.gz/sha512 b/deps/checksums/Pkg-252e895056b17490bfeabd81f52743bad947e997.tar.gz/sha512
new file mode 100644
index 00000000000000..24cf91de419ab1
--- /dev/null
+++ b/deps/checksums/Pkg-252e895056b17490bfeabd81f52743bad947e997.tar.gz/sha512
@@ -0,0 +1 @@
+66b19b7ac0899b8b8c9817c926329562fbe07d015224a33b9ee23118e9dd07783b5760dc406f028d613b8ae69101ebc54ea6c52cceecffbd9e3a6f7f9c0cf085
diff --git a/deps/checksums/Pkg-af7e41cd9d9529bfc8e8fecd7e24c7392c73cdbc.tar.gz/md5 b/deps/checksums/Pkg-af7e41cd9d9529bfc8e8fecd7e24c7392c73cdbc.tar.gz/md5
deleted file mode 100644
index 8ff865fe025939..00000000000000
--- a/deps/checksums/Pkg-af7e41cd9d9529bfc8e8fecd7e24c7392c73cdbc.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-f70da472ebac16aaec6f38e69e49c6d7
diff --git a/deps/checksums/Pkg-af7e41cd9d9529bfc8e8fecd7e24c7392c73cdbc.tar.gz/sha512 b/deps/checksums/Pkg-af7e41cd9d9529bfc8e8fecd7e24c7392c73cdbc.tar.gz/sha512
deleted file mode 100644
index 84f9ae288d8a23..00000000000000
--- a/deps/checksums/Pkg-af7e41cd9d9529bfc8e8fecd7e24c7392c73cdbc.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-83f6966bc5a55f80fce9899e8f48a0e26dae37d88a5819b1a67d81d2ab5bf57999acd1a42b2ca7366c428efb2a8661679820550df8d8f7a4a74f9a4406da096d
diff --git a/deps/checksums/Statistics-4b3ef9aaa79350510ca0be395458f66051c2f92d.tar.gz/md5 b/deps/checksums/Statistics-4b3ef9aaa79350510ca0be395458f66051c2f92d.tar.gz/md5
deleted file mode 100644
index ca87f66636631e..00000000000000
--- a/deps/checksums/Statistics-4b3ef9aaa79350510ca0be395458f66051c2f92d.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-13496a277a7e7ef7a11debd3f8384064
diff --git a/deps/checksums/Statistics-4b3ef9aaa79350510ca0be395458f66051c2f92d.tar.gz/sha512 b/deps/checksums/Statistics-4b3ef9aaa79350510ca0be395458f66051c2f92d.tar.gz/sha512
deleted file mode 100644
index 3629aef65d450b..00000000000000
--- a/deps/checksums/Statistics-4b3ef9aaa79350510ca0be395458f66051c2f92d.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-25333568516ddbd319456ae13cb3e12dcf3d2d9ca23bf8da32e59d8024ca03650987289792ed9641628bc78f5c79f3498495287c8fb2795e9eed4e70881eb831
diff --git a/deps/checksums/Statistics-54f9b0d999813aa9fab039f632df222ffd2a96a8.tar.gz/md5 b/deps/checksums/Statistics-54f9b0d999813aa9fab039f632df222ffd2a96a8.tar.gz/md5
new file mode 100644
index 00000000000000..62f19540372412
--- /dev/null
+++ b/deps/checksums/Statistics-54f9b0d999813aa9fab039f632df222ffd2a96a8.tar.gz/md5
@@ -0,0 +1 @@
+4c09536f4f769b23e88fee769f5a09bd
diff --git a/deps/checksums/Statistics-54f9b0d999813aa9fab039f632df222ffd2a96a8.tar.gz/sha512 b/deps/checksums/Statistics-54f9b0d999813aa9fab039f632df222ffd2a96a8.tar.gz/sha512
new file mode 100644
index 00000000000000..a79b037b94de22
--- /dev/null
+++ b/deps/checksums/Statistics-54f9b0d999813aa9fab039f632df222ffd2a96a8.tar.gz/sha512
@@ -0,0 +1 @@
+e409fa943a9683a129b80c78ef74572df316ed414dfc8c208f1500d0f07d4d41870d44654446e2c20d1b9ed11e62c4fc6107b6e5789939edbd049fc2aaf22f63
diff --git a/deps/checksums/Tar-ac4d442266a676ce2d1a43acb55fc07d1edc6566.tar.gz/md5 b/deps/checksums/Tar-ac4d442266a676ce2d1a43acb55fc07d1edc6566.tar.gz/md5
deleted file mode 100644
index 5f1a8151702d6e..00000000000000
--- a/deps/checksums/Tar-ac4d442266a676ce2d1a43acb55fc07d1edc6566.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-8e142a0c1761068128b4ac229aae584d
diff --git a/deps/checksums/Tar-ac4d442266a676ce2d1a43acb55fc07d1edc6566.tar.gz/sha512 b/deps/checksums/Tar-ac4d442266a676ce2d1a43acb55fc07d1edc6566.tar.gz/sha512
deleted file mode 100644
index 0c3dd7423504f8..00000000000000
--- a/deps/checksums/Tar-ac4d442266a676ce2d1a43acb55fc07d1edc6566.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-d899b09c3ab4d94605297c716838f21dae7f1467b2785c9fff960ddad645161148fcdb2bc114e94da24f567098af4abce49960986f91eb26a2c234928fe6bdc9
diff --git a/deps/checksums/Tar-ffb3dd5e697eb6690fce9cceb67edb82134f8337.tar.gz/md5 b/deps/checksums/Tar-ffb3dd5e697eb6690fce9cceb67edb82134f8337.tar.gz/md5
new file mode 100644
index 00000000000000..27fb3f83ef22e5
--- /dev/null
+++ b/deps/checksums/Tar-ffb3dd5e697eb6690fce9cceb67edb82134f8337.tar.gz/md5
@@ -0,0 +1 @@
+bb8b923ec61cbd3160aa81017308f0f8
diff --git a/deps/checksums/Tar-ffb3dd5e697eb6690fce9cceb67edb82134f8337.tar.gz/sha512 b/deps/checksums/Tar-ffb3dd5e697eb6690fce9cceb67edb82134f8337.tar.gz/sha512
new file mode 100644
index 00000000000000..79a65ec673d46e
--- /dev/null
+++ b/deps/checksums/Tar-ffb3dd5e697eb6690fce9cceb67edb82134f8337.tar.gz/sha512
@@ -0,0 +1 @@
+79f0a94ea1fd895c6afb52e6e4f26a295cc2d2b9317e5b0f80017b036836c269a982b6028e3e8002675fb8d56e2144ba426769b9701adbf065980d11b9c1fecf
diff --git a/deps/checksums/blastrampoline b/deps/checksums/blastrampoline
index 02049d47520962..cdb05346bfe93d 100644
--- a/deps/checksums/blastrampoline
+++ b/deps/checksums/blastrampoline
@@ -1,34 +1,34 @@
-blastrampoline-5882fdf6395afb1ed01a8a10db94b7b3cbd39e16.tar.gz/md5/e0d62a761862e4331e39f3a863558b15
-blastrampoline-5882fdf6395afb1ed01a8a10db94b7b3cbd39e16.tar.gz/sha512/b0637d903183b42ad7fff0e7c6e2c9feda0082d31d6a86926586c56437fc91041b5fea2c4cd4f5b588e8f86e71fdbf4ac216fa6862a1df360faa77c8966e7b0e
-libblastrampoline.v3.0.2+0.aarch64-apple-darwin.tar.gz/md5/4f7ba7d67f6fc8148f5be9f125b09628
-libblastrampoline.v3.0.2+0.aarch64-apple-darwin.tar.gz/sha512/e1b950317edbfa5fa91c343ff99b008db82f2a7f2c4a19a51b4c9b36faf8f9c1d11453d8b3234eb29ee30adeca10877098ce456d181b9c068ba086a59792165f
-libblastrampoline.v3.0.2+0.aarch64-linux-gnu.tar.gz/md5/ba18ed1a8db10a2e630a8c09b4911af3
-libblastrampoline.v3.0.2+0.aarch64-linux-gnu.tar.gz/sha512/242bd84474827725f45faa1e52ecc1f8f1d9d0abd9fa8f0f994557d0a508b5959f10c098d9ede579f9421fa8c7559a9ab3ef01e71aa44fd1f691dc8f96d1f4bd
-libblastrampoline.v3.0.2+0.aarch64-linux-musl.tar.gz/md5/6296dad440625727da3a167a4dcb9d14
-libblastrampoline.v3.0.2+0.aarch64-linux-musl.tar.gz/sha512/0f7975b008003575ba499b6c98ae2231d6bd5d3831f0b737ea0019b63d6ddb8bfd526350390a4727569181accd895e0f82f00c23cdb2aa11e5eafba046c1224a
-libblastrampoline.v3.0.2+0.armv6l-linux-gnueabihf.tar.gz/md5/3dc56056be7f96e39a25bf1e2413e1f6
-libblastrampoline.v3.0.2+0.armv6l-linux-gnueabihf.tar.gz/sha512/e1c66d40ff078736502597cf2e06165741ac387d987b5c5c96053f632b74ee80bf0df0318da15f87833a9a096b36113b4bd882c4f4c90810a389015efb66a1d6
-libblastrampoline.v3.0.2+0.armv6l-linux-musleabihf.tar.gz/md5/fb2db3dfc963ac7fe78b738bfbc68d8a
-libblastrampoline.v3.0.2+0.armv6l-linux-musleabihf.tar.gz/sha512/14878afb19bc1925e9d8f5d4586ce03c8b248bbf669e367b5114c3d289f99340c1cf002e5b030e4ca8272021c0173bc39faeddf3c4d8ce890590d94d28b73149
-libblastrampoline.v3.0.2+0.armv7l-linux-gnueabihf.tar.gz/md5/216eca57cb50379ec2766b59d66014b3
-libblastrampoline.v3.0.2+0.armv7l-linux-gnueabihf.tar.gz/sha512/478cac263aa42db5800d3d70efba81f3111ef371ba0f9940cc64a4a0b8306751ab4f517dc42a136b5c3e3a50cdf25ab0ad769e9eb854de812fba47e54f0c0f16
-libblastrampoline.v3.0.2+0.armv7l-linux-musleabihf.tar.gz/md5/6896c312111b20b6b9c45398991a4309
-libblastrampoline.v3.0.2+0.armv7l-linux-musleabihf.tar.gz/sha512/43241768dff5f3b4bec18ac72cfb328dc6498ddfb1b0ed98b5a2e4052ea7bb1d1e7a669910a1edb50fcb938dd460578bee268baf28a9eb69426c053259272a74
-libblastrampoline.v3.0.2+0.i686-linux-gnu.tar.gz/md5/9ab7739ae2816070d7bd57cacc2ae5b1
-libblastrampoline.v3.0.2+0.i686-linux-gnu.tar.gz/sha512/de747d285db84ccd5983e2acef4b98d03d209fe2454ac48142190be28db655c62e6c6d4041c56b227cae4f247276ad6f0491add09c07bc46dc4cb171030cff63
-libblastrampoline.v3.0.2+0.i686-linux-musl.tar.gz/md5/0621e71f25a3a69dc06acc845fb89999
-libblastrampoline.v3.0.2+0.i686-linux-musl.tar.gz/sha512/bb80afabf0525ab11beec9829097498a1501cd2f51d83f0599d17d40a684ec2ac3d7a61150cfec6b2464872184b435d0fd912fd6d4a4cd5d286a029a2633961d
-libblastrampoline.v3.0.2+0.i686-w64-mingw32.tar.gz/md5/a03bd4ea83d040dfc3658be3bf089210
-libblastrampoline.v3.0.2+0.i686-w64-mingw32.tar.gz/sha512/0585aad4e9fd0b2722ed3b4693b41ceeeef18b34f00246f98352556ff17e1213342f457916a5fcf53d0d87a93be1ad22af17c9d4a3b1fec80ad21ef964cde1db
-libblastrampoline.v3.0.2+0.powerpc64le-linux-gnu.tar.gz/md5/89ac3a8c5b0fb9b02b14d61f7b8e5055
-libblastrampoline.v3.0.2+0.powerpc64le-linux-gnu.tar.gz/sha512/3f0bf8921a8574cbea0d4a3a7e9a521b9e318acfbe0476b01e4266a9c674995f8ad9d21a05976b65c04dce99afd2d96d50d8a3da0ca53ce3cf843e78a7d19871
-libblastrampoline.v3.0.2+0.x86_64-apple-darwin.tar.gz/md5/0da14952e48c982321a63a0877b4a685
-libblastrampoline.v3.0.2+0.x86_64-apple-darwin.tar.gz/sha512/8aa97721c77a310864632c54a46768ad62507a40c1dc36fc240bf409cc25db3b443270b3cdbce6a7794bc6e59e492220adb69d90a2804c32d0d7a41a7712366a
-libblastrampoline.v3.0.2+0.x86_64-linux-gnu.tar.gz/md5/a8884383157ee9dc33f8d885510d806a
-libblastrampoline.v3.0.2+0.x86_64-linux-gnu.tar.gz/sha512/5f0d8a2d36778c08b46769f4eeae2bad03c0763dd84c94531911e5979eecad816b198ee3cadf0d03d4339bcbef28ed5d20e2799d00f13b0bc818f94fb7521675
-libblastrampoline.v3.0.2+0.x86_64-linux-musl.tar.gz/md5/1f59ccb2555dd76d92f4d779029bfccc
-libblastrampoline.v3.0.2+0.x86_64-linux-musl.tar.gz/sha512/c631648a90306d11aa80880ed857de7cdc40d354f9a26979e71fbc44fa5517596a68af53f651a683307fb8112abd439b335ad1df18c0301afc7f7f24f380a97a
-libblastrampoline.v3.0.2+0.x86_64-unknown-freebsd.tar.gz/md5/2dfab8bbdcfcdb02f1bacac6c2317587
-libblastrampoline.v3.0.2+0.x86_64-unknown-freebsd.tar.gz/sha512/d93b277a51e43d150d1af781ddc122a8e093450a83ce376a37e1f32d0eaf382a6a852722181a11765cfbbd514287854937eab88e9bb09688da0d323ede46e1b3
-libblastrampoline.v3.0.2+0.x86_64-w64-mingw32.tar.gz/md5/e233f526a30b02d8d486918607af44d6
-libblastrampoline.v3.0.2+0.x86_64-w64-mingw32.tar.gz/sha512/25ca3feb99157f80dc45661319a2c5a00ea7943830905d7530a9053b89d47e4e4c4efbe7b374bf9b7e7c44774fc2b2061c4b2b7f09f6cc3e14aa1867039f15be
+blastrampoline-23de7a09bf354fe6f655c457bab5bf47fdd2486d.tar.gz/md5/0c8016a6e30bc2237184b816b613d11c
+blastrampoline-23de7a09bf354fe6f655c457bab5bf47fdd2486d.tar.gz/sha512/7b7dbb101cf05ac833a8e5b09f1eec8eb99f0caafbe80075751a5f7e0bfe03a3b19d11d3507dadd13f503cfa9fc1a6cd53f3461af7d5afb39ca385a0ee26120b
+libblastrampoline.v3.1.0+0.aarch64-apple-darwin.tar.gz/md5/9a72574c810323ebe7b496266a5b3d90
+libblastrampoline.v3.1.0+0.aarch64-apple-darwin.tar.gz/sha512/559c91374882a137ce8b9f008e9d815dfebc175f65ac07bf784d590e31a07f60302de3d498e5dcc3f409f59fc2c7fbd1fb32623d25ed65bcc31c35a74c11f260
+libblastrampoline.v3.1.0+0.aarch64-linux-gnu.tar.gz/md5/95802755d3b6205c88163ba313662fac
+libblastrampoline.v3.1.0+0.aarch64-linux-gnu.tar.gz/sha512/7e3198429602fdd1196a45007e08a9b4fef2d303f8c7d7ff423fd19dfa970519d7402e95ea9acb32e7cd236b2c2f162e864a08f6062a8990cf9cbf75271a4074
+libblastrampoline.v3.1.0+0.aarch64-linux-musl.tar.gz/md5/f1251167736acfcbe7a3fe0398796702
+libblastrampoline.v3.1.0+0.aarch64-linux-musl.tar.gz/sha512/0564e79d44f2a53604a3941b9b638865e8f3f5b2945e43c08643a6fd13e3162c8c6ed156bc7f9f6fc69d0b6b23a8abd493ce4dc847b78f4cfe4a0bf5583fe4f0
+libblastrampoline.v3.1.0+0.armv6l-linux-gnueabihf.tar.gz/md5/fe48bd1c1c0db93db72ded0df968ff40
+libblastrampoline.v3.1.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/6958a727a25668ecdbc9b38e1562239df64900ac426b13b1949a68cfb6c481b7431a70764343a9c3e65c683b5083d15881e7c6d2adc9bc3450d0816f54fcf224
+libblastrampoline.v3.1.0+0.armv6l-linux-musleabihf.tar.gz/md5/28be9a338c9ea2f5c1b82a6230ff8324
+libblastrampoline.v3.1.0+0.armv6l-linux-musleabihf.tar.gz/sha512/53d4e5ba559872c9aac60cb995df6d9711617f728a22b1b8f8dfb3f7286c8bd62f24dcedf428b457e1062799e31b9a68a17b653daae2f01131aa86d0801bbb32
+libblastrampoline.v3.1.0+0.armv7l-linux-gnueabihf.tar.gz/md5/c1f1dcd14e110b1723557b9c0932637d
+libblastrampoline.v3.1.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/b573de04784dfd7803616a968675ce314af2ca4429029d57186619bf1b10d83d32801bcb74dd82430cb33d53d97117b9dd6d5cd64bb5c3850148a2414a0499f3
+libblastrampoline.v3.1.0+0.armv7l-linux-musleabihf.tar.gz/md5/f00cc93db3543dce1492314fd8670c20
+libblastrampoline.v3.1.0+0.armv7l-linux-musleabihf.tar.gz/sha512/13569d8aed844c1fb48a4e125bc822588fb795707e247f14e1b25dcd4622204f5b7bb976e50cb0fe9d49f1a327a37c2da43ebf0d2b4aac4d5e911e7c49046847
+libblastrampoline.v3.1.0+0.i686-linux-gnu.tar.gz/md5/867876ae938dff24a73f0f2f3ffc0544
+libblastrampoline.v3.1.0+0.i686-linux-gnu.tar.gz/sha512/cd7426ccc7eb59c7367ec5f23813d2cc19aac62eb0c43a2f769270f16da8daaece7dbde78e1487dd9ab624eb6389683400ee5a0654e45d6ba4f13496b9330cb5
+libblastrampoline.v3.1.0+0.i686-linux-musl.tar.gz/md5/57d1942e23432b6a99365170473be723
+libblastrampoline.v3.1.0+0.i686-linux-musl.tar.gz/sha512/775eb115df4554a04cc13c407915287a3917307c97f5e04618315478a862cc2d75597ac391d05e7ccbeb901a9bebc89e1ebb9e608b4737229eef253204d6f058
+libblastrampoline.v3.1.0+0.i686-w64-mingw32.tar.gz/md5/8548d8ffacfcf5708b8df80d3ec81ad9
+libblastrampoline.v3.1.0+0.i686-w64-mingw32.tar.gz/sha512/2397a03aae3794b8376606b88f33e3dd4ee4801a2f8e0cff5f28fd16a24024acb4babe6c5e041c1b04ac3dca025160d78c85b37a8ae8948d5969b2f618ea9053
+libblastrampoline.v3.1.0+0.powerpc64le-linux-gnu.tar.gz/md5/a6dad5de86fab27cbe80eee54d5ce995
+libblastrampoline.v3.1.0+0.powerpc64le-linux-gnu.tar.gz/sha512/5b9a0632c89c5b6e2660a88212293cbfe707cfc4b6d86abd1887068843c42da6513f65ca9601fa78baef508877c9e682cbbddc88a513692373c3c373c6b2d939
+libblastrampoline.v3.1.0+0.x86_64-apple-darwin.tar.gz/md5/03e1a3ee1647d5056cd459c49ab46e73
+libblastrampoline.v3.1.0+0.x86_64-apple-darwin.tar.gz/sha512/de5e772876b4ebcdd2981d382c613849c79206adfcc1eeac03e7b4d146fb33cbffe2b66791b9a02ad216d1959aaa56cbd36e9a8fc3f633216141280a27ff5b92
+libblastrampoline.v3.1.0+0.x86_64-linux-gnu.tar.gz/md5/d731c29b90a26b70a2d65d15a213be33
+libblastrampoline.v3.1.0+0.x86_64-linux-gnu.tar.gz/sha512/558aac3893e1d4e15dfd48b8be0567774560b6cd063d46ab935dd20f62a1abfb6e89d549cb010aa1b9c30fdc86bbea43bc16921819a7e5446cc6a9f3c6c293ad
+libblastrampoline.v3.1.0+0.x86_64-linux-musl.tar.gz/md5/9023d6dc4822e523cbd6071ca0f39557
+libblastrampoline.v3.1.0+0.x86_64-linux-musl.tar.gz/sha512/34d13ddaded44c2bf48f61489650c638d51bc52e4cfee3bfd89dbdcd3895d3e5a862c88c05fa76a1bf29e410125209fde717ffede9b4f4a39705d1866848df62
+libblastrampoline.v3.1.0+0.x86_64-unknown-freebsd.tar.gz/md5/b68495b29ad8b31e4a4888cbbbe15ec1
+libblastrampoline.v3.1.0+0.x86_64-unknown-freebsd.tar.gz/sha512/8783ba5a2b8f9391d0429045cf369889dd99b7253bf6a259bc613dedd038722a0f556c13a9c9c38da7ffd1050ee768d3d614944726629c7aa9b16f56e5b4a9e1
+libblastrampoline.v3.1.0+0.x86_64-w64-mingw32.tar.gz/md5/0a0a31950022a56de76698bf5b14bb08
+libblastrampoline.v3.1.0+0.x86_64-w64-mingw32.tar.gz/sha512/dfc1797cfe64b09381227508d16872421af99f416ffe5c3703f0fed4a88f8d52b2dd7f811388442264fe3060012b7df55f6f2a2b08887b0a36307804a978a6da
diff --git a/deps/checksums/clang b/deps/checksums/clang
index 3e438a189a5753..6d6d4d100a4329 100644
--- a/deps/checksums/clang
+++ b/deps/checksums/clang
@@ -1,58 +1,58 @@
-Clang.v11.0.1+3.aarch64-apple-darwin.tar.gz/md5/55c50726cd43c1b49a222ec92ea05431
-Clang.v11.0.1+3.aarch64-apple-darwin.tar.gz/sha512/009b29068af7d8a008e34853faa3ce1e20817d3a7975fe4ca5657d1303e546410c903af345356fb94c88f8e95474895e68414a5a04096b2e9bdeed732780637e
-Clang.v11.0.1+3.aarch64-linux-gnu-cxx03.tar.gz/md5/e5d8d1d5364dba4c5891c9a127ee358d
-Clang.v11.0.1+3.aarch64-linux-gnu-cxx03.tar.gz/sha512/a5efd0a4c0b05ab6ac55081a28cace0cffcd3eefeaa872d239711f667425808f29939c90a327bbf8664d2d065d1b807ed802c161599b7fbda29f554ba9148b48
-Clang.v11.0.1+3.aarch64-linux-gnu-cxx11.tar.gz/md5/67716fd169ad7cb464a576cc10d42ad0
-Clang.v11.0.1+3.aarch64-linux-gnu-cxx11.tar.gz/sha512/5780d34b24fc786406f4f4f814ee6b8bf6366964d7252b61ea7dd3a09283ffb266270d76c2580ca1285a753d68922cd42d2d75b9527d331747325c22224536b7
-Clang.v11.0.1+3.aarch64-linux-musl-cxx03.tar.gz/md5/77e3d4ff674493fda7958b9ad950e934
-Clang.v11.0.1+3.aarch64-linux-musl-cxx03.tar.gz/sha512/813a0cac9ae4972fe426451b9fa0d640f25ff5edfe22624a621cbbe737f7977259d062710f44475139c8298265336fcc67fc28dc7eeddd95039bef44dc080ac4
-Clang.v11.0.1+3.aarch64-linux-musl-cxx11.tar.gz/md5/e53b1db80024cc3a1152d7694291cbbd
-Clang.v11.0.1+3.aarch64-linux-musl-cxx11.tar.gz/sha512/f6b9c15655ca8bb54cd568728fe5ac33df62abbf07029816e892b0483afbb5adc69fde1789b90687137be491fa0351fa5806cf6d869232b3cbb4b42e1f7a5a64
-Clang.v11.0.1+3.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/7ae72cb4b33be0250402f29217e32c53
-Clang.v11.0.1+3.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/1b85e78a78c2bd437c3b21855bb36f8200543d0daa3c2d7be68d5c75ee06a74534e9b8b80ac6048fe05c426e441af3ff019076bf2d045152eb142d7d5769b525
-Clang.v11.0.1+3.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/b83a93c3da36d8eb469f5fd39e8bcb9d
-Clang.v11.0.1+3.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/9aab7d5f9be0ce1580ed9d3b3c4da0bff1fff7094794d58620e1fb646ac7733708162f69ad281cd441c4cbbec8985c9906887f4979e5a5ebec8a05c6be06c91e
-Clang.v11.0.1+3.armv6l-linux-musleabihf-cxx03.tar.gz/md5/ea570204363ae3b62965a000e6a3948e
-Clang.v11.0.1+3.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/21c9a1418029d5ca0d3d16e943914d8a4932bdbaee1152f74af681349abaebfe4d87a2f51ff06196cea59363b3bec706c629d61f5a37cf0e75cd645fed2ae835
-Clang.v11.0.1+3.armv6l-linux-musleabihf-cxx11.tar.gz/md5/63130e9a585a747e8f8e31a1b71a27b8
-Clang.v11.0.1+3.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/27f3f72afa62b4f6c8e80d5934cc17119336c65ee070f05d98182e7e83fd02f0ab439a924165c7e6a414409eb2d2044ddf778c0e3e7a6994643b5d0d8b3e2ab9
-Clang.v11.0.1+3.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/0d85a6ace70f1295202ba1c50c5948f4
-Clang.v11.0.1+3.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/574c289c06283e8c2530e64bafb5ff606c27e76a2577acf84112e1ee9b8a3b299abde1e68dfd92700fa472dd6032af02399cca8b436136a5642ce545272398f2
-Clang.v11.0.1+3.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/595279b252dbab5d6411db0d6a514985
-Clang.v11.0.1+3.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/6a1e55e09cfc2325772f11a67f3e7e43872ce07fcceea6852649635778bd472e3e9e6cd0e7e2bf5dbefa1bf0862bae478329b8aedb48b4d7d8cf6ea074e98ce8
-Clang.v11.0.1+3.armv7l-linux-musleabihf-cxx03.tar.gz/md5/6f973ced20fd2dabf60b12b5ff3b0cb7
-Clang.v11.0.1+3.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/dfc77ef9b13111d5b2d6cdd4d27b0df086ee7a79b535bfc2dcbcc5d9014d0cb0f583c792f812100d64eeadf938645ebe3af72d2df45b396b15b3bc54a0dc3d40
-Clang.v11.0.1+3.armv7l-linux-musleabihf-cxx11.tar.gz/md5/f96d5a441a56ee914372249fa0cb83e7
-Clang.v11.0.1+3.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/dc2f4afbad26afe4eb43054c70674674a1258f6a1bb17ebd989179336e3de119b976c3429880b3e614668ec5da6c9401346c0fdb8fb45311e818a948c5a2e851
-Clang.v11.0.1+3.i686-linux-gnu-cxx03.tar.gz/md5/7f18573c99258f1887aba30228bab25e
-Clang.v11.0.1+3.i686-linux-gnu-cxx03.tar.gz/sha512/6fa4296f59515ca058d2012ab200e65d4c1f130f40b7a3796d4237f6bf9a72f0a3872bbf83ef1865e7c1cdb75de9c5dd3385a64c55be6741fdb91bd5ef00c6de
-Clang.v11.0.1+3.i686-linux-gnu-cxx11.tar.gz/md5/dd8333ce489719b53cf4f83300ec5c91
-Clang.v11.0.1+3.i686-linux-gnu-cxx11.tar.gz/sha512/356497a88871fa9a725d0206b09462cf4af96cb866724e7c9ca7a7060bea62a6b39426cb2a17d000d17b04052cc78893008c78b1a1c3cf30d6f84f10cda73fd6
-Clang.v11.0.1+3.i686-linux-musl-cxx03.tar.gz/md5/f82aa0d21872e9b50ef238d004ff28d3
-Clang.v11.0.1+3.i686-linux-musl-cxx03.tar.gz/sha512/a2c216727acf6a42c5b914321541a752cdc3cc0b64ae39d1ede1588935ef9d8a4678e5d6f61b2bc1df96a1b4d63812c14e5e6adec7f2babc3175ebfb72421334
-Clang.v11.0.1+3.i686-linux-musl-cxx11.tar.gz/md5/3317c99fba259bddb9f351e79564c831
-Clang.v11.0.1+3.i686-linux-musl-cxx11.tar.gz/sha512/bda2aa0decf76b787986a9a308a3832856cce9545b2c75da49193f23e64ac00dd839d243aa85737f28288c284b9770c1f5408deb4e5440293262d14df2ec759c
-Clang.v11.0.1+3.i686-w64-mingw32-cxx03.tar.gz/md5/826749b99dc7b5f24bac5d0531e84468
-Clang.v11.0.1+3.i686-w64-mingw32-cxx03.tar.gz/sha512/e15ba270c2ad915be5d0072830b97e4d66daa2e18b3ba4db8f9687633c3ef0dfc1ee312f4fbdd1c921374709f68cb1b4bc9e58b9ffcbdd200c1eb665fa9a595b
-Clang.v11.0.1+3.i686-w64-mingw32-cxx11.tar.gz/md5/2f217eae89b066e190243b1c120cc46a
-Clang.v11.0.1+3.i686-w64-mingw32-cxx11.tar.gz/sha512/526846408cced1707b85f1e40796eb1607d47fb1509435fe7e28368a74e84fa56276aaf9fcab47664615cfe4349c7af3b1ad9995126e888e9002e1a4d20cb1cd
-Clang.v11.0.1+3.powerpc64le-linux-gnu-cxx03.tar.gz/md5/6b6472551ff2e00249fb39ce80df330d
-Clang.v11.0.1+3.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/1d2780eb20c2f43659ab2502ff88334a7bf827bb533d37de058e524e9fee9a8ace431761495e93dc9564c80e94aa569066c235bc958cf893c4de5249c4a12676
-Clang.v11.0.1+3.powerpc64le-linux-gnu-cxx11.tar.gz/md5/74cdafebc133bc54bffabb40547622db
-Clang.v11.0.1+3.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/afcad373d30eb465964e17e70b66fd3ea72c1565b1bceb811d82188368a788fdf6a18e388be6d329d9a632f68890d26b878ac0e107ec975b93fc1aec81bab89e
-Clang.v11.0.1+3.x86_64-apple-darwin.tar.gz/md5/a4cb1dfe2b86f24b52d730835f8ee31c
-Clang.v11.0.1+3.x86_64-apple-darwin.tar.gz/sha512/4526942c33bb4789d3b5975023abdff81d747ecc26b9863c7ea61f87550506d2102a8f83e21085ebbc743ba2be8ddb2b6d4e12a65c9a58e02f6254a335d934f2
-Clang.v11.0.1+3.x86_64-linux-gnu-cxx03.tar.gz/md5/b9bd14c451a9e8efd28c916a85cdb442
-Clang.v11.0.1+3.x86_64-linux-gnu-cxx03.tar.gz/sha512/7cf7ba37c600669f12a74e6760ee5b166b6de4f7e817b9ddf133275a242335ed9a460c226a3f4102cec47bb552d8258b34cf3cedd35b16756f1efb5a3608250e
-Clang.v11.0.1+3.x86_64-linux-gnu-cxx11.tar.gz/md5/9e930813e07758c80ca7a1575886afc7
-Clang.v11.0.1+3.x86_64-linux-gnu-cxx11.tar.gz/sha512/e0dce74b7377256fa65f1e6ce461ef5a6424b6ddb157be36393cd8038153afbecf460c966979e267d1de5cb0ed4509a093ed58210eb3cb69391d16b938a4e7a4
-Clang.v11.0.1+3.x86_64-linux-musl-cxx03.tar.gz/md5/6e345f292464e430d3408826d0ce9573
-Clang.v11.0.1+3.x86_64-linux-musl-cxx03.tar.gz/sha512/f802e46a46a2946641f07b8b2ef41fee8efbe98e13d23a8ee01047831992b6f115d157520e44879978e27e994da4b63c1a09e6ace63b7c1a1f6351ec04c4885d
-Clang.v11.0.1+3.x86_64-linux-musl-cxx11.tar.gz/md5/ec61190097b601446985c96ca3ffc58e
-Clang.v11.0.1+3.x86_64-linux-musl-cxx11.tar.gz/sha512/00de2b64bda05e27f5129d5702894823517f5da01a42892ee20f69e55b6004b0c1020ecb941330243a565048ed249d827612dc3ce69687401c731ac168566049
-Clang.v11.0.1+3.x86_64-unknown-freebsd.tar.gz/md5/dac3f7585db42a3e8eae4d563abc5d68
-Clang.v11.0.1+3.x86_64-unknown-freebsd.tar.gz/sha512/082ff3234db9b36346553b5c97d5b7dd55f7458aa4f1f1d358e4d4ee2c7805525514c18c35d319a22c56e6c3bfdd4160ff0a2157f24e1123c5879d167f1a2d13
-Clang.v11.0.1+3.x86_64-w64-mingw32-cxx03.tar.gz/md5/81aa45c118d96afe17accb9a0e02ea49
-Clang.v11.0.1+3.x86_64-w64-mingw32-cxx03.tar.gz/sha512/240979d6e958ad75b6100b5a4fe650c21076dd06b8c9ff13fa397b34bf9816d2d8b8c3ed6040235ef82d11d885cfbda20e5500af3ba3034442744d201264acb0
-Clang.v11.0.1+3.x86_64-w64-mingw32-cxx11.tar.gz/md5/b1695d5e231249c07b5cd9d868b6434d
-Clang.v11.0.1+3.x86_64-w64-mingw32-cxx11.tar.gz/sha512/24a33877f7b97a67cd8acc21ce23d31802bea13f416b5bbf28c262277f4a9e8f6a2cdee642d4d49b7d9b7948bc538f3b1bacb259ab0298e58db910044ebcceaf
+Clang.v12.0.1+0.aarch64-apple-darwin.tar.gz/md5/9d0bd5ff9934f4d0832b89b2c896f13b
+Clang.v12.0.1+0.aarch64-apple-darwin.tar.gz/sha512/1b6325b295047b4d31334dfbadabb7716e19ea1eccb809f366b06f241719f18ea54836f45d982d3cf66c4a6ae09686a4a55c76dcec1f883f0192347d6fb779d5
+Clang.v12.0.1+0.aarch64-linux-gnu-cxx03.tar.gz/md5/d3e8c671f21b15522832e80844e81199
+Clang.v12.0.1+0.aarch64-linux-gnu-cxx03.tar.gz/sha512/3b37e689aca086b05dea1b2293eb0fa8554b9c846c0ab7bb132be3600e66efb48e929e5b84a3ea1cfc9e9a3941521a395d1fab3852c9877422a1f62133fe19fb
+Clang.v12.0.1+0.aarch64-linux-gnu-cxx11.tar.gz/md5/8f790f74846a5dada6a5d2db3384d783
+Clang.v12.0.1+0.aarch64-linux-gnu-cxx11.tar.gz/sha512/d8764c3f5fb5e8a3ccb93d4425145747d1e118c97beff42afc801db0189d72758b10c1ff72b9cef71e4d226065b3192710e34ecab3b988d9a27f48a88ad18194
+Clang.v12.0.1+0.aarch64-linux-musl-cxx03.tar.gz/md5/7a11f6ab531cf5e2f453145fb5f67df6
+Clang.v12.0.1+0.aarch64-linux-musl-cxx03.tar.gz/sha512/6dad060cde272417daaa7a88bca17fcdbb1f2c4ab7ac2d888e013be34d150ff5fb5c5d74a5016b9bc666c2a8d02a2b68029a51c5cb27d63ec143bcbd5f1ae4c4
+Clang.v12.0.1+0.aarch64-linux-musl-cxx11.tar.gz/md5/4ff76d1479f9e8e1462302cf1c4eb209
+Clang.v12.0.1+0.aarch64-linux-musl-cxx11.tar.gz/sha512/d295e13c0ae4eb22d499cac17dad660d47d2a4cd259295dafe33f0f9d160c0fa3a6d793fbcbe155914fd85b8fe65d44db2608e1e98d243ea08fd21abff5998cc
+Clang.v12.0.1+0.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/0f3945cf21990dd64da3f5a85e0203b4
+Clang.v12.0.1+0.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/45a4817d5c0d6af588264f7c996bd583c437c67924f4220256cf0c2b77662b9c42d3f3aff1a83de95cbdef76c94a079a7c2dd52585501188c865049dd0221a7d
+Clang.v12.0.1+0.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/8bfc3fc5bb8e71c4e8ae22a65c93e2a4
+Clang.v12.0.1+0.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/78d529f12928d950a5a6da16efe379c722628ac9647b325b7e6617623a9c809315b7e594dd9c2a12a7cd66c2a8a63956b2e0cd7c6f3ed149ba75b6300d766ccc
+Clang.v12.0.1+0.armv6l-linux-musleabihf-cxx03.tar.gz/md5/3fb0f6f8be3a722b2df4cdf1d1756170
+Clang.v12.0.1+0.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/72c76603c732c56c4ac0f4235a38f6be739e4f630da9f93d50a4411fce7f9995f0b86d7ff1daf60177f329ac46145c2fa7d581ebdb55c4a7f314b85633cb31ab
+Clang.v12.0.1+0.armv6l-linux-musleabihf-cxx11.tar.gz/md5/b346689d4ab5757aad546521946ed622
+Clang.v12.0.1+0.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/1af114292326176fcbda578a07f73c1eab90b31a0db87819bfc38c71623b3134952811390e2051466f0d48132040963fb1d93d5f121bf8fd81f58959a23c10ad
+Clang.v12.0.1+0.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/af253b7620b1e80772d5fd6d5d3b9603
+Clang.v12.0.1+0.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/1cbe3695204607c667a64798a9d44a6de991e5445baf4952502c8c2bc22ad0c05f17aad76d58b634015079049afec934e805dd68029aa62c3bda7e3834abdeb2
+Clang.v12.0.1+0.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/0e0129a8f6d2c09a2954cbf64b9dd9af
+Clang.v12.0.1+0.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/2c9bcd56f870038dd84e97cabd3861f16ee33aab99b8cf4feef9c27140ce79035394f41037f0ad8034eabf9195e22868317c74e177a68aef83b597aa67633745
+Clang.v12.0.1+0.armv7l-linux-musleabihf-cxx03.tar.gz/md5/9c245166d829cd8743a9c726a803be06
+Clang.v12.0.1+0.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/c4cd7a9478b44a4a518d5ebce3d1193d4b55c2d3e107a9b949805f4669965d1fc4c6704254bd31a7c86792fe278f06374ee2c691c92d9f32ca01c12d574727c4
+Clang.v12.0.1+0.armv7l-linux-musleabihf-cxx11.tar.gz/md5/53686f336daa4d76dde90bbc0324635e
+Clang.v12.0.1+0.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/20ac50b52bf80edc6a729e513577f295e7b89a8aca3d618bd323cf0c44d2313e49e9549ade36100855345291425ef966ddd342c456ce26ea777a0b82ac837551
+Clang.v12.0.1+0.i686-linux-gnu-cxx03.tar.gz/md5/4e5fcb61d9dea624dc30df1c6ea3e7ad
+Clang.v12.0.1+0.i686-linux-gnu-cxx03.tar.gz/sha512/30b407d5420fd0b3da4a82cc5e1b1e7a8c1764793ca3e6d6c0dac9bbc212d67d95037ef8a6fce282299f9a3232371e410e2c8942f1b5958ecedcc8879e19ecf6
+Clang.v12.0.1+0.i686-linux-gnu-cxx11.tar.gz/md5/70fd7d0b22447a1e1856bae3e58de0fc
+Clang.v12.0.1+0.i686-linux-gnu-cxx11.tar.gz/sha512/4e2446c562834e308697135ec775b1d28119822c7a0028249c19ae9e495ed216d8e135781bad5ea722322dc5d24b725439cff9b4af8555b24886008690a574c2
+Clang.v12.0.1+0.i686-linux-musl-cxx03.tar.gz/md5/e30ae623bfd0c7af72d6ddd87343d3e6
+Clang.v12.0.1+0.i686-linux-musl-cxx03.tar.gz/sha512/df9982ff7d83d9e25ec796d3de2a22d8bdf34d463d9dd0b4ed65289a3295235dffc621efe100408f7fced05c611dd44c74fbc4599ff0a4fc19d22fc305fcc2c0
+Clang.v12.0.1+0.i686-linux-musl-cxx11.tar.gz/md5/a4bcb55a537e553a0e95392e924aa46f
+Clang.v12.0.1+0.i686-linux-musl-cxx11.tar.gz/sha512/6747a2b607caf0606d4c5ea06f0c23fe39d8c31310bb773f18ff8a0e8e9a9b79e745f5d5d1c9fc69adcd1f3561020814e3088a693acf124662b81b8e92651455
+Clang.v12.0.1+0.i686-w64-mingw32-cxx03.tar.gz/md5/a513038e7b0849ed6daf1609ce5bfeec
+Clang.v12.0.1+0.i686-w64-mingw32-cxx03.tar.gz/sha512/3acf8c08b04137d5271cae03085eda859a1b3c519b9540d627d2b05581964234523d2bd3233bf59651f306f1cfa7e5862792503ceda39f0981ab25ced12b9ab9
+Clang.v12.0.1+0.i686-w64-mingw32-cxx11.tar.gz/md5/11214b74d6b50c68a5e5e7e21a9b3787
+Clang.v12.0.1+0.i686-w64-mingw32-cxx11.tar.gz/sha512/ba7c29fc008a4b0c10530383ab30c0b2bb60f3572d471ded7089bba088163a05fc06af310931f12b057a8a932914bc6a0e0efd3f9832ae52a34d3cc066486874
+Clang.v12.0.1+0.powerpc64le-linux-gnu-cxx03.tar.gz/md5/7802c4438fc9d83bd456cef8eb1bc2d9
+Clang.v12.0.1+0.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/a24796700d12f15c827a43acab0b8bf9afd0f68ee32b4b98e8aa5d3df152f46230b13f072d50ac25986fde79a822ee78d595f8d1c28cff168e89a4e15d5cbd09
+Clang.v12.0.1+0.powerpc64le-linux-gnu-cxx11.tar.gz/md5/bce9c20a48ea31265d3a57ecc598c146
+Clang.v12.0.1+0.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/d35d2d1efab91fa2e7e244bb4dc9811adaf01cb42f448abf0ee688381959c304a5cc4ae5fceae9ed60a2b0d34dcc795b8dbb405b1f770e7e05a3d16962d97f99
+Clang.v12.0.1+0.x86_64-apple-darwin.tar.gz/md5/cddf72884b50d35dad7173625285d2a9
+Clang.v12.0.1+0.x86_64-apple-darwin.tar.gz/sha512/638dbf23827b7486a90ef5f1d79419a7c09411d4f8ad8da5fd20691d38185c28b5aab5b9a2acc43a4e596b831cd27eca489efe79e007f7540f98cd563eff8624
+Clang.v12.0.1+0.x86_64-linux-gnu-cxx03.tar.gz/md5/86c8206b2e38d5cb7365dfcd50d1f027
+Clang.v12.0.1+0.x86_64-linux-gnu-cxx03.tar.gz/sha512/60d5e9db3d3bf4d8309e462b6d47b258433cd77fbdebfafb7c5f07ec4bf2408e61f645717c260a5c4be150fd86905174b9a806b194a153883bb16590b33a623b
+Clang.v12.0.1+0.x86_64-linux-gnu-cxx11.tar.gz/md5/83a0d75a072631c856ed2110acf8e444
+Clang.v12.0.1+0.x86_64-linux-gnu-cxx11.tar.gz/sha512/908fabecef669ba579bcc59e911b68ed8deb5b3bda49d0dff6ae01dfa44526ebca81e83238a1abaafbf6b249e56409652eec304e0e82cb056d6182dbff9fbb2d
+Clang.v12.0.1+0.x86_64-linux-musl-cxx03.tar.gz/md5/8ba30a11ea311716ff27cc7631837f9f
+Clang.v12.0.1+0.x86_64-linux-musl-cxx03.tar.gz/sha512/3bace26a82809153826ecafa440e563576178fa0481c754d5d0b8a3c60cd0caf5d4db8f8c860fa0779bae768a5a6f4427951baa00588713c19328212662700b9
+Clang.v12.0.1+0.x86_64-linux-musl-cxx11.tar.gz/md5/c7ab6f58f6de37432b87c6f8607fa455
+Clang.v12.0.1+0.x86_64-linux-musl-cxx11.tar.gz/sha512/1d9c9553710f0a7453a6d53b0529fd08782825870bac4074f55a2206c5f7372ec8578a951978f09aaccf481919096f4c4f16db4bb1ae707594b747402cad8213
+Clang.v12.0.1+0.x86_64-unknown-freebsd.tar.gz/md5/1aad4f620028b50ebb9b537f57fb5c48
+Clang.v12.0.1+0.x86_64-unknown-freebsd.tar.gz/sha512/0dfc1e8d2dfbe481848f15b543dc1f22d43ca8a83ca4cfc870faeef046453ece621a866c1dbc384c58aca11ff607cc14e77c64438ca2f3402ac78c268671af8a
+Clang.v12.0.1+0.x86_64-w64-mingw32-cxx03.tar.gz/md5/2d0b4f4f249d7e910b3566c9fe8253ab
+Clang.v12.0.1+0.x86_64-w64-mingw32-cxx03.tar.gz/sha512/e95329f809489ce64ba34e6c9bf773f6b5be0e69250bbcd45df995b7a88f4583339be3e9c1e353740e5047954b3a553b1a76bfd40ede5fc23b3504ec6dee91a2
+Clang.v12.0.1+0.x86_64-w64-mingw32-cxx11.tar.gz/md5/8d7178a3c1aae01a39a56c7b80e3d584
+Clang.v12.0.1+0.x86_64-w64-mingw32-cxx11.tar.gz/sha512/6a838935a32c36c106cd105d3e0a8324c020dc400446a11d3b4dc266cd30b31eda1f26215429607377c332e989085331f6886ff2e3ec990711ae836eabe0498e
diff --git a/deps/checksums/compilersupportlibraries b/deps/checksums/compilersupportlibraries
index 8ad65ac42987a7..e351d100cb481f 100644
--- a/deps/checksums/compilersupportlibraries
+++ b/deps/checksums/compilersupportlibraries
@@ -1,92 +1,92 @@
-CompilerSupportLibraries.v0.4.0+0.aarch64-apple-darwin-libgfortran5.tar.gz/md5/df1bb35cddff18f4512b57a2b36a6be7
-CompilerSupportLibraries.v0.4.0+0.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/d01fb4077b97d10d5b4c7567146d82d7e6a45a57296cae14e97fe3d4174083553d9318a0ad75bd492315fa9ee213bcf04cb3841c2a6302f87fc8a377e756f459
-CompilerSupportLibraries.v0.4.0+0.aarch64-linux-gnu-libgfortran3.tar.gz/md5/81e354bb883997335f1945cffd493978
-CompilerSupportLibraries.v0.4.0+0.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/5c1f79488019d9bf46906e5e921ccf0118b762baf522b02314177ce81ccfb839e75226802c844462dc9a70aaca609b465348e43297b918deb9c4d3c8e36a6b94
-CompilerSupportLibraries.v0.4.0+0.aarch64-linux-gnu-libgfortran4.tar.gz/md5/c9de5672d1e67cd2541eb8ae25188573
-CompilerSupportLibraries.v0.4.0+0.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/30381588684a1fe4818ef6e2d0e3dcf44f0dbf81fcb63a043b9c8e4b3a34e6b7b09511e94c3eeb5004ca45a17b9fd0b7e102bb0176672fef09f1b940edf15a03
-CompilerSupportLibraries.v0.4.0+0.aarch64-linux-gnu-libgfortran5.tar.gz/md5/b2f930fd61a2c22895f3102f950df915
-CompilerSupportLibraries.v0.4.0+0.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/89e3103c28429d4c9a020be7b0ed9b374bc48d2971671904e889a57a9bb9b8c81011b20143c0152814d1191d246d144242e4c71cbd45a3c4bf50ebd0ab967b15
-CompilerSupportLibraries.v0.4.0+0.aarch64-linux-musl-libgfortran3.tar.gz/md5/be7aa1b8f769bcee21a615d4819f00f6
-CompilerSupportLibraries.v0.4.0+0.aarch64-linux-musl-libgfortran3.tar.gz/sha512/3404792c4bd24499fd83f7140c392fff7ac5360eb55ea580808d051f9bfc9c9f9e7b59be25ada594d71e3d94610fc4eb397daee6d84fa6bf4c988f6881e19c33
-CompilerSupportLibraries.v0.4.0+0.aarch64-linux-musl-libgfortran4.tar.gz/md5/22e9e895ca30837fa5165bbf4b74e482
-CompilerSupportLibraries.v0.4.0+0.aarch64-linux-musl-libgfortran4.tar.gz/sha512/04600705e907ca1ea4a203d5e696171bae4777c4c9534f4ae3d20525d39be60de1916a2d0b12b7e710d144ac47b2472754e1b3e01295fb43934e6ed8c9201766
-CompilerSupportLibraries.v0.4.0+0.aarch64-linux-musl-libgfortran5.tar.gz/md5/6a2518ceb9c3e57fabf0db12090a0872
-CompilerSupportLibraries.v0.4.0+0.aarch64-linux-musl-libgfortran5.tar.gz/sha512/19222a3dcbf9afbdf43192943a0a7ad9cbf4762c1a681567cbbf6093866e44465ff566a10595220c9366d92dde43dcba84d41c240e2192da042ddfeff3e9f1a4
-CompilerSupportLibraries.v0.4.0+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/96f355abd8fd31d6522c6d7d5b259971
-CompilerSupportLibraries.v0.4.0+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/1fd436c9e4d15ff199fa60fd9b715c16effbdb6c15f86b86e9b2dbfca70136da71a4a491caa2650eb415fc32620b40295729a6b375efe1a08f9c34cbbc317949
-CompilerSupportLibraries.v0.4.0+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/1aae7c2cdaf1ba04b9bad8119224eb24
-CompilerSupportLibraries.v0.4.0+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/913defbddb5d5069f1d8b9a1a4a5c2e325538baeb0203756cc91de45fb52b21defc7cbdca6f454803f792dba895ae146009d667ed508727f4aea627dc1ae419f
-CompilerSupportLibraries.v0.4.0+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/f1c35daa5d66fae11adcf6385ab6e095
-CompilerSupportLibraries.v0.4.0+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/6a7a9ef08709ce5c805ea19fc33bee65b85ee576cae747327eeb12f0950dfca4b871a0e9799d4961e5d703a0a6e116145542078d124258ab3a949957e368173b
-CompilerSupportLibraries.v0.4.0+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/fe30bb318b88b3270e92d3d8f84abb5f
-CompilerSupportLibraries.v0.4.0+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/d54f5e0c7780a8557b889b3fa5d2fcca639521d00d624a0a4a3f4a46b854a3615989040f122243f5dfa266d8f6ce700bcffc9ed1653f86eb8279d5c31b67c6f3
-CompilerSupportLibraries.v0.4.0+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/489e4a98eadee286d3c32ed08ba2b5a1
-CompilerSupportLibraries.v0.4.0+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/b4589dddadd17ba1ac3e086a9312db3301e5d828b4c17e9e8db3ed018eba4757c84282dd78b2b2c84f978fa9154c9f46f9e3f1e711207eb19ca0f834239b835d
-CompilerSupportLibraries.v0.4.0+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/cf0e32142c441673680d55b7c547a629
-CompilerSupportLibraries.v0.4.0+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/698af44f897dfb0682b49532666896b2d3430f1a172921f9184e17404e714448899a227882f8375193d0a0476a777754e7f679bc0dea987ad185eafad30eca0b
-CompilerSupportLibraries.v0.4.0+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/16897e51e3a1d61d1b9a7abd6e728d2a
-CompilerSupportLibraries.v0.4.0+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/3a9a56d80c1ba1eca7bee4156a7d56938564c27366a51395d6c8f74c3bb500b0b374deed3fc1efc75942e6a66dc354808a6c7a66d1952eba9dc81c9e0fb8cc69
-CompilerSupportLibraries.v0.4.0+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/840d73fbd5ec7fe94708cd8179ccefbc
-CompilerSupportLibraries.v0.4.0+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/dcf7834d1f2b67356baedf0d3eeba4f5d3e62e0fd475ba3329668e18608fe1493fba07c9737bc4474e8276b09c6c73c76dcd6fde44d96149b740398c218a3432
-CompilerSupportLibraries.v0.4.0+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/50a06e1bc8a8442c7a1369bc837ce504
-CompilerSupportLibraries.v0.4.0+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/868970d00f5387f42a632c5fdbedc3e9135b6723ce3b1ae1735fd51a4a1faf91ab5fae94bc99c37aee75d9cc9547a7259cba584d95e0971ef97605592effeb39
-CompilerSupportLibraries.v0.4.0+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/9998e35984f1c3b54b45dd8a4af2145a
-CompilerSupportLibraries.v0.4.0+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/2e653f25d5652003d2cfceae4d786187ea3911caff8555d9f4e10e0ad60d237afc18fdc5247804e3347825682e6e347a92b70699ba73bf497ec414467f1cfccf
-CompilerSupportLibraries.v0.4.0+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/64ac31833d8d69b57b6acb1767cf110c
-CompilerSupportLibraries.v0.4.0+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/31c762e0c5d8a0652fda77e10eb5d195d8e27e8cce9e020d8c186aa904e55687cf2e090e0eec55aa6e5e885fe165c099d05c93efb45d83fcd438f7f996a131ba
-CompilerSupportLibraries.v0.4.0+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/805a1d491313c87632b6e4527e499129
-CompilerSupportLibraries.v0.4.0+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/72d7e5387e3f1fb69f7d0ce51fed36cdd794b5c284dc9514c93e26458df860f83ca4581f4f6f53e838b26ed3563248c0beea7e8e2020e16d30a965e8d1f4de63
-CompilerSupportLibraries.v0.4.0+0.i686-linux-gnu-libgfortran3.tar.gz/md5/48eff236a6aedae30f871e9e5d1c14a6
-CompilerSupportLibraries.v0.4.0+0.i686-linux-gnu-libgfortran3.tar.gz/sha512/b913aaca516fcad91f11f4f797810d8f5fb240d92e428607ef982558f8e74a1dbdec3eee5983a5f0b7bfa9e6e750f724de4193b007fbfabfaf6705bd4b0fa523
-CompilerSupportLibraries.v0.4.0+0.i686-linux-gnu-libgfortran4.tar.gz/md5/6b377e6b8a2b120acbd24319dfef4efc
-CompilerSupportLibraries.v0.4.0+0.i686-linux-gnu-libgfortran4.tar.gz/sha512/65248ec4ac7997faeeb4abba9a79b0707dab9d0dffaff293039f7c0886f972d1e118738e37c4e2168228de11b498be58bff996a3a335b9ad2799146f593106e8
-CompilerSupportLibraries.v0.4.0+0.i686-linux-gnu-libgfortran5.tar.gz/md5/964ce992265ecac408cfea1e7fbf434b
-CompilerSupportLibraries.v0.4.0+0.i686-linux-gnu-libgfortran5.tar.gz/sha512/c100995a619ea04361bba6de3e460b22a9187af61fc02d8c3f94191cf71f73e9ff2409d87078e34655be6aa352f75f395e419860b6dca976cb665caa20e31fbc
-CompilerSupportLibraries.v0.4.0+0.i686-linux-musl-libgfortran3.tar.gz/md5/3a1dab532cd7222708d27d1cab9428a1
-CompilerSupportLibraries.v0.4.0+0.i686-linux-musl-libgfortran3.tar.gz/sha512/836acc03a34cb48b079ac2c8b397a03d01c1dad522a6be5c3ad81da98ee0979b981b14184f08e7308bdc69bc17bcd897d400853b0e6ce9a7821f9ab3c6a6890d
-CompilerSupportLibraries.v0.4.0+0.i686-linux-musl-libgfortran4.tar.gz/md5/7bae09d4f9460ae491605f6642c3f1dc
-CompilerSupportLibraries.v0.4.0+0.i686-linux-musl-libgfortran4.tar.gz/sha512/fb5bad1260aa3ad1ee135120ce0cfda4034380d85970c458a6dbac9cd24e3a7b996bffae8ccfcbd30223273e766c3a7d5945ea42c5237811a2b4af1db5027e09
-CompilerSupportLibraries.v0.4.0+0.i686-linux-musl-libgfortran5.tar.gz/md5/c7e7cdd660accdeda123685c7691429a
-CompilerSupportLibraries.v0.4.0+0.i686-linux-musl-libgfortran5.tar.gz/sha512/7015eb83612711a93dda9ae5fa709f74e72b33922e7f48ae98e8bc4798b8057ee315a65a27c2ab041623775407d37c686f4b3d64e1af27df6a32fd4d0edd5441
-CompilerSupportLibraries.v0.4.0+0.i686-w64-mingw32-libgfortran3.tar.gz/md5/2dcabd868fe7ddb4edd29078b840c093
-CompilerSupportLibraries.v0.4.0+0.i686-w64-mingw32-libgfortran3.tar.gz/sha512/a70e6ee8cf73aacf0cbc7b4ac5f17b8e6914e93b106caba907c5ce8f2ab24fbf28ef50b60548ff6bd8e47461ec971a4f2c7519ffe0c13893ae48e4d6bbfd6a49
-CompilerSupportLibraries.v0.4.0+0.i686-w64-mingw32-libgfortran4.tar.gz/md5/0097f1075086376188cf594276edb421
-CompilerSupportLibraries.v0.4.0+0.i686-w64-mingw32-libgfortran4.tar.gz/sha512/47cee1709879bea6a7d537a31438b39b5baf19b94a7575e7ae2a2271174884f484069c24d8e1dc927f3cc89f2007c5c1037b4c76e1747a1f9512d5055e30900f
-CompilerSupportLibraries.v0.4.0+0.i686-w64-mingw32-libgfortran5.tar.gz/md5/e00d8689da404039de3956327e5e1abf
-CompilerSupportLibraries.v0.4.0+0.i686-w64-mingw32-libgfortran5.tar.gz/sha512/24efa7cc047c9ebc7f9bb99c844691fc78333a8e78f11da4a0d8ef58e98cf64da20198fe97b735443220ef6860e9618b9e1bedfdf38b2e134d9366db4ae9da4e
-CompilerSupportLibraries.v0.4.0+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/118fede168fbaa2433c10c725ee445cf
-CompilerSupportLibraries.v0.4.0+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/447f0f4eaa72531c6118f65268d1e505323a210e04213358942b58a8a34f4e20c39306ac7d89f06e51df57265b04a1d0d4dad6c141b590cbd3389573ede0de8b
-CompilerSupportLibraries.v0.4.0+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/262d162451ffc50c25cf599285555c47
-CompilerSupportLibraries.v0.4.0+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/137afd852f57a3b4d56f7ddf1ce2f7a3386bdb26d7e577e91af8d95e1fa08c2eb43623925158248f2b43df346cf2e87f308305087e3a3473eb6a0526ee259a5f
-CompilerSupportLibraries.v0.4.0+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/d3b8e3b11b74d1cac626ccab59e652b1
-CompilerSupportLibraries.v0.4.0+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/fcb110fa7b7966840a00f5fab2968d543d1c15354222ef6e693197797c337a3b9586caa66db7042280ebb46de51adf4903cb21c79b4cd1179f5aa1d4082e270b
-CompilerSupportLibraries.v0.4.0+0.x86_64-apple-darwin-libgfortran3.tar.gz/md5/1f593b25ca831267c4e02555304e447a
-CompilerSupportLibraries.v0.4.0+0.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/94b502cb4c02bdc1dda87e3608d9807925002eac68883c26cf27e358d48eff41fda0832fb6118c750a1386192e27bd6a0c14c9510d114edbf43efdc2e5bd1c13
-CompilerSupportLibraries.v0.4.0+0.x86_64-apple-darwin-libgfortran4.tar.gz/md5/a23d18444255c8ebf5253dbd470169ab
-CompilerSupportLibraries.v0.4.0+0.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/50737af89141a126766829fafe19599e1dc645d8669cb10f05111aa4c031f3fffecdb9ec98dd4ac9fab541d5947582b284d650f86c3116db12c10387be42eff6
-CompilerSupportLibraries.v0.4.0+0.x86_64-apple-darwin-libgfortran5.tar.gz/md5/dbc289277a5b2a960111e2a5eefa7f0c
-CompilerSupportLibraries.v0.4.0+0.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/4affcc86a550400f5fefb0337a1d245849ecf7fdd305790197c60112a0d1e096fffdc4ac140a82952abf6eab220e8c631c392b8903ea586b5c0345c65c9116f7
-CompilerSupportLibraries.v0.4.0+0.x86_64-linux-gnu-libgfortran3.tar.gz/md5/dfc62f03e7b65447ed877a9eaa537e07
-CompilerSupportLibraries.v0.4.0+0.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/f782f5a7c87924e134778ce9f7f98f0d48f4626f8b7b371a5c7799cf5bc7ea3cd6cdf275769dfc2b9dc2da82181e49b9d77833ff5241d2600d58c9bfd78a189c
-CompilerSupportLibraries.v0.4.0+0.x86_64-linux-gnu-libgfortran4.tar.gz/md5/6e46faded79b17c6b77846fbf2e5119f
-CompilerSupportLibraries.v0.4.0+0.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/6cafdf6d79bc8a7125857c148282384477720038293ccae83dc9065f8457ee28d516468bf9c96320086c9f2dbefb6f57ce77a952bc105de2b67e47fb0b0b8a1c
-CompilerSupportLibraries.v0.4.0+0.x86_64-linux-gnu-libgfortran5.tar.gz/md5/68d2354ca386ba9ec4e98504fff2f222
-CompilerSupportLibraries.v0.4.0+0.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/ed3d1c0cbd34f62abd41694fc73ac38a34bc19674526e41090f66aaeeafe87cad382a5b17cfe8a622079571d646735d273249acd108bec9a1f42c195da98403b
-CompilerSupportLibraries.v0.4.0+0.x86_64-linux-musl-libgfortran3.tar.gz/md5/ed7a41374016ffe35c8068b2090fb66f
-CompilerSupportLibraries.v0.4.0+0.x86_64-linux-musl-libgfortran3.tar.gz/sha512/a8089a9704b8ac0b7c8de2f329f4eaabca5daa508e015cfc6a1fb668e78167463a30d9c96f23e2a518dc8b730fe606253c360a13689fc6a098dd18ad61ce1fc7
-CompilerSupportLibraries.v0.4.0+0.x86_64-linux-musl-libgfortran4.tar.gz/md5/1f425bdb591db55d4a821a3b83a1fed1
-CompilerSupportLibraries.v0.4.0+0.x86_64-linux-musl-libgfortran4.tar.gz/sha512/24a1d1fb3d388eff2521c7b0eafafe76b29c3571b784af1dcc0ff8ea2781aa81b26fcdd03a0a1efaba9a913b6b67ece29c3c867a3cc93dfe46eecb3fdd2ef5be
-CompilerSupportLibraries.v0.4.0+0.x86_64-linux-musl-libgfortran5.tar.gz/md5/6c19ee02d1bf8be22d65155d0d430946
-CompilerSupportLibraries.v0.4.0+0.x86_64-linux-musl-libgfortran5.tar.gz/sha512/aaaaf2a731e9009171a555b66fd2b662e2bfb207ecf84cb54138c770bba0e8e5910190a7a279b1d1d6058645a2f0d8bbca92ce9ca39753cc14c886a7189431fa
-CompilerSupportLibraries.v0.4.0+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/6c9beb8a213ccae10713d336e4c48c1b
-CompilerSupportLibraries.v0.4.0+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/da25d33a824e71d254492c891cad0a0fd9e472e5320403830a4bf8c0f175f531775b4e468832c702edf1e46dac68725e04ccdcf8914ac6fa60b27cec45d5aa1e
-CompilerSupportLibraries.v0.4.0+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/dba2d41d040bba44d9a7767d4fb6c329
-CompilerSupportLibraries.v0.4.0+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/16f9ec8a9897832b3a7408e192f9d0af222f369f33e5a698fd92f54f3c4326f0632f1e8ffb6757b6969d5a18f594f8f0e70457f8bb78a7740b8b26daf22171e4
-CompilerSupportLibraries.v0.4.0+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/1dd16d636fc9a87668a9c6ad2abc1a93
-CompilerSupportLibraries.v0.4.0+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/6a2deb304127c06baf260e2614bdd1d397fff3bc0b11c192ed0e43b3681af805dc5f17d84ff4836b108be65f879743a13b49122015d5f343f5ff3c895bf46de8
-CompilerSupportLibraries.v0.4.0+0.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/2edeb2d7edbd955a43ca3efc8830e292
-CompilerSupportLibraries.v0.4.0+0.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/a89faa9dbac9ddb3544a743c61636a77ada4af5fb637f9391187fdcde293d766deee7a60a878a53a08eae6f35e4b64b981c0ec5a39a2038299549c07e0bcca10
-CompilerSupportLibraries.v0.4.0+0.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/50d2b57735d2cba4a5411414a2e26e6d
-CompilerSupportLibraries.v0.4.0+0.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/f6db77083655181c0c28c2056962559ac5e77e6869dc990ba21e5d0c265d7705c482088587f6871dff1142c4a6ccc12ed24329f6c1f99146bf43f176dffbe28a
-CompilerSupportLibraries.v0.4.0+0.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/b1479d5d095e9f2b198e48ff24f0a0f9
-CompilerSupportLibraries.v0.4.0+0.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/a37fcc95658e91a820d6a4d13b51cf8ee6820b62142686ced67768d8f03eb09bd8cc3d0e7afc16abdde4a4fa764ece7bd92b8951cac06ad817e7ad13eaa784e2
+CompilerSupportLibraries.v0.5.0+0.aarch64-apple-darwin-libgfortran5.tar.gz/md5/307711def378e337a999c182aa7e07d8
+CompilerSupportLibraries.v0.5.0+0.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/0dcad5e315e045397320f667b27fc378da898ebfea9b55a2837e68b29434fe2c2ddc9652cc75a4551062ce70a2bfaffa8223c77398aa41fe1a73ccb44952cd8f
+CompilerSupportLibraries.v0.5.0+0.aarch64-linux-gnu-libgfortran3.tar.gz/md5/177f2665038919c3f8ed968226ff3b56
+CompilerSupportLibraries.v0.5.0+0.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/ea67c3b9986106aee12e5f22ab3d3c5d71a58759a7d20a7724bbb198e5c71f42fa2034e46f3147006a2d2277b3881f0546030d1040cb9393e58eeae87eb82c4d
+CompilerSupportLibraries.v0.5.0+0.aarch64-linux-gnu-libgfortran4.tar.gz/md5/f16db35be9018a5c61eaafaaf7226d10
+CompilerSupportLibraries.v0.5.0+0.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/051b5a0dd2235eaa90557e487c83499b3d7e0b9e921f7b2f14e77c81152c338acd5bac8040bdf6679db656cd8039093db43565f843dede253717425e464e61b0
+CompilerSupportLibraries.v0.5.0+0.aarch64-linux-gnu-libgfortran5.tar.gz/md5/e6082f3e46b627fdaef09f1ef81c1d7b
+CompilerSupportLibraries.v0.5.0+0.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/13d0ab1c0e84a65db729ea6bd45a868d9d65e1a0ec95412448846d1044e2bbf11b11d96cfa576dccf3d7eccc4bed4eb9ae4bac0989e9b1b97adad5e404dfe4a4
+CompilerSupportLibraries.v0.5.0+0.aarch64-linux-musl-libgfortran3.tar.gz/md5/00703177897f8c46a577c2b0518432bc
+CompilerSupportLibraries.v0.5.0+0.aarch64-linux-musl-libgfortran3.tar.gz/sha512/af14ad1303f3918dd691e0b509ea0fd52ac7c9f0c285e8dbb741bd34ce0b1927f89f219fcf8d260315c503b18bf98b3df117810328066a9964917cc34968ce98
+CompilerSupportLibraries.v0.5.0+0.aarch64-linux-musl-libgfortran4.tar.gz/md5/f823b692319cd370ca59189ad2ba4a3d
+CompilerSupportLibraries.v0.5.0+0.aarch64-linux-musl-libgfortran4.tar.gz/sha512/b0c4131bf4d15c482bbed83fcc570da2f7bb8ef99d507e0e13eb0c8f5519ec73ff234c58d505294be3f8d39b6dd1c7022578db02005ae111c7873243e8ddc8ef
+CompilerSupportLibraries.v0.5.0+0.aarch64-linux-musl-libgfortran5.tar.gz/md5/a9ef1a68518058fe6c945e8b00f8400f
+CompilerSupportLibraries.v0.5.0+0.aarch64-linux-musl-libgfortran5.tar.gz/sha512/6aa53edf48a17ec8515cad5c79a15ab0e40cc44c9ffb188fd57fc560dde7a99d6487ead6e4caafaa9912c6590c6a391f914016fd4342589da09d56c657ad2c07
+CompilerSupportLibraries.v0.5.0+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/d3aaf50955ad671917e941e0dcf3803f
+CompilerSupportLibraries.v0.5.0+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/72983b2272300c2332cfe6864b5dd5249bbbb181bd65b10bf6bfb3a37e5e582bb9c159db0b63a077066a325899a2864717f28c60c85027be3b637bb80f994e52
+CompilerSupportLibraries.v0.5.0+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/e221d51df9b18b2562a0f3e8dc8012cd
+CompilerSupportLibraries.v0.5.0+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/758b07b4a559dda747574649926333a70355e2d80acb2ea37bb39777c0b1cecf8f308a5f8062110c378db2230ec8baf23385ae313d1c58de8bfc651573c64c1f
+CompilerSupportLibraries.v0.5.0+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/96f7feef9b1dd7944130de2e9cda68b8
+CompilerSupportLibraries.v0.5.0+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/8b4aaff1388cd506bef7f3a9edd42ed8ee1db468a18d34cd5d58d7da305853dbf48d4665e99c06c6fb0115e421d19dba5c36e947cb06defe7f479a05b547f112
+CompilerSupportLibraries.v0.5.0+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/a1e3642a7ce2b7834aa2f1b695a9977c
+CompilerSupportLibraries.v0.5.0+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/9d22b1fa8fa8eaaa5316cb494eb223e0fe73660aa5ca7518180e40d296d6d07a9863938501e5d5350bf79e79d975d7d66dca12768a0a69527d2c17baf7aaf345
+CompilerSupportLibraries.v0.5.0+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/d897098fd98928c2d644ed5ee26c3faa
+CompilerSupportLibraries.v0.5.0+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/4aad051f4f1e3d744825c650363a49f39e04cbd44dad25197ddee1890339e9441aa872f893478a2d8ff556c9a70a89c2885cd779ba3efd3c0f7193c386b820b7
+CompilerSupportLibraries.v0.5.0+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/c36bfd4c5b90d55c55bc18feaf51b134
+CompilerSupportLibraries.v0.5.0+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/ab16c638780a0118b930ac587df81fa74d2731bf1af402266106e1ecb791df353c1f368a8e7fc9147d390825ff8624e600aae45f1f6ccfc0015ce131368452d7
+CompilerSupportLibraries.v0.5.0+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/feb76551e6f7407de3006a3d363cee7a
+CompilerSupportLibraries.v0.5.0+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/976f8e34e72231b013ea0418feff9c3c9efa7b9c34688aca115a03f2bade8760ca9f259f8f502ef5012fbb389f4bf365fd7639b066daca16fb7ec1d32b5cd789
+CompilerSupportLibraries.v0.5.0+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/560ca43fa6dbd3f2e9052401477df165
+CompilerSupportLibraries.v0.5.0+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/333c7f4fbc172e7fd3d99e2673dbed1d9c699a5bb29a20095a255fadc89ded05abda755fc167aa8a16a4e93f524390c9c817df7b67fccdca88754d0301259977
+CompilerSupportLibraries.v0.5.0+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/d3ac5f871599ab225a1128c302486345
+CompilerSupportLibraries.v0.5.0+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/adb706882e923978b6e18c7134578bc86ed4e031a7a0120222018cd1b8efcf530854e426b6442dbd80b8c77c3677f1906aedb12c0ddeb33efcdd3bcd2c4a109a
+CompilerSupportLibraries.v0.5.0+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/58774aa398a63479af3f4c69678d0191
+CompilerSupportLibraries.v0.5.0+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/fe9307e6fb0b54522495fc9cc48756a60fc79af27d9e73bfb3ee49cbb366dddec1beedca03614f15761b308bc28014205f174f673fa258e76d5947446b87b039
+CompilerSupportLibraries.v0.5.0+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/af1a8ce693ba307e61184f4023d73d67
+CompilerSupportLibraries.v0.5.0+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/2ea581bb44408fc789ac306734736f6eb6cf0a15b234f43a6f50ae8f10014b5689f5aa8356112c2b54a86b9a7734ace3479c4e4aba1e5df636dda3dcd09b7e28
+CompilerSupportLibraries.v0.5.0+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/20d62064f495877f12b7e87e684ad43a
+CompilerSupportLibraries.v0.5.0+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/31b1c7c9fe3378e8bb788c897bbac0505a5ae70f500f3b1457325dbbb149c14224a88d17fbcf453465d8a572f33157766bb0e815cce7c8a2aa8a44422d34a365
+CompilerSupportLibraries.v0.5.0+0.i686-linux-gnu-libgfortran3.tar.gz/md5/fd4035aef1c83be0b865d70aa35e770b
+CompilerSupportLibraries.v0.5.0+0.i686-linux-gnu-libgfortran3.tar.gz/sha512/a72047e7071838899d75896b4dcbdc102bca884507f4758b4e0dd62f50c9ce584f2b2b86d8b67dfc4fce9864faf9723056820e464bbab1a6173be47ad941d6da
+CompilerSupportLibraries.v0.5.0+0.i686-linux-gnu-libgfortran4.tar.gz/md5/89715bfa0e69528d4d294ed449ef0e09
+CompilerSupportLibraries.v0.5.0+0.i686-linux-gnu-libgfortran4.tar.gz/sha512/6eb7947c72ec32d189221de42d5a76423a1fb5745db0812d88afe7f961d8f42669c7cf487235c1dcc81fbe73106b785c906bd6741e98f60e9931f4083be0e9ce
+CompilerSupportLibraries.v0.5.0+0.i686-linux-gnu-libgfortran5.tar.gz/md5/5c1c73dc72029781847f74bcb1189c4b
+CompilerSupportLibraries.v0.5.0+0.i686-linux-gnu-libgfortran5.tar.gz/sha512/642d35ed41a65c7a2d7f4f127f936d3cb1665c207aa5feef25cce09cc11e733d7ec129673fea873403567c35cf16122ed1635c303ba13bb3349be44585f3ca82
+CompilerSupportLibraries.v0.5.0+0.i686-linux-musl-libgfortran3.tar.gz/md5/f91c962e7bc3ffb825c7e5fb1e099ba6
+CompilerSupportLibraries.v0.5.0+0.i686-linux-musl-libgfortran3.tar.gz/sha512/f89df221ff80bcbb1e6edc2f9cc28dc138d7d6ae99ac018a3cdc9a09ba637f1a9938b1f0876086f4f822fb911853286dd4f1776d603a403190bee052431ae572
+CompilerSupportLibraries.v0.5.0+0.i686-linux-musl-libgfortran4.tar.gz/md5/d2a81da3371a638f76087629ae0a6507
+CompilerSupportLibraries.v0.5.0+0.i686-linux-musl-libgfortran4.tar.gz/sha512/67941af15a0f032a853cdea180e4f87249bed2dfd09ade6fca9760f5a44b26fc94a0d6932803edbd27b75aa8d26e64c377af2d64ddcba3206562be1427a64c80
+CompilerSupportLibraries.v0.5.0+0.i686-linux-musl-libgfortran5.tar.gz/md5/cec9f3b9d4924a49a34c632efd167752
+CompilerSupportLibraries.v0.5.0+0.i686-linux-musl-libgfortran5.tar.gz/sha512/9320eee2b6dbadd4e0ed3f8763d58854eb179b1d1661c8f1dba75c22af2330812040507944b0ab20b7a7cb233c9953a1d3a4b27937e7b7a858aed2255ad0fbbc
+CompilerSupportLibraries.v0.5.0+0.i686-w64-mingw32-libgfortran3.tar.gz/md5/c36411b24c8bec4805230bd4fe0f2391
+CompilerSupportLibraries.v0.5.0+0.i686-w64-mingw32-libgfortran3.tar.gz/sha512/839b447efa46caffa699258ec8ae5e0a55d7f98a7fc037b48e6a6c29193e3d8bf48397575cc518716f41e2e9344daa670693df605a1b9d4a23d3f454ec5ab399
+CompilerSupportLibraries.v0.5.0+0.i686-w64-mingw32-libgfortran4.tar.gz/md5/d2e392edff3525afff6734fdf47c9ab1
+CompilerSupportLibraries.v0.5.0+0.i686-w64-mingw32-libgfortran4.tar.gz/sha512/1816c7ed409acc1435c7fcfd550b7664a08b31ecf433a906d8903a60ed458dab0fa712bd0d1590a0dc8506763a617446ba402efc78a2c010562c45e8eca66a88
+CompilerSupportLibraries.v0.5.0+0.i686-w64-mingw32-libgfortran5.tar.gz/md5/2cfeb5cd0a7e2400c9be3e846a1875d2
+CompilerSupportLibraries.v0.5.0+0.i686-w64-mingw32-libgfortran5.tar.gz/sha512/ca620dd8542ffe9a177b0f95712e77e59b0fc1044e0186dd7468a86aba4d2b92931a1d6f980e75cceb26c6c5f9dab427f4ce32e0f77998b9a827b3ce9151041c
+CompilerSupportLibraries.v0.5.0+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/8ba0e4070358839909934d8a1bc9e0bf
+CompilerSupportLibraries.v0.5.0+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/8750769ca321f863fbb354f6e4e76b1241f7e24e5f4ea14ea511486dc5bc4fe8274740f1500149c5ac85a8214a0193c9a09332f35eb47e6222bef9070eecc6c8
+CompilerSupportLibraries.v0.5.0+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/50554a092af3a4a651b53e3ce3cf8a2d
+CompilerSupportLibraries.v0.5.0+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/53ec765d4de3b0bae9727b3b2a27437b184f2072aecda5d0b22d648a95fbba777bb89da823bc851d7242cd3f8c212e3fdaea8e5af11db21c578c2e12db51991d
+CompilerSupportLibraries.v0.5.0+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/b09a5913b537b26aa7f8996b1877c748
+CompilerSupportLibraries.v0.5.0+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/b68020c1b1acf4a1c51822bccc1eb67574ceffae3c133e7efe22ec0cc3a674a7c056c01be02c1c681f469fe1443d76baf4b0e305bec8181e57c3ce5a446a5c22
+CompilerSupportLibraries.v0.5.0+0.x86_64-apple-darwin-libgfortran3.tar.gz/md5/1e4c5d2084f76eacb4419214668c6594
+CompilerSupportLibraries.v0.5.0+0.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/696155b560bfaf592bf7024ba0e6f084382dd269cdd25416fa8840387c101132901e94709c8d0534f038666a6f6849c3d55e8bed4223b5be499e099b49610e77
+CompilerSupportLibraries.v0.5.0+0.x86_64-apple-darwin-libgfortran4.tar.gz/md5/63b386e59f3732d03459c59000fc1382
+CompilerSupportLibraries.v0.5.0+0.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/f6c7e0611df7fd86cc9ca63b380e112561d10b489bc8fbfe911c441ef5e87776761d3c161ff5f6aade479f7e96456084c6939d7eff175ced4f42b3b9ee29426a
+CompilerSupportLibraries.v0.5.0+0.x86_64-apple-darwin-libgfortran5.tar.gz/md5/07e22a4b58aaaf145e52b36602c5b08d
+CompilerSupportLibraries.v0.5.0+0.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/8a047b0098e8504e2dde0113170416686bc70f9d685fcb19bf3eb76afe30dc16a3b0d2023eb704c25025bbef87e99603dbd2a2708b1a3df908747b06cbfc92ee
+CompilerSupportLibraries.v0.5.0+0.x86_64-linux-gnu-libgfortran3.tar.gz/md5/23048b3be33f184ffc9be42ca914aa3a
+CompilerSupportLibraries.v0.5.0+0.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/4573b21e34f4d8127a86c18f95065039da92eeb9ade4058bd8459034bb4a003ceefe29e865089126fdc36cffd95a9c12bcb72ed74bff5987a9d1f4b300ecfe45
+CompilerSupportLibraries.v0.5.0+0.x86_64-linux-gnu-libgfortran4.tar.gz/md5/3314ec0668abf069c900558de0690b65
+CompilerSupportLibraries.v0.5.0+0.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/d012c4674401773000f0de831cb8b4b6c454d0ab68d51fbbe970504e76c693211086a24a7df34de2390eaeb438ab23f63c68b480a408ab2136f442aba5094bd7
+CompilerSupportLibraries.v0.5.0+0.x86_64-linux-gnu-libgfortran5.tar.gz/md5/e7768c00909613b8f29f6a5860ff4247
+CompilerSupportLibraries.v0.5.0+0.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/43c29456a0fc74c4fda42d088903651c6bbac6b842f2aa600e3019b391b04158ee97f884e6962bd9e7a9cf337dbb1cdb2151d103e1dee5214ba798b167b1ed32
+CompilerSupportLibraries.v0.5.0+0.x86_64-linux-musl-libgfortran3.tar.gz/md5/b2a30e92ba8e40ef070e3ec7c16b97f0
+CompilerSupportLibraries.v0.5.0+0.x86_64-linux-musl-libgfortran3.tar.gz/sha512/64a4029dd1e84922728b2c93a455d7d6b262c979dddf59301ff96e9c28980fbd9c1db57e81afaece96ccb51b9751e5a0180b84e412427430487280c56d8da266
+CompilerSupportLibraries.v0.5.0+0.x86_64-linux-musl-libgfortran4.tar.gz/md5/b0610d32a80b3f87baebf0250b0f92d6
+CompilerSupportLibraries.v0.5.0+0.x86_64-linux-musl-libgfortran4.tar.gz/sha512/3b7098fbb82e4a7a903b82f942303b248e0e35be13a47e4839a036085c4a33925f1f78fe941b852331cc52de80f32bcdb9a64ccff0386e1070a6ca4600c08eb8
+CompilerSupportLibraries.v0.5.0+0.x86_64-linux-musl-libgfortran5.tar.gz/md5/3f905dd4e8b3cfd2cc3f8efcaa50a407
+CompilerSupportLibraries.v0.5.0+0.x86_64-linux-musl-libgfortran5.tar.gz/sha512/22af14d245e3c062131dd274afa6d9c7cde9a11ee2455e27ae2f7725a025fc2cd6cdb3a1a3c899988c6c3412a714c1f0763f4e08924726212405938c3cf66da5
+CompilerSupportLibraries.v0.5.0+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/2c56a22c935dda76831f36c713cca099
+CompilerSupportLibraries.v0.5.0+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/6bd9bd6ec8b6b18013b3c6de344de134835c9281d39bc5e6e31928970c60b584fa625df18efbce3ea571dee53011dec73e9aae9159e812f219692fbb4dd86a2d
+CompilerSupportLibraries.v0.5.0+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/e483c3e85b4d4b2685ee4e8f09951ac1
+CompilerSupportLibraries.v0.5.0+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/47c2f305237ccd55ed2ba445cbcd599c23f9c1392388017506f9d61a4dc8fec4ba4136be81a0e82de4f161f6788c4a62acc9d71efe6cf90b766e5339950ed337
+CompilerSupportLibraries.v0.5.0+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/41c25d9cf7545721b8d4dd2386e95ead
+CompilerSupportLibraries.v0.5.0+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/173570bbf4eb60d678472058ec2c18732cd27ad2911457c83f47a1d97c1c0028d91005cf56539e51d4a04178544ac0bba47ea27e74b6b4e8d3310551ad3167fe
+CompilerSupportLibraries.v0.5.0+0.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/f124c93580a038ce806f479568b46597
+CompilerSupportLibraries.v0.5.0+0.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/c313390dbcffaea6cb5202645b5304134a1ce6aac5a3835696f45316c8170b237c04f13166694eee0f31903ac1e5c3cd73ad8974ba19b44289da3504d3436f8c
+CompilerSupportLibraries.v0.5.0+0.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/050fe7a6bdf980c198f4c201629d15e0
+CompilerSupportLibraries.v0.5.0+0.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/211e435f5e2b7209aedaf4a81b5e0d5e615b9144de248c06e43dc61b31890dbde80d718e74454b489bd1f77476d34bd01d3f9a25355bc50fca0dc07df0264cad
+CompilerSupportLibraries.v0.5.0+0.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/3566d0f714c1503b92160b486a4eaa4a
+CompilerSupportLibraries.v0.5.0+0.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/b2f29c1c6dc35e1002021f8f15a20a72a57c346b33a6d045ff7a261e88767738a4da1dd88aa71a20514bdf6376099979c9d938173fa3ae28641c40372c94db60
diff --git a/deps/checksums/curl b/deps/checksums/curl
index 267cc501c29c36..a1ef36078d1f01 100644
--- a/deps/checksums/curl
+++ b/deps/checksums/curl
@@ -1,7 +1,5 @@
-curl-7.73.0.tar.bz2/md5/42faf77a26f51af427ef5385472677a5
-curl-7.73.0.tar.bz2/sha512/09515257032c9fc4b5141c20ed6d9dc25bb867803ec13ec76bf7fe40c439c3b65a7529b2d48e9ed049c3ccd2f5eda28fda09fcd13dd9e53b707a60ec89d98ca3
-LibCURL-8310487053915d5c995513f569ad85ba65c3544f.tar.gz/md5/554c77171cdc5edbcaf209c19aeacd4f
-LibCURL-8310487053915d5c995513f569ad85ba65c3544f.tar.gz/sha512/59021fefe13e8a8e30edd786cdf1914dc3a3c1bd87266d79a0fc3b8df9c4ebe32fc0d9f08792d510be0219cd58f57b0627ebf6333c412f1b55c1ecf4e1af181d
+LibCURL-cddeb7f4a7d5718a4a1be602ffcbe68299a1a37e.tar.gz/md5/eaf82b82c59404386d9f5744a6447e5c
+LibCURL-cddeb7f4a7d5718a4a1be602ffcbe68299a1a37e.tar.gz/sha512/b286be84c9bc405479d13feb44ba97109ad17e1de8be8f1b2bb3125bede884311b190e0cbb0a0c806e7034ecd688b72dd11c1133cca1a2ef74370ec1200aa790
 LibCURL.v7.73.0+4.aarch64-apple-darwin.tar.gz/md5/457083bbbfe9b7602a62acf5df56c123
 LibCURL.v7.73.0+4.aarch64-apple-darwin.tar.gz/sha512/93dbb2cd0a126dca3f721370f1e92e1bd6ead5fb2971f7ec61c36b47924057ce306715b127fe1bd6e4ebb369cadfcf7f41d9f26fa367c185ee1b4c8a448f456d
 LibCURL.v7.73.0+4.aarch64-linux-gnu.tar.gz/md5/683c6cd6e9d5bec018402068678c811e
@@ -34,3 +32,5 @@ LibCURL.v7.73.0+4.x86_64-unknown-freebsd.tar.gz/md5/0b2b43c6d695d58660974e11b582
 LibCURL.v7.73.0+4.x86_64-unknown-freebsd.tar.gz/sha512/3f8a9e633d10c797f9671f4a0a13c32beaf1f97cf5afd923afd12e3431e57ba2183a3e7957cd9f6e59ba8262ad0f2171094e8b396e21cd00e70af0eb728e4fd5
 LibCURL.v7.73.0+4.x86_64-w64-mingw32.tar.gz/md5/5e62ab3c00ce842c0d50aa45cbae3b1e
 LibCURL.v7.73.0+4.x86_64-w64-mingw32.tar.gz/sha512/4771a522a6ddb4848f614a5892fd31cd72e1c88b1c16eb4492d0bf119495d672eb6a954e37d2a2376341ddd7e118718a4ac9453320b252facb131ce814498d3f
+curl-7.73.0.tar.bz2/md5/42faf77a26f51af427ef5385472677a5
+curl-7.73.0.tar.bz2/sha512/09515257032c9fc4b5141c20ed6d9dc25bb867803ec13ec76bf7fe40c439c3b65a7529b2d48e9ed049c3ccd2f5eda28fda09fcd13dd9e53b707a60ec89d98ca3
diff --git a/deps/checksums/gmp b/deps/checksums/gmp
index db01480b953d08..da510e3dc2388e 100644
--- a/deps/checksums/gmp
+++ b/deps/checksums/gmp
@@ -1,3 +1,5 @@
+gmp-6.2.1.tar.bz2/md5/28971fc21cf028042d4897f02fd355ea
+gmp-6.2.1.tar.bz2/sha512/8904334a3bcc5c896ececabc75cda9dec642e401fb5397c4992c4fabea5e962c9ce8bd44e8e4233c34e55c8010cc28db0545f5f750cbdbb5f00af538dc763be9
 GMP.v6.2.1+0.aarch64-apple-darwin.tar.gz/md5/e805c580078e4d6bcaeb6781cb6d56fa
 GMP.v6.2.1+0.aarch64-apple-darwin.tar.gz/sha512/62435e80f5fa0b67e2788c8bfc3681426add7a9b2853131bbebe890d1a2d9b54cebaea0860f6ddd0e93e1ae302baba39851d5f58a65acf0b2a9ea1226bb4eea4
 GMP.v6.2.1+0.aarch64-linux-gnu-cxx03.tar.gz/md5/5384d6ba6fd408bc71c2781b643cd59a
@@ -56,5 +58,3 @@ GMP.v6.2.1+0.x86_64-w64-mingw32-cxx03.tar.gz/md5/1499a265b438cf5169286c1830eb573
 GMP.v6.2.1+0.x86_64-w64-mingw32-cxx03.tar.gz/sha512/d2e6fe76abe0a0cb1a7445ea93cd5bd0bf9f729aec8df9c76d06a1f6f5e67cce442be69b66950eb33aa22cfda2e5a308f2bade64018a27bebfcb4b7a97e1d047
 GMP.v6.2.1+0.x86_64-w64-mingw32-cxx11.tar.gz/md5/fdb4187f617511d8eb19f67f8499a8d0
 GMP.v6.2.1+0.x86_64-w64-mingw32-cxx11.tar.gz/sha512/bb6d8ead1c20cffebc2271461d3787cfad794fee2b32e23583af6521c0667ed9107805268a996d23d6edcab9fe653e542a210cab07252f7713af0c23feb76fb3
-gmp-6.2.1.tar.bz2/md5/28971fc21cf028042d4897f02fd355ea
-gmp-6.2.1.tar.bz2/sha512/8904334a3bcc5c896ececabc75cda9dec642e401fb5397c4992c4fabea5e962c9ce8bd44e8e4233c34e55c8010cc28db0545f5f750cbdbb5f00af538dc763be9
diff --git a/deps/checksums/libgit2 b/deps/checksums/libgit2
index 0603b2ab9a6622..6d836e4a4501c7 100644
--- a/deps/checksums/libgit2
+++ b/deps/checksums/libgit2
@@ -1,3 +1,5 @@
+libgit2-7f4fa178629d559c037a1f72f79f79af9c1ef8ce.tar.gz/md5/c6a819fb0bf924df61e1595624a0988a
+libgit2-7f4fa178629d559c037a1f72f79f79af9c1ef8ce.tar.gz/sha512/3de9c042115b309dae3b8e0008edf2e762addd90a7bdb54b3cf634811271ab9dbfea35656650eb53a3faec73caf33ed199fb885ec21f611c79d909d9f4fe48c5
 LibGit2.v1.2.3+0.aarch64-apple-darwin.tar.gz/md5/0eb4d35fa6078de8da3dc79a420d5bc5
 LibGit2.v1.2.3+0.aarch64-apple-darwin.tar.gz/sha512/aff23e9d56827be54d8f515489eed92d9cb4d9a1b69c12bbee44fa1343ebc906b38b082c11e6ecb227d256de5cba19b2f3a03c022b88809a33c225c34db08f7c
 LibGit2.v1.2.3+0.aarch64-linux-gnu.tar.gz/md5/f2f31aa978e43aa68a0e73107067c7cd
@@ -30,5 +32,3 @@ LibGit2.v1.2.3+0.x86_64-unknown-freebsd.tar.gz/md5/e1623fd3f8f564085d47ec650a40e
 LibGit2.v1.2.3+0.x86_64-unknown-freebsd.tar.gz/sha512/295d55b78b21ef1c2ba471c8b5618b168dd633e986db9e1ec3e9630e352446ab18e8fd0992010b6afdd922463bb285bc45885a8b35a502d574553fe61c1f7b9f
 LibGit2.v1.2.3+0.x86_64-w64-mingw32.tar.gz/md5/c845901c4d9dc145f76469d45abad934
 LibGit2.v1.2.3+0.x86_64-w64-mingw32.tar.gz/sha512/21951f3bc902f30b8cc75c3af233aa7fe8457e412e7758d556bf71de149c7f2325a5c4c204a7a462cc6a61b3dcb90f0d25e684ffd8617b0a1505a1d31cf2f69a
-libgit2-7f4fa178629d559c037a1f72f79f79af9c1ef8ce.tar.gz/md5/c6a819fb0bf924df61e1595624a0988a
-libgit2-7f4fa178629d559c037a1f72f79f79af9c1ef8ce.tar.gz/sha512/3de9c042115b309dae3b8e0008edf2e762addd90a7bdb54b3cf634811271ab9dbfea35656650eb53a3faec73caf33ed199fb885ec21f611c79d909d9f4fe48c5
diff --git a/deps/checksums/libuv b/deps/checksums/libuv
index f03a3c6ce9a359..1dddd23bcd992c 100644
--- a/deps/checksums/libuv
+++ b/deps/checksums/libuv
@@ -1,34 +1,34 @@
-libuv-fb3e3364c33ae48c827f6b103e05c3f0e78b79a9.tar.gz/md5/dc93ae5119c8934f374570342ef036ed
-libuv-fb3e3364c33ae48c827f6b103e05c3f0e78b79a9.tar.gz/sha512/29947c236aef8931be4767df1cd8404ee9b036ee107b31cbce6fad9a97743df57d068b15bc4bd00320b9b81cd879258a9ec9dc675853e424ccdb8d6bdd226240
-LibUV.v2.0.1+2.aarch64-apple-darwin.tar.gz/md5/ed00585eb80fd82c014e2a431269ccec
-LibUV.v2.0.1+2.aarch64-apple-darwin.tar.gz/sha512/a98ffde4ff49a71699f798622c62b5f95d0dc010f1de88ad57ee437baa73cb25e263a8a6c4de86364fb31076993326d9bd0223db3e1ecf6904c1aa6e7e1f0120
-LibUV.v2.0.1+2.aarch64-linux-gnu.tar.gz/md5/3e75495795d5a4eee8ec9c1619a5caaa
-LibUV.v2.0.1+2.aarch64-linux-gnu.tar.gz/sha512/c0f1396ccc7784772d4c40f3a62d6bb22c6859a3258b07727348f436b7991a8f6d51ec46c09569f17a7bd600a321ab3b3cd59538d39c228cd3e205c33e755a51
-LibUV.v2.0.1+2.aarch64-linux-musl.tar.gz/md5/c2899ea791cfcd37ff85c1182330168e
-LibUV.v2.0.1+2.aarch64-linux-musl.tar.gz/sha512/b340ff4e28a5e566ee2640926265b2070acfcc4b3c87fc2e414e2b2a9ff23be852d92ff4f51e36e21de029f23bca5524e7e267ba091401b070f4d5cd9bd03c54
-LibUV.v2.0.1+2.armv6l-linux-gnueabihf.tar.gz/md5/6b6c080a88050051100af58a4e96f25d
-LibUV.v2.0.1+2.armv6l-linux-gnueabihf.tar.gz/sha512/936a4f4baf21a0fe7492bc3fab3475f653824daa184030df764af1eb5e71d152aa5dd3449b1cf31f77a460169853d7371597056e641c440c7b3d4f5c6be1ce10
-LibUV.v2.0.1+2.armv6l-linux-musleabihf.tar.gz/md5/032010ad683931906d2467753cebea9e
-LibUV.v2.0.1+2.armv6l-linux-musleabihf.tar.gz/sha512/b8050662775d75e59b072c688ae44f7a3d3f54d114270902a825e01f4d74c8e131a3a75cd95e31b9ebf4488d64ff6170a67e5986e02e5fcb105bf5d3cc28706b
-LibUV.v2.0.1+2.armv7l-linux-gnueabihf.tar.gz/md5/e558bedc0b69d6575e43df0eec958ad9
-LibUV.v2.0.1+2.armv7l-linux-gnueabihf.tar.gz/sha512/d3404b20b7e8e8fe935ca1e7da55823a6ff6703c822acf622638dc5c744bfefe1745e8e3a67054abc3aec0c10793ac46dbab29ccf7269d8a3a0d857e3a1a93e7
-LibUV.v2.0.1+2.armv7l-linux-musleabihf.tar.gz/md5/1aa605d9930ba63874483defb35a96ba
-LibUV.v2.0.1+2.armv7l-linux-musleabihf.tar.gz/sha512/927e710191e6b8d1c09054780b4af6336c6744ceb0885c7a5a0cec5e08bfb0d53ede75cc8bb145fda08f720d98a77e102a2903e08a0fef75a0b630631db6f35e
-LibUV.v2.0.1+2.i686-linux-gnu.tar.gz/md5/4ec3415ef12615581f8b26ec374a35bf
-LibUV.v2.0.1+2.i686-linux-gnu.tar.gz/sha512/fd37bb83cda297ec80332cdaed2a704ea43c3ec72fb539042ef09aa510275a0418c750278fca9e463bdecdca957f8457103f0be6eeae1017387141eb2b906694
-LibUV.v2.0.1+2.i686-linux-musl.tar.gz/md5/f24ea24837ef06be346d239cbb33ae7e
-LibUV.v2.0.1+2.i686-linux-musl.tar.gz/sha512/e9388568b20fa71d95e331a336aa3b17396e87d99aef6d752cb48416cdc9501e2ea887702a5765a22dcf6b5f7b730f5666ed3a639e7fe0113e9032f0d760b352
-LibUV.v2.0.1+2.i686-w64-mingw32.tar.gz/md5/890d1f7963a5dc927c15f8433b69dcf7
-LibUV.v2.0.1+2.i686-w64-mingw32.tar.gz/sha512/7e7d2b6405bbb1b62725a61d649fcbd53c2dcb65b8a6deea5a186717f88dbab4198a0f58d4223500aa991976725f8e1c4272ab29866174c5f555ba75a2e9b0ee
-LibUV.v2.0.1+2.powerpc64le-linux-gnu.tar.gz/md5/7fa0d0e9344f4a4c4d5075ec5d368b0a
-LibUV.v2.0.1+2.powerpc64le-linux-gnu.tar.gz/sha512/b5587e9e1072bc6becd5d1354294a3afcfda1c52e9a5f56387d43c7300369106059a2bac8669a919ce25d888b2302711c7433a82e366648935481568420daeef
-LibUV.v2.0.1+2.x86_64-apple-darwin.tar.gz/md5/716960539cbae1e38e1cf88c2670927d
-LibUV.v2.0.1+2.x86_64-apple-darwin.tar.gz/sha512/7b064d99428b312302c698e73e8a7919147c0522857a24e08d16144aea83429c5ac9526b6553697f28784457a5b417958fc5e4e28b4191861004dddc3f95566c
-LibUV.v2.0.1+2.x86_64-linux-gnu.tar.gz/md5/ff70887943a3fc68eddcb66ed941417e
-LibUV.v2.0.1+2.x86_64-linux-gnu.tar.gz/sha512/00610022d700dd6b33c97decea43490fcd4218fde2e57c0d6317abec046adf220fdf4d03f132938ec78af85653a5262d1344527c632c06aec53750710a6b317c
-LibUV.v2.0.1+2.x86_64-linux-musl.tar.gz/md5/a5834444d0b7e7d88cc87e5eb458bca3
-LibUV.v2.0.1+2.x86_64-linux-musl.tar.gz/sha512/e2e6e6726e8ef0962c35d7ff54a60b3370cd5b927fda8b4415e8d2f19b098ed9bd00e262eb18d11a73e2e27c88aefa72c3a6e9c193d27eab436c4d9d6531cd47
-LibUV.v2.0.1+2.x86_64-unknown-freebsd.tar.gz/md5/951d9da43208d2c48eb00c7ce300b4cf
-LibUV.v2.0.1+2.x86_64-unknown-freebsd.tar.gz/sha512/87e578f6cf34c9cc1c965f4958048967740b4ab530836aff33b3339c0d927beccf1f0c58f7e256c9ba98bf1fa0362186a24fcc5bb79ae1f149f86183b4b7f5c1
-LibUV.v2.0.1+2.x86_64-w64-mingw32.tar.gz/md5/4e9c2f078ed7b617a1aa447e1c44abbf
-LibUV.v2.0.1+2.x86_64-w64-mingw32.tar.gz/sha512/b3b14c5d447cd742cade43b56bf3867d530dd391c105ddbd7f2b9e0e26ee6a1f3e6fa11148a9ba1540fa598b155da3e56d369a96273a5ea1343b5c3cd4821953
+LibUV.v2.0.1+4.aarch64-apple-darwin.tar.gz/md5/c44261bfb4a254100af5085624e9805c
+LibUV.v2.0.1+4.aarch64-apple-darwin.tar.gz/sha512/f94b74fcd6a39c903f05efdd626cbe6af9016099f37cfbe0da50c0dce962a7998f884a38a586b14a9b5e7a01b96f653e5e204afbcf2c22188834394de3b3e607
+LibUV.v2.0.1+4.aarch64-linux-gnu.tar.gz/md5/5cf0c759aacd96784a81b464240901ae
+LibUV.v2.0.1+4.aarch64-linux-gnu.tar.gz/sha512/b8488345516cf424bcf4b4637799cbfcf6019b109dd6104784d09381a85d4f145c02d0e0ad3a3a3679b68b7d5a5ef0a9d63cbed62734272c80e8e3927eb047f5
+LibUV.v2.0.1+4.aarch64-linux-musl.tar.gz/md5/1c3ef838685ec4b32a68ee260cd9dfba
+LibUV.v2.0.1+4.aarch64-linux-musl.tar.gz/sha512/f2560edceeb680ad46a3f4146a0d22f28a6727e892520f9599f0d5a105b0d7776dadf688b48e773f7e5b2d4204d3f56bd0f8f23d09c6ac5b4d6cd85c05a20fe5
+LibUV.v2.0.1+4.armv6l-linux-gnueabihf.tar.gz/md5/bc7fa34f167fa6ed945ef2f29807e910
+LibUV.v2.0.1+4.armv6l-linux-gnueabihf.tar.gz/sha512/124646ac504e8f995bccfcac0b8ae5ef524016f1cc2f2e58e058b23624193c52ab7f554ea4ffcb3046422e638cb2422442a8fcfb9e8b828d173f1f97d5ade910
+LibUV.v2.0.1+4.armv6l-linux-musleabihf.tar.gz/md5/c123949e81d4e49c8e1a4a63327c2ccf
+LibUV.v2.0.1+4.armv6l-linux-musleabihf.tar.gz/sha512/b04aa8e293abcabf125e63d11efd56215d3605e1709b2635a7325d84b5e4de7174fb69695bde3c1e042309333f7ad80f8782bc8a9576efdbfe8cac62dcbba7bc
+LibUV.v2.0.1+4.armv7l-linux-gnueabihf.tar.gz/md5/eb031d1135a79615381f3010b85e4a02
+LibUV.v2.0.1+4.armv7l-linux-gnueabihf.tar.gz/sha512/13383beb19cf6fa6601d02fd7c193f27877ccc63acefd935edd2ff7c13d4b8d8b900b5571da19fe418e007e3ade4c49c1f64a971326abb50aca5dec60c10a4b6
+LibUV.v2.0.1+4.armv7l-linux-musleabihf.tar.gz/md5/09ce6bb24ca286f92675349d583c03db
+LibUV.v2.0.1+4.armv7l-linux-musleabihf.tar.gz/sha512/a71f58f61e7bbd479bb66c560804b99b0e4218df0e9b4b325b254cd58d8ab8600eca35a8b9b5e54f57099834ec22e36a1a11fb923b150305c7561242b7e62030
+LibUV.v2.0.1+4.i686-linux-gnu.tar.gz/md5/1efc848d7961a677cdeb0acec37d826b
+LibUV.v2.0.1+4.i686-linux-gnu.tar.gz/sha512/8855729060b7e59a5a34ff2aea209d4af84657d7b801e736fc374d49d338e1bc87796e3346eeac7340f3e8e1f8037bf420144f04d81b93d3017fb5a32eece43a
+LibUV.v2.0.1+4.i686-linux-musl.tar.gz/md5/71bde27fb51e9c7ccfe1c7eab34afbb4
+LibUV.v2.0.1+4.i686-linux-musl.tar.gz/sha512/588616fd1ff342e8070def2121fa2dd6be349e9ff1d19653d2414f0c713ba02d50a89aa7cdddeb19e6864654690c870164238204767990f09b277ddf788c9935
+LibUV.v2.0.1+4.i686-w64-mingw32.tar.gz/md5/090d2e845fcef61c3ef019fdbf7877df
+LibUV.v2.0.1+4.i686-w64-mingw32.tar.gz/sha512/aaea203d285ee490803852d27fc628763358680e05373208385f85ca33d14bc09baf63cf243fd45788ef68b415858e5d919178574322cfc9e4a42774227ba8ab
+LibUV.v2.0.1+4.powerpc64le-linux-gnu.tar.gz/md5/b69fd18d09ab59e3b139963b3988321e
+LibUV.v2.0.1+4.powerpc64le-linux-gnu.tar.gz/sha512/e257c2c86af1c7a7ab76cd08faabf7e74ef9fa462e0f8f3df346d11a249157b4b47130fad44b47317d358bf74233bb9b854c57a9b76c29c0e05f15f9322b8b53
+LibUV.v2.0.1+4.x86_64-apple-darwin.tar.gz/md5/4242ead21755564805144cf2712e3d55
+LibUV.v2.0.1+4.x86_64-apple-darwin.tar.gz/sha512/f7448587af8186c5eb59f81cca7f48f840578440762b22a7a122e8243509bb9e2c541e337c1f2d1e94599cce1a928ec6b6c14e219e412bed21d82a68416caece
+LibUV.v2.0.1+4.x86_64-linux-gnu.tar.gz/md5/46155e2617a76e3910f379c33127a31b
+LibUV.v2.0.1+4.x86_64-linux-gnu.tar.gz/sha512/fa94f0dea120ff5381e7803672dd588ef69990d488bc7124c662a55ab52805b874b0913fb5c2b623ccf22ff0d1065229c3a06f44669a758b8186464118902b35
+LibUV.v2.0.1+4.x86_64-linux-musl.tar.gz/md5/921637f115807c2f0b86d6a5c1949789
+LibUV.v2.0.1+4.x86_64-linux-musl.tar.gz/sha512/95ee29a34d919dae348fea2ca81d7549be8210143936987ea68f28271331983e4358aaba884edc5e1fd16eef8e9d35770f6b113d3f1db412a3a829d381b9df42
+LibUV.v2.0.1+4.x86_64-unknown-freebsd.tar.gz/md5/f4e733fa82a5a34da86a8e9d143596c1
+LibUV.v2.0.1+4.x86_64-unknown-freebsd.tar.gz/sha512/f6e390126d2e75c2dd32da40db48905c48134437e52634101d10ade67b7426101324ccf652bb4c4cc29272c5b641d5543e673bac7c6ec1c31f8d7e77f61c09c0
+LibUV.v2.0.1+4.x86_64-w64-mingw32.tar.gz/md5/1c8e63632fb40fa97805efde91de764d
+LibUV.v2.0.1+4.x86_64-w64-mingw32.tar.gz/sha512/2c64ac559beccbee8ce62dc0ce0277d8eaca5e46c13585bf10d2a79811bf4f5dcac30d49b2b6a02472c72857a859be021e4e7114f30e560f97c4e3979486dc7c
+libuv-c6869fba163a1e04af64ede438a8fd0191e75e9e.tar.gz/md5/b60fc7b00bdfafcbbc66317858882058
+libuv-c6869fba163a1e04af64ede438a8fd0191e75e9e.tar.gz/sha512/197b386af51eb4456ce65e2951e033731e1194fca8bed08755a78360ebb3431ab4d8d69a75279e7995d2e4197133d613892e5b9b5d6411bffa692df35542420f
diff --git a/deps/checksums/llvm b/deps/checksums/llvm
index 69bacfdf5a9286..a865392ba8f72a 100644
--- a/deps/checksums/llvm
+++ b/deps/checksums/llvm
@@ -1,176 +1,234 @@
-libLLVM_assert.v11.0.1+3.aarch64-apple-darwin.tar.gz/md5/1468270825363bffb3d99d3ca9216d7d
-libLLVM_assert.v11.0.1+3.aarch64-apple-darwin.tar.gz/sha512/b94c1273553204440b5f51a3b1fa8c96ad8eae2090e50c3cf10fed44f960e5d0a6c95d967b7e006f842d550cc59b06ed8585962399772e1c2d6d1e6cf7686567
-libLLVM_assert.v11.0.1+3.aarch64-linux-gnu-cxx03.tar.gz/md5/5ea96996524ebc1c69794d7e9ba2b252
-libLLVM_assert.v11.0.1+3.aarch64-linux-gnu-cxx03.tar.gz/sha512/8fd7d5debae5277a5d099c24c195ed1c110f95169bc05ebe42fbb697683f2012a95507b682dd978a25918582dcfd548233d26a3e74a3098e85758dd63b5ba98e
-libLLVM_assert.v11.0.1+3.aarch64-linux-gnu-cxx11.tar.gz/md5/49e5dedbdcad40a5966aca51c8913d3b
-libLLVM_assert.v11.0.1+3.aarch64-linux-gnu-cxx11.tar.gz/sha512/107a57a298416760c2f806901080963c2b1b3d09f16bef698db6e28b084c36f86796956deff096b4e014a61b29319e1fe1ed95f6a807ab3f1a6350288f85160a
-libLLVM_assert.v11.0.1+3.aarch64-linux-musl-cxx03.tar.gz/md5/7d594f4af283181513816fe50d299861
-libLLVM_assert.v11.0.1+3.aarch64-linux-musl-cxx03.tar.gz/sha512/f074bca4a57c827c16558783b7efe8d31f12d4a0500a374ed9303a4beba2e848343d9d66da4bd0643c8df10004a6b7a9074a0f99bdba6e2b7eae939a892c7a0f
-libLLVM_assert.v11.0.1+3.aarch64-linux-musl-cxx11.tar.gz/md5/1328f6c829ddc5361e5217aa9d2ba1f0
-libLLVM_assert.v11.0.1+3.aarch64-linux-musl-cxx11.tar.gz/sha512/238fc6e8eeed72f6fc23b9ea5e9083b8812447799fbb73ba13b2613c6ae0d35cdd8835276e8c9e34450d8c682121cf28601dbc6ce6c160ccabe7c36043441d78
-libLLVM_assert.v11.0.1+3.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/f863e9b6afea27c93b04575194bd83b7
-libLLVM_assert.v11.0.1+3.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/deb3d89d5cdfdeee2a8665d79e2cf77dca693c66bdfd5e10fdba9cb54cefce27a85aed76804ade511a65aff5e1a99f66748b827253ffdaca6c5c55c5947e4d5d
-libLLVM_assert.v11.0.1+3.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/0fc8a804ddcb6bebc2ca285697427a60
-libLLVM_assert.v11.0.1+3.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/8cc0f98bc20bcaa47a6c6e28a559018b572b27c2c3507554abd2d5a09ecc36219881ab391625f47a581cf7a569534b693760abbe3b31a31b111c3a751051bc55
-libLLVM_assert.v11.0.1+3.armv6l-linux-musleabihf-cxx03.tar.gz/md5/eaf1aa0154ba5906a48848b1fd4770da
-libLLVM_assert.v11.0.1+3.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/e4531b5eb09be3ac6440e32dcface0360cc75595da00fa1bd178306e9780bd26eb7413a705d4b49d3d7eafd8f403a448bad9aae40bad56fb40bbea8db75e5f06
-libLLVM_assert.v11.0.1+3.armv6l-linux-musleabihf-cxx11.tar.gz/md5/57307cc154a85a84297f94f71593829d
-libLLVM_assert.v11.0.1+3.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/c8c8d35982a7df99ec468b688dee8ccf627512c6f009aea27cbd7fc8d350bcb915136753c18fc1a0ebcf621d9540d1995fae3f7532c31dfe70f1b3ea07275695
-libLLVM_assert.v11.0.1+3.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/701b49c22b69b6e4a91cc1559b3b2622
-libLLVM_assert.v11.0.1+3.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/1173b249d04df31a3d17e120666953654ce3274b749e137a24e137faa28b424a3a60bff9bf3c23f8299cdcb6b77fcf883b60654abced4b854e6080788a4093ea
-libLLVM_assert.v11.0.1+3.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/e781f504156bdf748cbebbae86173fcf
-libLLVM_assert.v11.0.1+3.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/572d0eadc5f3c495aef529d03fe3a3f890cac655fb7b8fc7dba92943023355a4494f72a2de5bb302756d5bbc36658b32babe4efdeca2f85f0b9f244f3c5e650e
-libLLVM_assert.v11.0.1+3.armv7l-linux-musleabihf-cxx03.tar.gz/md5/b460698fc6f652366a0b869bfb2b1646
-libLLVM_assert.v11.0.1+3.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/00f3caefa2885be6550f20a86048057b6beb17044ea822ffa9fb9df65c848eb7ffafbac4f6546ca16a1dfc344cfb851b7d42cc3a7f3f8a2b19a5044748a6f64e
-libLLVM_assert.v11.0.1+3.armv7l-linux-musleabihf-cxx11.tar.gz/md5/b4f0efa367651e7e37976cb7052eda02
-libLLVM_assert.v11.0.1+3.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/5af81df4c76dad4ab5387e3006f350f9387dbda5b131adb2b75669dde284a281af34f041dd00a0e8b6e0e8e92e7e4138fec5f83a6e2fc82fca8c3c6a637900a1
-libLLVM_assert.v11.0.1+3.i686-linux-gnu-cxx03.tar.gz/md5/ee47fdaec1107506ace19f70d090622f
-libLLVM_assert.v11.0.1+3.i686-linux-gnu-cxx03.tar.gz/sha512/32c7702bc599f06be6c5403574b1cece6d07b2c9787bd3b08e73a3746a187956609f2b7cdf0c673fbdd3105030138cf0c622a08e59f4180cea5109cc486fc570
-libLLVM_assert.v11.0.1+3.i686-linux-gnu-cxx11.tar.gz/md5/d6a47e2c969aed82c377f42248b54b75
-libLLVM_assert.v11.0.1+3.i686-linux-gnu-cxx11.tar.gz/sha512/c95f7e895832deed7d99155ada1df46e1dd1e43dd24fb8aaf94e4109f301b8163b8c8f83f3a2fd65d7b445e4d194f456e2cdd93ba36ed70ac4131c805a0c89e0
-libLLVM_assert.v11.0.1+3.i686-linux-musl-cxx03.tar.gz/md5/158ce4bcdb7cdf26faad269a6133e2eb
-libLLVM_assert.v11.0.1+3.i686-linux-musl-cxx03.tar.gz/sha512/f19f556f25e602da754e666476686411d484297cbe1d51f8d21e167feb241c44e1b94d502b4984649340c515dcf38a53be9d949805c29ca0e959101900f68fc7
-libLLVM_assert.v11.0.1+3.i686-linux-musl-cxx11.tar.gz/md5/fe72914bc292072e1f474ffb41e01a99
-libLLVM_assert.v11.0.1+3.i686-linux-musl-cxx11.tar.gz/sha512/87a300e6928b69a4ddc59a375a9c172cbef82dd14ee101ac97ea1a652e1e4cb69d8c1ba61f4e4c2889884c58a571a257cd3390a504ceb88b0cb4a514143541b8
-libLLVM_assert.v11.0.1+3.i686-w64-mingw32-cxx03.tar.gz/md5/309fdd429e740941e0deab08f1043c5b
-libLLVM_assert.v11.0.1+3.i686-w64-mingw32-cxx03.tar.gz/sha512/b01bcb83a6ef208692bd26899e7e75177f6bd619afeae0dc46217ece0f79bd81d2f07fedf70084478fe1f8abcc2d031a0ea06b55ac55a34f3facabb38a2a993f
-libLLVM_assert.v11.0.1+3.i686-w64-mingw32-cxx11.tar.gz/md5/a9ee212809e891f147916cb6568f8b0e
-libLLVM_assert.v11.0.1+3.i686-w64-mingw32-cxx11.tar.gz/sha512/927a010c326dac7faa4ce822b7ed01048b825a8050ae03d11963a864bf8f70b810e58985d37ec04cb69e7ca5c8a4956438f8c5f50dc8af90dc00bb10c0b6dd28
-libLLVM_assert.v11.0.1+3.powerpc64le-linux-gnu-cxx03.tar.gz/md5/6a2f2cd6cad70e27029f4a7c78196589
-libLLVM_assert.v11.0.1+3.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/77bee805988a0df69cda34c5196f29234d5e4c7cf55da2154fd5a4f431123cfa3ef870a7ee10dd773eeb446647f500edca844cc89d1fc39d04caea6be58ce6a1
-libLLVM_assert.v11.0.1+3.powerpc64le-linux-gnu-cxx11.tar.gz/md5/f780073977fddf46200103b476ba4176
-libLLVM_assert.v11.0.1+3.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/40107288f36fe50e8f60fcd7b18a9058f36a6794cf8e8f06328437fb77f0890e1b890aba7f931329d98219faaf922dc14474f66340e6a70b8309f171e2bbc901
-libLLVM_assert.v11.0.1+3.x86_64-apple-darwin.tar.gz/md5/db4d528b06836c46d46755fc93cceaea
-libLLVM_assert.v11.0.1+3.x86_64-apple-darwin.tar.gz/sha512/cb62a0db35f0f1cc4723c2313eed265c4e11dc6b225d5b30afd7e2aae0c79b0dc23558640a409862a31ccbe337e4c6e4c1a35f3c93251a1c714dedf5f4637716
-libLLVM_assert.v11.0.1+3.x86_64-linux-gnu-cxx03.tar.gz/md5/b00cef3645e589cb217d8df10173124c
-libLLVM_assert.v11.0.1+3.x86_64-linux-gnu-cxx03.tar.gz/sha512/4de8fd6f90a5f85278c1045731e71855969b615e228a6b63bf53c84b145c3d8db4a7fc236301249e6ac195ee13fe62b0c23dee1c50c968ba74a1f9857e5ae3d8
-libLLVM_assert.v11.0.1+3.x86_64-linux-gnu-cxx11.tar.gz/md5/d326d3cd8918ef4abf4aa26a93f77310
-libLLVM_assert.v11.0.1+3.x86_64-linux-gnu-cxx11.tar.gz/sha512/ae49a764b848d179d8799793a79ba3cf03ece82d84e4d26b1f48256ce159984b1c54439db1f7b928f0526a85fb3b11021e9620104ae0404b7b85824aeb59d42f
-libLLVM_assert.v11.0.1+3.x86_64-linux-musl-cxx03.tar.gz/md5/10f34bf03747a92e6cdb6ed206b21724
-libLLVM_assert.v11.0.1+3.x86_64-linux-musl-cxx03.tar.gz/sha512/ed06c31ae0024e60f7ac3a77dcde37e8aa545b280e0aef669edd6cf3f060b6acb90072c488f91a44bab6e573a3e50576a4b0a11f12596a63729a46547a7276af
-libLLVM_assert.v11.0.1+3.x86_64-linux-musl-cxx11.tar.gz/md5/55ad55e3e093d1a3c32775036c68f629
-libLLVM_assert.v11.0.1+3.x86_64-linux-musl-cxx11.tar.gz/sha512/d1a30dd14b64c49397c45b2e53aa6099585ef22014fd9091dda39a7ccf03c78b85b018f3844c90c56aff47c6725bc5dfd73b5e6402ea13f73109aea6debaa276
-libLLVM_assert.v11.0.1+3.x86_64-unknown-freebsd.tar.gz/md5/7d88bc9491b83793e39f494f45b7e636
-libLLVM_assert.v11.0.1+3.x86_64-unknown-freebsd.tar.gz/sha512/c6065593e89f861661ec9c1fb35e416496e79e9e0f56e5ed4b7ba9fbf8b236716ff7733bb60b5de8942b6f9aca5705e0236e7fcbedb24109767fc12c29542d01
-libLLVM_assert.v11.0.1+3.x86_64-w64-mingw32-cxx03.tar.gz/md5/a012e6c9829ec51f260bbdedb20a2863
-libLLVM_assert.v11.0.1+3.x86_64-w64-mingw32-cxx03.tar.gz/sha512/24dc81297cab5d7eafa47de5d0ab5ca0b3da3ffd032de94488c81556d5ebaec850884dd561e14de98fa7dc0104d45acdd0298e0b0339dbb7351a9dbaa32320f5
-libLLVM_assert.v11.0.1+3.x86_64-w64-mingw32-cxx11.tar.gz/md5/bc0f8437c9f2aba1a7e696cafd3b4618
-libLLVM_assert.v11.0.1+3.x86_64-w64-mingw32-cxx11.tar.gz/sha512/ac640918e72b169fffd4a43f1453965ea0ba734c582c3f6b04cd6f020b032b4745ed79208a43ba1c1f700f0122596466da4ed881a89a67f6df3a62141dab9f04
-libLLVM.v11.0.1+3.aarch64-apple-darwin.tar.gz/md5/7b2400d9ae3ed7a9091011b7951c8ce7
-libLLVM.v11.0.1+3.aarch64-apple-darwin.tar.gz/sha512/9078fc8c24d1749b303f5c7843bbb6b5322080e4adfca0b96d7757454783617253dfcee07f24d1adaf3768563d298029a69bb75d39dc0701d024c27c66e50fd3
-libLLVM.v11.0.1+3.aarch64-linux-gnu-cxx03.tar.gz/md5/712ea06bba40025d5ad86229585e1eb9
-libLLVM.v11.0.1+3.aarch64-linux-gnu-cxx03.tar.gz/sha512/6260d68dd30f8950370db956dc85ad9ba1e474e9c27781ca2640d8d669d3fe34fdf2e020922e4f11d88c4394bb1e72f81f211e656684682207418b954f78102b
-libLLVM.v11.0.1+3.aarch64-linux-gnu-cxx11.tar.gz/md5/941dc9c8a4d11cd7cc0bf5edb47ae822
-libLLVM.v11.0.1+3.aarch64-linux-gnu-cxx11.tar.gz/sha512/0ae65d99a49713aaace58e31f539c07727c34f30e089134c64b96b35dd061f44fd9889372364b1c9a36fe295a005160210cdfdf937c23e834ffa1b4260c6705b
-libLLVM.v11.0.1+3.aarch64-linux-musl-cxx03.tar.gz/md5/1bf46309788bf2827e337376a7a7355a
-libLLVM.v11.0.1+3.aarch64-linux-musl-cxx03.tar.gz/sha512/86dcde04d89730219b22dd67ee113942ef689125cfa6d3c5294387561b7771caf948e889a0cc93128159427704500c793b62e6a6cd39a372ff6be735fbb0fdf6
-libLLVM.v11.0.1+3.aarch64-linux-musl-cxx11.tar.gz/md5/09e727bddee3132c944ed1404c421c2c
-libLLVM.v11.0.1+3.aarch64-linux-musl-cxx11.tar.gz/sha512/00a5f853557ca1f65fc807d3109357b211ba1935de66e2d3b59c8e00b4837c04b2ea72d6c6417a8fd788835938abaa8a4fed9e36d3f9cc009260150417465d72
-libLLVM.v11.0.1+3.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/46e656d8225b534bfcc01f7ceb8592cd
-libLLVM.v11.0.1+3.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/325f4d521fed87fa75b296dfc8ea83e4fe5b04e8902f66d15d2f020b5b32236184a7b31a92eb5e0acc9597b1a21f94e6b4101f9844615767b03e6c2782c08760
-libLLVM.v11.0.1+3.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/26c52c856334ed443dadb7874a808946
-libLLVM.v11.0.1+3.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/e1bc796fcaa258e8c650c83c0b533f95bfc932c389d0ea507fbfecfd1d8829e746d53377508f909971d83b4247a0b31c31d2d1106cedc84234536ac19d2dc37e
-libLLVM.v11.0.1+3.armv6l-linux-musleabihf-cxx03.tar.gz/md5/c223e17dc0eb63e02625ed269b5c4064
-libLLVM.v11.0.1+3.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/90d29712769299ff526c7c7a4307bff6e519e1e5d991b4bf4279f5231263eae6103f0887ed2310c45052c904cb05d5b059ecc3ae0d124540fb0886f8c9cbf61c
-libLLVM.v11.0.1+3.armv6l-linux-musleabihf-cxx11.tar.gz/md5/54def908d9d9d93e0c16770344761dc3
-libLLVM.v11.0.1+3.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/d89b9ef23fd2bc0f974e343cf4254747a4ca2918bdcd9080846a56f2580626b80c7fe326488083e57330d857227e7bdd43455fd5f6c9dc90fdd0f9eda3c751c0
-libLLVM.v11.0.1+3.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/c927115d9435d5712dfd6aec848404b6
-libLLVM.v11.0.1+3.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/53050300ce37e79900677b3a447570b19518bc3ab0d13958274544686bba378742d7c6ad87b850a63f825a5fbeca0287f8b67332adc81962911e43cf952c9806
-libLLVM.v11.0.1+3.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/3dcfb85331c0bb7184bc10403a22447d
-libLLVM.v11.0.1+3.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/9615b09038238a1cdabe1638a5550feb8498c9889cd805c5876350d7d5185a4ba467f2a97d2c4e192adfb59cfea76afb109e3c9687de183951a535b6b8271d5f
-libLLVM.v11.0.1+3.armv7l-linux-musleabihf-cxx03.tar.gz/md5/8b918180fafba11ae565771ed6839d6b
-libLLVM.v11.0.1+3.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/29bf5de468872c0eb90603d2981e891536da58d60bfb2d0c095dbd9ad7c3390276117164077bb89f95ed3cb21a03c0e1245c4a92cd46bc9ed25e4204fdc69e8d
-libLLVM.v11.0.1+3.armv7l-linux-musleabihf-cxx11.tar.gz/md5/3f7054e973baa77535629c08f7b22cab
-libLLVM.v11.0.1+3.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/e10819701d77d14bf28bae2fbd69ea03007ee6cce1d6c54e9dbbbcc5dca8d6c2351ee21007ba19da827c445626f5b4bb7e345d54971ba1a35e04278ed9d9b83c
-libLLVM.v11.0.1+3.i686-linux-gnu-cxx03.tar.gz/md5/16c1530e3df1d137674503bf02423ffd
-libLLVM.v11.0.1+3.i686-linux-gnu-cxx03.tar.gz/sha512/8284f5a7cf1f0fa8664031e16ebc12b7d5131de8efc7bc9e77fdf33a084294e40917f723ba9b9f56384f527ffb7bf4c7f04a2dff8953d25a2f473c90322f61d2
-libLLVM.v11.0.1+3.i686-linux-gnu-cxx11.tar.gz/md5/81670b1d368e9791705b71ae56373952
-libLLVM.v11.0.1+3.i686-linux-gnu-cxx11.tar.gz/sha512/dc0525dd275492ccd24533b89aa91a85660cb30cc3eb22f3c6fb10c854c890b5103048c802e06566aaf60d70bb00806ccd0552945573b78cc797295918968fca
-libLLVM.v11.0.1+3.i686-linux-musl-cxx03.tar.gz/md5/827cfc808c50b301c1645a21327692bf
-libLLVM.v11.0.1+3.i686-linux-musl-cxx03.tar.gz/sha512/dc504771c7cb465e40e4f6ce4b62b1f5114018209010e7bb199c2753438c1c78534d1aae39246d06fdca96f799b20c89e20a51cbf06a9ba318d0ff827d3a8519
-libLLVM.v11.0.1+3.i686-linux-musl-cxx11.tar.gz/md5/fdb9e3d33d3b0e2051548d6a55d03daf
-libLLVM.v11.0.1+3.i686-linux-musl-cxx11.tar.gz/sha512/2cdf2fd7706c4e4ad80b12f6fc77a0389d4d015e3550c346f17fa0905dea058eccdd8e5b167477ff8ffb74cd1fe673af5c7ae5a13404da0b59232cb0e66669b7
-libLLVM.v11.0.1+3.i686-w64-mingw32-cxx03.tar.gz/md5/f486dfa62a336cd7d9e1b00602fc1177
-libLLVM.v11.0.1+3.i686-w64-mingw32-cxx03.tar.gz/sha512/8264c0cbd63fc4dc17472526723ce6ce4072b276d06a95156a259f8f2b2524e1ac6c5ca768a974f8f1b65bd0674336f7922225d4be5edb90680e549774f7d05e
-libLLVM.v11.0.1+3.i686-w64-mingw32-cxx11.tar.gz/md5/fbf1949afaa7e4d14722fc21c3dc7892
-libLLVM.v11.0.1+3.i686-w64-mingw32-cxx11.tar.gz/sha512/3c897e31218f1975456d2769678f99c14b6308a464beea2fa6a4f534f355b5e9a7ef51c3f5fb0f44123ccb3c4854b906b96bb35a9ec01e1a1f39df903fadb30f
-libLLVM.v11.0.1+3.powerpc64le-linux-gnu-cxx03.tar.gz/md5/2e82ea13c59bf22f085768766d6dd8a4
-libLLVM.v11.0.1+3.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/664323d8461e5c64e26d8fb84d3d4bc68a8bc2020b3b397545aa651bfdc0539b83ca2c9bec80314322ce821ff225ae48e96521e23b159a684e74a244bb02bb72
-libLLVM.v11.0.1+3.powerpc64le-linux-gnu-cxx11.tar.gz/md5/04f3934d4421e452673e015dbbe052e7
-libLLVM.v11.0.1+3.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/b8a0a8b6c56dc0ad4b6e179efc8461e69e01e69de8dc02b46644f74dcb859a5d6ae293b38b2091298e5024b7bbef041cb5332b095e6a3be9e44fcb8c21b54353
-libLLVM.v11.0.1+3.x86_64-apple-darwin.tar.gz/md5/487f852d540a46d03a804318067cab8b
-libLLVM.v11.0.1+3.x86_64-apple-darwin.tar.gz/sha512/9158b5c8cfcc3dd75d835d6fae6b026bd57aaa60e8685d0777de8b31c5b644b40ac470a38632f6b0aa5eed85e1d001d3789a7f2989530f03f5e8dd95b947223e
-libLLVM.v11.0.1+3.x86_64-linux-gnu-cxx03.tar.gz/md5/6f73bcabc8c748b997ce2d9433e7c910
-libLLVM.v11.0.1+3.x86_64-linux-gnu-cxx03.tar.gz/sha512/3152c54501343e575dccb928e55f2624340cf53e84dd67cc606e8a6ec18bd3b8c4121089962fd99af54acaab70279172b65aa35fb7ee50b53cc83db188febc07
-libLLVM.v11.0.1+3.x86_64-linux-gnu-cxx11.tar.gz/md5/4036faca6177ec29d38ae3d93e0a757a
-libLLVM.v11.0.1+3.x86_64-linux-gnu-cxx11.tar.gz/sha512/0875703148d4f3056731247a6395c268e7a34edb5b25c9e1cddf507c13afd2f03d261088129fe822028a02e34842e41c48eda20b572b33d714d17b5b7c6f549b
-libLLVM.v11.0.1+3.x86_64-linux-musl-cxx03.tar.gz/md5/b1aa10a625f7e069023e10fed0ce6ff9
-libLLVM.v11.0.1+3.x86_64-linux-musl-cxx03.tar.gz/sha512/b196521d0fcb830f137d4d67526a3fb46ae0f7591f290e3e0e45c7d2d412518d6b29e005edb38fc5d56b987f5f47f7bd272addfe0cfb1a9e41752f028bd0750f
-libLLVM.v11.0.1+3.x86_64-linux-musl-cxx11.tar.gz/md5/154eb676aa0b1fefe0f4bc2418deedc7
-libLLVM.v11.0.1+3.x86_64-linux-musl-cxx11.tar.gz/sha512/91a79167fd025092a8f36cc37909a2b3f2591cb2f00809a6846e7abb8a41aa1c2f0f0c1e29d4365266c79c3cbacdce2325b58d09b1250806ea8f9e031a7b4c75
-libLLVM.v11.0.1+3.x86_64-unknown-freebsd.tar.gz/md5/1a566cbd9e79eacfc7a76ac3defc4b6e
-libLLVM.v11.0.1+3.x86_64-unknown-freebsd.tar.gz/sha512/535a4e7c665d34e59b5490066373ce21c400001ed86e38b4f5ec7de9ea758b91fc58dadadb239213c4e27185eff33a29466250d978d3dbfa08e794f50ca6c3d7
-libLLVM.v11.0.1+3.x86_64-w64-mingw32-cxx03.tar.gz/md5/97e8cfab218937f77421285f0dab30ad
-libLLVM.v11.0.1+3.x86_64-w64-mingw32-cxx03.tar.gz/sha512/cd514c19006f4f997b8530109a8b2f0099df1e0af7c0e5fe85dea65b01056cfd2b959190e17a3da8a3a223c59859aadd7486d1b6a71ab3207242e9bea891d546
-libLLVM.v11.0.1+3.x86_64-w64-mingw32-cxx11.tar.gz/md5/f4ab87e307e014acf4600dc0a1d8120f
-libLLVM.v11.0.1+3.x86_64-w64-mingw32-cxx11.tar.gz/sha512/a44840a51cad8a7d0db296a3f3613372562c3c5464bb2f1a8e0352d9e521bcaa83053a70d0e369d75718cdd142584f24bd727b6717401174a270835ab40205a1
-llvm-11.0.1.src.tar.xz/md5/6ec7ae9fd43da9b87cda15b3ab9cc7af
-llvm-11.0.1.src.tar.xz/sha512/b42c67ef88e09dd94171f85cdf49a421a15cfc82ff715c7ce6de22f98cefbe6c7cdf6bf4af7ca017d56ecf6aa3e36df3d823a78cf2dd5312de4301b54b43dbe8
-LLVM.v11.0.1+3.aarch64-apple-darwin.tar.gz/md5/181e555215a01db6017bef6b75b857f6
-LLVM.v11.0.1+3.aarch64-apple-darwin.tar.gz/sha512/7940b4494e5806319d013feb42771884c320b964fc674abfd8979169fe32bfbfa03558195e520a108fd645a4c7a06ef918d59cd411dbd086b6a46f80e5ff2c9c
-LLVM.v11.0.1+3.aarch64-linux-gnu-cxx03.tar.gz/md5/b02bb670ea84bcca20f318ffb26a79fd
-LLVM.v11.0.1+3.aarch64-linux-gnu-cxx03.tar.gz/sha512/f05004c2fd6fbfdfe502e4f83babb7c29f9863e900e711c86023e878808eb1a31ff0285c3288a616a2dbecd7dfe3036fdf4465a64633af723f10c2fce68aa84d
-LLVM.v11.0.1+3.aarch64-linux-gnu-cxx11.tar.gz/md5/9aae2250dca2c2e8aa305d0175596fbd
-LLVM.v11.0.1+3.aarch64-linux-gnu-cxx11.tar.gz/sha512/0ca30244061b3bd6f0b0a6bbbf61fc40b02db00b3e9bca4aa79013ac12137aa7e872ba7113c6bad3ca08225431a9d9a4d2267852606e8360b866e5752f027965
-LLVM.v11.0.1+3.aarch64-linux-musl-cxx03.tar.gz/md5/f3c3b9667ec2ab3dd9f3663859a0a736
-LLVM.v11.0.1+3.aarch64-linux-musl-cxx03.tar.gz/sha512/5b1abc1967009154a71fc43813a80d37e1c8531575938b2682212478b51591def16a7141292844568ce65f76bc686a7e9f7a4d77b9311a57226ccd43d8494d48
-LLVM.v11.0.1+3.aarch64-linux-musl-cxx11.tar.gz/md5/98505793b877160dfc7466284fea9e71
-LLVM.v11.0.1+3.aarch64-linux-musl-cxx11.tar.gz/sha512/90a0b9d8da40eaf29d9d7448a3611ee264eb90cd947fa68a6e5e2757d580e82bfbe8ffe3d31c2c2f1eade66d2d4b69a7dd79830b813b62ac7bba289281325559
-LLVM.v11.0.1+3.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/fdc1abc12cf08fae3d4e110976e5967e
-LLVM.v11.0.1+3.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/6157dec83258fff29d54fe2f3783a7975a4487a5bc1b98f8355054970ad958eb26ee11f7a2f60c5ab8ae5c8e799275fc712051b00039a14febd0db66520d29b8
-LLVM.v11.0.1+3.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/2c492cfd7cac00863fe373f8d91ce84c
-LLVM.v11.0.1+3.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/654e9842373cb52114407340f8cf4c5954966ba677cae9aa085e22a3ed167bb1670286f3f3dcb6798913fee45937da0327817a97f48db021d997c22862b039ef
-LLVM.v11.0.1+3.armv6l-linux-musleabihf-cxx03.tar.gz/md5/81b6c3bcb6b5e595f5fc12aee8596bfb
-LLVM.v11.0.1+3.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/8360352c772c69f9cd9469b41ee7494534248ae0480f7661f9303747cb3a87e26d5659fbf32d7dd11ba70b4f24812cf94dacd7565b92b42c9d63da388b8e93e0
-LLVM.v11.0.1+3.armv6l-linux-musleabihf-cxx11.tar.gz/md5/c9559f735a8419e77a874dd75451d726
-LLVM.v11.0.1+3.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/64cb59c966a1f42d1f31e32cf584602cd1a6a3bb5c3d805197c8e3cd96ebfc1ad3959bf52be2767d88110fe0025307f1e1982e9029630dd5438e94886da21366
-LLVM.v11.0.1+3.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/bacdfbffc987ab1e4e0a023c4f1cb636
-LLVM.v11.0.1+3.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/d9f772a958ff3621f10bc562833a7121a4b3c3aec8d8d03694554714dd54537aa89af0e080f16ffdcf965ae33bd1bb05d9659beeaa86ffb7e553a4202b924a0f
-LLVM.v11.0.1+3.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/018802fd42e68b24459c4040df0019cc
-LLVM.v11.0.1+3.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/ddcf9f104f47485cea67d0e5862393c2161e6aaf2e1cced1b0600b5b2dfb3c35895d00e47349de298cc9164665cc8cc8af3249997b716872fe23bb58bb1ea370
-LLVM.v11.0.1+3.armv7l-linux-musleabihf-cxx03.tar.gz/md5/dc1ccc83441df682b09624bf7047aaf9
-LLVM.v11.0.1+3.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/b717137c831f698cd9b3339e5beb0a2e9d6fd8c59961f91d70b8e76d8b1ef7bd9ef610a49d02d45bd6e7130262f37601a780e33509b78e2b1caafd40eeead7d1
-LLVM.v11.0.1+3.armv7l-linux-musleabihf-cxx11.tar.gz/md5/74749fde3378518041fcae2e447370ea
-LLVM.v11.0.1+3.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/b60715831b4583679455ceffe82acc19ef6c7fe3798b02f3df8a08a129a4f80fbfd7ab59c663db4f4418285dc3551e536c3edda2e24310d10cecdd89596588fd
-LLVM.v11.0.1+3.i686-linux-gnu-cxx03.tar.gz/md5/0d403181fae7d966c536ee0cd6e39c12
-LLVM.v11.0.1+3.i686-linux-gnu-cxx03.tar.gz/sha512/71728fdc4b559df058d9ddc6dcd1dfae4d2dc854e2038ba14baef62e1cba7f3e2c7565fd2942b069690497989e846d0b5a572e0bbea8de7873752e43bc6827c5
-LLVM.v11.0.1+3.i686-linux-gnu-cxx11.tar.gz/md5/791e288d0acc976ddf18776540b21ca2
-LLVM.v11.0.1+3.i686-linux-gnu-cxx11.tar.gz/sha512/49dc3c9a2952f928d51c0af64c9f87b441c05a3f7c497be7564052936bb44c4e5dbc05dca3b69d9123d784647f6ca1c528ece5bc11ad39059fb8d465582ff394
-LLVM.v11.0.1+3.i686-linux-musl-cxx03.tar.gz/md5/da303f41ec8ad7329e826a5e94af8889
-LLVM.v11.0.1+3.i686-linux-musl-cxx03.tar.gz/sha512/39deaf22f9e5c2422eefaea6d64c8d78d3235385edb43ce050f8f34c5c9d128c7b155adb2af5c1f0653f92932324f9a8a564df6dc97009402842e98b4b9a17de
-LLVM.v11.0.1+3.i686-linux-musl-cxx11.tar.gz/md5/3c5c77b8534eb0756c112193e4028838
-LLVM.v11.0.1+3.i686-linux-musl-cxx11.tar.gz/sha512/9b00f22b169f17f1766e40da5a7aee602cf0b1f34ae97cdd02f59f3f7613f192dfa6a0235764a175efe4cf1ddcc1850d37052ea45e16947f0d1ad79deb6812d9
-LLVM.v11.0.1+3.i686-w64-mingw32-cxx03.tar.gz/md5/757aab5c47122ed033f987847a95ec0c
-LLVM.v11.0.1+3.i686-w64-mingw32-cxx03.tar.gz/sha512/de54c3f21959ebcb081601deb01bbbafe415b05cc7240fce230483b2901d77f89575a74384fb55c919bfe25a8ca8cde2f2e0c6f4f6a809fc73499f4752f2043a
-LLVM.v11.0.1+3.i686-w64-mingw32-cxx11.tar.gz/md5/c7f3d289428d68802eaea4cae65e043c
-LLVM.v11.0.1+3.i686-w64-mingw32-cxx11.tar.gz/sha512/54fcb15fd2019dc0f5c3db10c24c9d21d8159e923b215dd000d6107a136b69ff240bc447b2a280a023f25e98880a86ea89cd42e1e3caacabeb59a4ba18b96d32
-LLVM.v11.0.1+3.powerpc64le-linux-gnu-cxx03.tar.gz/md5/a3032d9310fdfcea2c0e6aa2cc27fe49
-LLVM.v11.0.1+3.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/a8f8c4739b17f902f08307dc2f154ebe90a229299d476d8723cffe504cdfa91a8c36aadab4c12c1db4ea8cc056de17b66e68e1f086ad5d30d5f2edcc53da2337
-LLVM.v11.0.1+3.powerpc64le-linux-gnu-cxx11.tar.gz/md5/ba581dd66006b02e1e7643b93af14499
-LLVM.v11.0.1+3.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/8037109625c8cab5e310ff95aa852bd792b35f04b6830870962add89caaf0efbe001d08d6f870044e998a7b14216700a806eb132daadc5c4accd77baeebfb15e
-LLVM.v11.0.1+3.x86_64-apple-darwin.tar.gz/md5/9d9c737b6e98a41086a1802283730757
-LLVM.v11.0.1+3.x86_64-apple-darwin.tar.gz/sha512/b24025364052233f977c17bb5f9675b4404fb4b197f35752a8a2591ac9786eb8aa5b4053ca4269e6c4de9daf401f88b0361880c3b3201be2db91626f9cad9030
-LLVM.v11.0.1+3.x86_64-linux-gnu-cxx03.tar.gz/md5/ef4535f1aaa5867e9dc4401e894086eb
-LLVM.v11.0.1+3.x86_64-linux-gnu-cxx03.tar.gz/sha512/a1d7e4bdc72f33ec669023c89d4dcba8bd4b644e7c38e27784ab0f052e8efc08cb0cb89c38d5ee471961c8c8bfd6eaecc89898e70093a494f6cc53b9a36652eb
-LLVM.v11.0.1+3.x86_64-linux-gnu-cxx11.tar.gz/md5/bcae5538b46ba7241b429dd568faa1b7
-LLVM.v11.0.1+3.x86_64-linux-gnu-cxx11.tar.gz/sha512/f336e5d7d007298027efe39a9b50afb32376d1361622097fac6a0530ce8a834f0cfb6d8abf05cf964bb5159b0864bedac978fb95031cb84672e0bc0b037d03aa
-LLVM.v11.0.1+3.x86_64-linux-musl-cxx03.tar.gz/md5/353ba4a877af02d2a986cb4917fc6cbc
-LLVM.v11.0.1+3.x86_64-linux-musl-cxx03.tar.gz/sha512/ea038f9a12a84df32e62d877320eb041d2363b1714ded05508b1fdfb2a874a6964114833a7435feaad1040fd31a533acd29e48731f49e8df3b60fcea04f565fc
-LLVM.v11.0.1+3.x86_64-linux-musl-cxx11.tar.gz/md5/81d43dea36a9e2ba628263b1af36461b
-LLVM.v11.0.1+3.x86_64-linux-musl-cxx11.tar.gz/sha512/a99b2dbdaf55cd36e8ead346fc7fae4ecd430cb98895343d6a409d0ea95406836048638dd5ab73129a2b2a637abd1e997515d8de1e9e5e8449e0f9d747773d4c
-LLVM.v11.0.1+3.x86_64-unknown-freebsd.tar.gz/md5/0f83ccf57dcb7814976a57c42c91c454
-LLVM.v11.0.1+3.x86_64-unknown-freebsd.tar.gz/sha512/06c3a97b6ec4f9095e8e978ccab0e65e03d26f99d90485c7893b6a41a53a4e5a8e8e531592e20f9d73a1100fe87f7fad793dc6cd19b09ae0bdaec5c362b7b4ac
-LLVM.v11.0.1+3.x86_64-w64-mingw32-cxx03.tar.gz/md5/8db6882c343f0ff1ed3137c1a17a1676
-LLVM.v11.0.1+3.x86_64-w64-mingw32-cxx03.tar.gz/sha512/a3fbc239389eaae152f4a21e5b50b6afd09adeb75d8e32754dfba5da3815991dd05ef5c403ef9539bdd47cad3f9b2c7066a61e3f40243f7bd237a7480172be86
-LLVM.v11.0.1+3.x86_64-w64-mingw32-cxx11.tar.gz/md5/bee41a179343b54359aba1b159110a39
-LLVM.v11.0.1+3.x86_64-w64-mingw32-cxx11.tar.gz/sha512/3db19d091bd52cfc2018a25f28bf7714f6337bd93b24349ad90d2954f60317189e5e65241ff114c9f7cd15933b685f0a4a193d45daf21af593fe2890ab98843e
+libLLVM_assert.v12.0.1+0.aarch64-apple-darwin.tar.gz/md5/c747bfdb199d77ba8c9a6984e114b007
+libLLVM_assert.v12.0.1+0.aarch64-apple-darwin.tar.gz/sha512/6ec360ce5d635e2e5841cdafb8290734d9ae28ff61ebc69e1bb621231e69c431ae93fd6798e70f02cc3476ba1a53a5317f75c57499b0e1e0feb4277a0e369a2f
+libLLVM_assert.v12.0.1+0.aarch64-linux-gnu-cxx03.tar.gz/md5/8b8345f6f0139dd07751cccdf636f1d6
+libLLVM_assert.v12.0.1+0.aarch64-linux-gnu-cxx03.tar.gz/sha512/b34ed1f91df7593b456e0e3fc2fa059c8d76f8e0bcff253d2675c02424be34905e44d7b824ae0d2c032510a9205db719c0c839dec5f121193376b251f338f505
+libLLVM_assert.v12.0.1+0.aarch64-linux-gnu-cxx11.tar.gz/md5/f077d9bdd7c0a66053a3949141469007
+libLLVM_assert.v12.0.1+0.aarch64-linux-gnu-cxx11.tar.gz/sha512/91a68b1cc99339c438f73a1fdd85d3fc23b2ba5ceeeb3c5e6876ede22f53091c7f8da308a1cdc6d2a9d832ea2f4c2ece7536ce293570a1f639c043031926dc94
+libLLVM_assert.v12.0.1+0.aarch64-linux-musl-cxx03.tar.gz/md5/9b9b3101d9213a5a74e3a2704257961b
+libLLVM_assert.v12.0.1+0.aarch64-linux-musl-cxx03.tar.gz/sha512/36d78c7881e55113b7717b120a9a691b3fdcf41e7d69ec3c8a04995268f13bbf2c6cc4faef34e7e38535eb75bd5d12f004e85f38fab13d7deb08171a54104fad
+libLLVM_assert.v12.0.1+0.aarch64-linux-musl-cxx11.tar.gz/md5/9a2084773d4bc35d914aeb388dcedcf3
+libLLVM_assert.v12.0.1+0.aarch64-linux-musl-cxx11.tar.gz/sha512/08db23f8bc571d87e99fa7373ea582ca85827ac2b89caeb53cb91f6917d6c32b490e1c7604d8e31e9dd0448aa488b654e7e101cc5550632d762ce910616b1dd3
+libLLVM_assert.v12.0.1+0.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/82c27f978b5f600df05eb43e31b17e94
+libLLVM_assert.v12.0.1+0.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/689b0d3b5697268ed17b5824af67d9a55889e9ec578fce989275a6dc808cc806ab8e4a71bd4f2bd4b6bd5bf65973929ff14d8b1f7830c31052b59c80c72cd77c
+libLLVM_assert.v12.0.1+0.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/af0939467863fae96656e27968a83a4b
+libLLVM_assert.v12.0.1+0.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/4dce41fb4f24d2af99b6cfa30b40cfc00d6f547089551de6af7a44878559e6b412060a93358ce1ef41a1d3da7b522d0aa94f0c9f24c78e9a68c6e8e27ca7ef16
+libLLVM_assert.v12.0.1+0.armv6l-linux-musleabihf-cxx03.tar.gz/md5/684c09fb69a415c292b4965669a21542
+libLLVM_assert.v12.0.1+0.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/3a275e3a112e120c8e10b1563a4874664db95ca2d555229f48b6c31a7eb9407e74f890c3c37ba8f58ed37b38a91e3a775abee4a29508ef1531ba31a902f18458
+libLLVM_assert.v12.0.1+0.armv6l-linux-musleabihf-cxx11.tar.gz/md5/e62d3df087506b8c3457fa77fbd018da
+libLLVM_assert.v12.0.1+0.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/67bdfaabc3107ff6b8a4813caa8dec38e94f4e6b297c353fea219e3695c20399466795fe5efc8bd46636ad68f1f25ac967135eb49b8ec4766720eeb282a574ab
+libLLVM_assert.v12.0.1+0.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/4f4010c5d82ca20e7064937a449e617a
+libLLVM_assert.v12.0.1+0.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/31cd440634cb00493be58a4eb711f9e89a362b870d5167d8baa6273d700a0001e414cf3427285cd6d8d31863c6a136f63efa27b1166d7b382fb7a33b2cc4f84e
+libLLVM_assert.v12.0.1+0.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/9d7f3017c1e21220c92c46449d41ba3c
+libLLVM_assert.v12.0.1+0.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/9e1b9b80bfc313d08cae8d62ebb055fa752f5fcb9ba42606e0e53028f6d5df372d535cf70da2c0d1cc86f6ce15981b1b5f9f721c24197232609638dcfbc73d48
+libLLVM_assert.v12.0.1+0.armv7l-linux-musleabihf-cxx03.tar.gz/md5/9f74405d26debd6ed6743f6f8ca8bd5e
+libLLVM_assert.v12.0.1+0.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/12c2c415bdf7669f10f60f3ab61a506b864cc440886d55d4bfc6d588f0187e0a04de09f3da2b3fd00652ab3688a74771c06603e895c9f7de60bdf317e43e8837
+libLLVM_assert.v12.0.1+0.armv7l-linux-musleabihf-cxx11.tar.gz/md5/0543b752e04125e97a087748785b9987
+libLLVM_assert.v12.0.1+0.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/11e76a67c1f6deea206c644463a3a4660ccea97b79300e3430f10f86a7115f933bed78590033358c8a2ad04375ab7546b42a8730a9bb40e0a9b4352ffedbc481
+libLLVM_assert.v12.0.1+0.i686-linux-gnu-cxx03.tar.gz/md5/e43db260c7b4f918da8b8e234437407a
+libLLVM_assert.v12.0.1+0.i686-linux-gnu-cxx03.tar.gz/sha512/367b42e601d7684805ff627b32329aed091d02fd791ebd5d661a81baac79326f4c9b77de410f7bbf1ea2a7904e6f090efe81a145ca137dd4b01dfe2d5cd7ecf5
+libLLVM_assert.v12.0.1+0.i686-linux-gnu-cxx11.tar.gz/md5/8dec2cb7eadd13aede78f9d5ce60296c
+libLLVM_assert.v12.0.1+0.i686-linux-gnu-cxx11.tar.gz/sha512/0d36b28f5bfd35c2e0560da334430272ca0c70695f2d70f7e93edaa9308fe69f9a895615158805ab305603795deab0bb30a17f3f1690340cac064f35d2b1e6ab
+libLLVM_assert.v12.0.1+0.i686-linux-musl-cxx03.tar.gz/md5/27c4856ac7778ed1c45c34695003a0ea
+libLLVM_assert.v12.0.1+0.i686-linux-musl-cxx03.tar.gz/sha512/1507d6f57c794c48678e8826b885917046db12457b7b9b188b334fe64303199f4170e7c536165c5660911e2ee41f8251cacf0a8ce0bc44fa74bd9ddd0b0f1265
+libLLVM_assert.v12.0.1+0.i686-linux-musl-cxx11.tar.gz/md5/ec26cc534387af2bda1d26988c805b93
+libLLVM_assert.v12.0.1+0.i686-linux-musl-cxx11.tar.gz/sha512/b3b546fc5b3548d1513aca5dc442a3bd0c2a7d7b18507ce614c70de759f986a6ff999ad9dd8701752d8c9671d2d0343ee37c2de4de6b69672983827dc9d0c46c
+libLLVM_assert.v12.0.1+0.i686-w64-mingw32-cxx03.tar.gz/md5/fb41f85924397c3511cf32375ad2ea6c
+libLLVM_assert.v12.0.1+0.i686-w64-mingw32-cxx03.tar.gz/sha512/b97e2e98e9e39cfc90a4b6a4e6c82027034f7fa24dc286e4df5add37308b64156c5a631543b9c674973d3205bc4ec3b9813ab66982cf3328195db19472f9f071
+libLLVM_assert.v12.0.1+0.i686-w64-mingw32-cxx11.tar.gz/md5/57b18de8e7d8dac8fdfb1bb65956eac6
+libLLVM_assert.v12.0.1+0.i686-w64-mingw32-cxx11.tar.gz/sha512/21cb62852389bb9ab4bff38b1c073c6f46ae324757dea8f267a3826fc4db3918873d70a236431a8bd921db3f8d327d6f08ebfcd69a70123f761ab5af15ffb8c2
+libLLVM_assert.v12.0.1+0.powerpc64le-linux-gnu-cxx03.tar.gz/md5/3ef3f53a78f5927dbd73331f43575129
+libLLVM_assert.v12.0.1+0.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/92b9843e537ac5c15ef18c6e1e3b7b1e95f7638ad14ab9ecb20aacef2cb1062d87ac8de993abf404bcf06b5523e9f7060f6d419df282ff4e1833517832d93c33
+libLLVM_assert.v12.0.1+0.powerpc64le-linux-gnu-cxx11.tar.gz/md5/e065f6bfa090d9d66b23ffd6f8e5285e
+libLLVM_assert.v12.0.1+0.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/778a990a0c4193d04077fda3e3bbf1420d96e689882a8bcfd24202c8f656eeec814f68cbe5de6fad4259ef0837ad773156885a5606d107b3371be278f4dd716a
+libLLVM_assert.v12.0.1+0.x86_64-apple-darwin.tar.gz/md5/128dd6801aff034cbdf90c3a9d446954
+libLLVM_assert.v12.0.1+0.x86_64-apple-darwin.tar.gz/sha512/d8d4fbba77498abd154647edf5eea6d6ceecc6a836b1ce6b75e6d42949255e1e785ea9fea590e4f65df522aeb896843a52d39ad6f5f6cd80ac58f27bc9048786
+libLLVM_assert.v12.0.1+0.x86_64-linux-gnu-cxx03.tar.gz/md5/f6cc8374c37872d6846c54b7bde44592
+libLLVM_assert.v12.0.1+0.x86_64-linux-gnu-cxx03.tar.gz/sha512/31e28ffa3d9a8c8a86d08b9a3e50d9e021c010fa9ec0222c284fc7af86aa6a71ee252d109cae8e02dbcbad09126e01ab2311d55e9dee9939a162a472f3f5e517
+libLLVM_assert.v12.0.1+0.x86_64-linux-gnu-cxx11.tar.gz/md5/91df95adb9d0df769c7c530e7b558171
+libLLVM_assert.v12.0.1+0.x86_64-linux-gnu-cxx11.tar.gz/sha512/89fc4aad84ba13e4024e331f9da29a28b1651a69c4ef47c52fb2a20c6c0cbbb319e9ee82b9604d5a934147adb7b29a07d360bdb6f69a8e95255303ce0c7cc2e4
+libLLVM_assert.v12.0.1+0.x86_64-linux-musl-cxx03.tar.gz/md5/d7933d46280095ed61c0346a14c19f95
+libLLVM_assert.v12.0.1+0.x86_64-linux-musl-cxx03.tar.gz/sha512/fb7fd1a0d254de08b7045421fa1aaf0f0594ccf50fecffe5c1c7c162553712cf401e66f76f34286669bbac034a6411bea9e9abb99b1a6f711c4d4f129c937ce9
+libLLVM_assert.v12.0.1+0.x86_64-linux-musl-cxx11.tar.gz/md5/298e051ca068f5b61a1851274451bc19
+libLLVM_assert.v12.0.1+0.x86_64-linux-musl-cxx11.tar.gz/sha512/76517d5456d9cd5fcb75528fc2968f8a317e8660b73c9e5665c22dbf84efda107b16942c809167e0d7eb8457b21f3c20ce9de62f5d0e433ea7d21b8d3b54a811
+libLLVM_assert.v12.0.1+0.x86_64-unknown-freebsd.tar.gz/md5/cadc91e0a65bf719abfbae53ea8e0158
+libLLVM_assert.v12.0.1+0.x86_64-unknown-freebsd.tar.gz/sha512/2097cfcf21ce2f37cb079934a9e7c8ef30739d20e1b8c2068e59b7e2d4254fa9cd051492b23e426c1f0c2831ad60612fa13f64fd30468826c9ffca25fcb5db82
+libLLVM_assert.v12.0.1+0.x86_64-w64-mingw32-cxx03.tar.gz/md5/fa0ce67539470c5a81912efc743f66c5
+libLLVM_assert.v12.0.1+0.x86_64-w64-mingw32-cxx03.tar.gz/sha512/ef89f6230942c90d59e6169e538668cb39c22c64ca0c880224ac0bd856ffb9c0d8b573797351ccb28eee240371fc03617b192116a92926def586f4deae54fb6e
+libLLVM_assert.v12.0.1+0.x86_64-w64-mingw32-cxx11.tar.gz/md5/a54594fb8bd760fa03d58f52532cf3cd
+libLLVM_assert.v12.0.1+0.x86_64-w64-mingw32-cxx11.tar.gz/sha512/4c750c00398ec35052b54610d0fa5459b28023db15c61149bd897bfc1af1330769c3845dcb54601bc5ba49644b8a9196893dc6b78b10aea9d36f2a3c4c759e9b
+libLLVM.v12.0.1+0.aarch64-apple-darwin.tar.gz/md5/b2936e7dec075bb0074f5f315bfe86e2
+libLLVM.v12.0.1+0.aarch64-apple-darwin.tar.gz/sha512/c5c93d3e2cdcf28babbcbac2c4bbc681c03036f43b05e5699e6251e2d3a036543557e3828be40bbbd01f76d361e884b656099099712f7af742aa822e923d4cdb
+libLLVM.v12.0.1+0.aarch64-linux-gnu-cxx03.tar.gz/md5/b0f763bf80430e5064ce1a4910b52c95
+libLLVM.v12.0.1+0.aarch64-linux-gnu-cxx03.tar.gz/sha512/e024bce71447ac86a752d1e041ce86d68a25a131fd85fee607ce9e91eb2b185c2db6982bd89f04aa7ec4f238e49586f8413fbe1f10cfc94bbe32f05a5900be4d
+libLLVM.v12.0.1+0.aarch64-linux-gnu-cxx11.tar.gz/md5/648dfa6270a300d0d739218e76cb6e77
+libLLVM.v12.0.1+0.aarch64-linux-gnu-cxx11.tar.gz/sha512/50efed61e87dcacbb6b75b3a6413e522d5677c53f58def59fcc54b5d02d5e319b9bcb5d7e316611564f4845872902d870c4143b9f23e305629ba0e3b69dc81c7
+libLLVM.v12.0.1+0.aarch64-linux-musl-cxx03.tar.gz/md5/688b38b56b4d852735147b2a7dd6c772
+libLLVM.v12.0.1+0.aarch64-linux-musl-cxx03.tar.gz/sha512/764042743772bcb10f873a0f6478b3b8cde5f150d64e72db9da27623359ee94876ac68fca15f64bdf79378a8c88b7c590d2548ebd55723e2517128a679a1b046
+libLLVM.v12.0.1+0.aarch64-linux-musl-cxx11.tar.gz/md5/ec00618e64ad972293c4d71978a92cf7
+libLLVM.v12.0.1+0.aarch64-linux-musl-cxx11.tar.gz/sha512/670d2304f3db72d8b27c3acc682f3ff9e8f12f3797f80dead4cf6fad94a40f3307c2e534b1caf8392728b4768e3b216f322325eff86141502fa3359828c2b891
+libLLVM.v12.0.1+0.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/13abd32c8f473d13b8f90512b9d969ff
+libLLVM.v12.0.1+0.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/28d3c5d161a63c07e069becf43c303f38794f54db8ff3723f0c30c4d240f3db8d6d08e013d64bc38f8deb8353b87e32e646e9ceed74a42ce066e8df7c9a4c328
+libLLVM.v12.0.1+0.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/ce3325b92fa32363bca54159693993d2
+libLLVM.v12.0.1+0.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/cc75076f095dfb59a51a3abd0ec5475b8b8495695c005a93c9fe721f5571b8d1801c0807691efad97c80e074f20c365e14ade7221cadae8de8779a1313d8aa49
+libLLVM.v12.0.1+0.armv6l-linux-musleabihf-cxx03.tar.gz/md5/7aaa8b88d7c2e0571731c5ab1f342a8b
+libLLVM.v12.0.1+0.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/56c8c300d3e6407d638b3c77871c763133e5081cf78b9803269780290a34784bd9ca7b9ab4219e4b15d90f768d15dd3bbfb6a7fc4747dcb04a3a239f8874000a
+libLLVM.v12.0.1+0.armv6l-linux-musleabihf-cxx11.tar.gz/md5/c345cbb3e3cca42fc5363f6fa90a3ac5
+libLLVM.v12.0.1+0.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/af037ac16498e5b09a21a31881c2f20792e0c191a1af852fa1159843484ee829eb7bdac25c27226cc726622504febe1ee06cf0c70a99ba7542d78b1f83a1415a
+libLLVM.v12.0.1+0.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/03fddb9a5f0e4e6e9ad146918b5f9594
+libLLVM.v12.0.1+0.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/2698dde5f63a0ca2408430d23084c1fdba2b9cf86e106dba3b9711672e237e49f294b5ce4e4459ebdd7da0e748b067ade6cdeb7d004ff4c121c5d71e50af9abb
+libLLVM.v12.0.1+0.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/d679753db557bddca088ca14537f4fa8
+libLLVM.v12.0.1+0.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/468943d42df129b61c758c5567d0838580611b06906fd36ed652cf0d7d58b6e23907a0591eb1e02425a54a55897e3845952b7d87c4dafd405b673c70a6240635
+libLLVM.v12.0.1+0.armv7l-linux-musleabihf-cxx03.tar.gz/md5/390e47b0ec2a37c3ec9a10f03fb0f369
+libLLVM.v12.0.1+0.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/7f2fcaa1afd00f52d20128ba55d1f919a8c8284b7f3de33c56d6c772a9cbbbeea8c433bbecedc20ce4ddf5631423fe44ad5f12c616df46f689519377416a799b
+libLLVM.v12.0.1+0.armv7l-linux-musleabihf-cxx11.tar.gz/md5/161d18aab34df5b02fce7ed5eeedfce2
+libLLVM.v12.0.1+0.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/f403c9df31233b39d80c415cd69cdbbd0b38d2412d090edc41ef197de3c2c07bec1b19f7c1118c8c20fcdf372fe6cf26cca6065472783b534669d1ea8e3ac5bb
+libLLVM.v12.0.1+0.i686-linux-gnu-cxx03.tar.gz/md5/b6763e4a6ba177701dd84e019e0cc4c3
+libLLVM.v12.0.1+0.i686-linux-gnu-cxx03.tar.gz/sha512/15df17ab921aed3a84eb8fb5121df6fd6b091548eac978cd7101f8f0e342da6ef3ffad47d1cdca3a264e63bc19fc879a220ac23b8cef8ce666be66f2c78d69f7
+libLLVM.v12.0.1+0.i686-linux-gnu-cxx11.tar.gz/md5/0c074540ba38c2f86c5bb5365455a8e0
+libLLVM.v12.0.1+0.i686-linux-gnu-cxx11.tar.gz/sha512/1fbb275226e859617deb45f239f5307d0ab1aae1fd69575793b900832ce5d15413e6e6d8ce659ec2376a71a3fd295c308c5e6d8f626f30a7d0642251fd78f8b3
+libLLVM.v12.0.1+0.i686-linux-musl-cxx03.tar.gz/md5/585ffb5cc1d49bccd3141092620d9903
+libLLVM.v12.0.1+0.i686-linux-musl-cxx03.tar.gz/sha512/ced158b091a88bcedd23df1a9e08dde44ae1bac83d3416319311279fe25fa13ce92fb408ea7a2451f10d2a0fbbc2f4dad7afbc7cf790b2b78c1fba9bec53c46f
+libLLVM.v12.0.1+0.i686-linux-musl-cxx11.tar.gz/md5/52783b24f1bca4cfc8446c1b6cf9390b
+libLLVM.v12.0.1+0.i686-linux-musl-cxx11.tar.gz/sha512/298ef4104e7958491687b6085b38ea67fa6ce319b35afa24c45a18f46709f96e9edcf5e897d6fe0d1b3bf8c64dd5cd92f49973378e151db263edaaca8fe9caf6
+libLLVM.v12.0.1+0.i686-w64-mingw32-cxx03.tar.gz/md5/02a50422d2b3fdaad78a6bcb809c792c
+libLLVM.v12.0.1+0.i686-w64-mingw32-cxx03.tar.gz/sha512/06d6ec3ecf8560210a0fd59e7d4f63e778de8c810d24b675c2655472f4272b9e85de18d848d79f31122bd1b62cd4a00666d62d3d012cf789804f29f90c555aa3
+libLLVM.v12.0.1+0.i686-w64-mingw32-cxx11.tar.gz/md5/1b678f74c6336a2870529bf6ec8ddd17
+libLLVM.v12.0.1+0.i686-w64-mingw32-cxx11.tar.gz/sha512/86d6a0563497ea9121fe5bc92db738a03ca97c150f8343bdeba10e887f9c015b2b725fb9626c78babff9c633ae393b8323ff098b32d7225db67fb9053e6abbe7
+libLLVM.v12.0.1+0.powerpc64le-linux-gnu-cxx03.tar.gz/md5/72cbb46c59ce588587caf76e894a4f5a
+libLLVM.v12.0.1+0.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/d6648d53c0dfd705d7ad46ae716e1db4a1d767317b79fe07b89a1a9d83e1d3e3f4416417f5538e8b86fbcec1d00002e4b069b0d16f6145ae5426ecb5a418a40e
+libLLVM.v12.0.1+0.powerpc64le-linux-gnu-cxx11.tar.gz/md5/336c8d1eea9f7e9a3c8d82db6a4ee0bc
+libLLVM.v12.0.1+0.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/fc2f286a5bd72b70c4416c3bbe0998be9a22b9ff6303a4dfabeda9e4acb393ae18c387d6a4fe05d41e352954f9dd5ef712614fdc51057e3ec37bf1a35b46e8e9
+libLLVM.v12.0.1+0.x86_64-apple-darwin.tar.gz/md5/7168391b6c79ab1ed805357dee26cc8f
+libLLVM.v12.0.1+0.x86_64-apple-darwin.tar.gz/sha512/6fbe058c5e18d52b1ecabdf1bb89d616b1e5fce030eb076f35fd759c484dce6ce89c6b450cf82698e31e672826c55f9018a36ae5abdc306f7535ceca2ad8af2c
+libLLVM.v12.0.1+0.x86_64-linux-gnu-cxx03.tar.gz/md5/3c494d8f385a75960e18145cc64461e0
+libLLVM.v12.0.1+0.x86_64-linux-gnu-cxx03.tar.gz/sha512/1149364d65bb209e9d867fa65036e67a0300e70fe7fb2a351ad6f956c1f60bf3ef6437c96fd63985fd45b0e5412cefce3c94279b92f64826ddeabc8ef43bd071
+libLLVM.v12.0.1+0.x86_64-linux-gnu-cxx11.tar.gz/md5/9797b1942c5d4fc33f207123856c8aa0
+libLLVM.v12.0.1+0.x86_64-linux-gnu-cxx11.tar.gz/sha512/080945439141816585a2bb35d9222bd940a6213e377a57636d165aef0029645397655beca2555831d53e7f933b7dbff35ef607b6410318880ff23e0f5b8f3db0
+libLLVM.v12.0.1+0.x86_64-linux-musl-cxx03.tar.gz/md5/14b68e9ef3aa2b407f74546cc4973aa8
+libLLVM.v12.0.1+0.x86_64-linux-musl-cxx03.tar.gz/sha512/6e73da793d4d5162747b3bf04f3c74977e685c4b6b316f8b9e1de83c42ebd55a8dfc14e028f550b22231945eef8ba846dff9a7418b37b916ddb4b9a865dde778
+libLLVM.v12.0.1+0.x86_64-linux-musl-cxx11.tar.gz/md5/231fc8bf6f2b869a98a6be1c3c15cf84
+libLLVM.v12.0.1+0.x86_64-linux-musl-cxx11.tar.gz/sha512/acc5125a0720372b32f2f38e2b6f1bc1fdfce4ecd557a475ebb783e01bbc1f5efeeeb08a4953be66973e947fc6f203295755e4eb41c2f968ec71bf1bdecbd0ed
+libLLVM.v12.0.1+0.x86_64-unknown-freebsd.tar.gz/md5/936c10d179134961da65e94759f9eced
+libLLVM.v12.0.1+0.x86_64-unknown-freebsd.tar.gz/sha512/68d449bc9fdd1554835d6237db6643893c00176b4dcf570dbd31ec5662d6fcc916752a33e1713f20ead8499911153d25306cc9d1f0b3cee6113ec9b971c7e780
+libLLVM.v12.0.1+0.x86_64-w64-mingw32-cxx03.tar.gz/md5/4e6ce12e5f8850833f80fadc7d154928
+libLLVM.v12.0.1+0.x86_64-w64-mingw32-cxx03.tar.gz/sha512/4801ffdff0d807dc7df2d076793ff4c3751c73eff5d208476861f1e790afb39b31ba4e1eeb15da07528aaa7c0e77ecfc545c989c5d3cca7edeb31bb6b6ce561c
+libLLVM.v12.0.1+0.x86_64-w64-mingw32-cxx11.tar.gz/md5/414a7becaa5623dc0d7b4b4c15f848ad
+libLLVM.v12.0.1+0.x86_64-w64-mingw32-cxx11.tar.gz/sha512/8868f3890140cbdbfb2c11d34ada28c2ac09a49a45b306e4d99e868e1f20b6e91b3a39c4a743ace4bf32fbc87abd6226e9242d3192aabdf1e7f04a62f57dc4e2
+llvm-12.0.1.src.tar.xz/md5/72a257604efa1d32ef85a37cd9c66873
+llvm-12.0.1.src.tar.xz/sha512/ff674afb4c8eea699a4756f1bb463f15098a7fa354c733de83c024f8f0cf238cd5f19ae3ec446831c7109235e293e2bf31d8562567ede163c8ec53af7306ba0f
+LLVM_assert.v12.0.1+0.aarch64-apple-darwin.tar.gz/md5/e36756cb0c80b0e113d445afb9b6a074
+LLVM_assert.v12.0.1+0.aarch64-apple-darwin.tar.gz/sha512/ba3daac19b9ad0b0bc70c0dba8093ecd97e667440a414409e0414960ab5870927a65755d116ebf106e529c396cef3cc074882b86debd978467ec533deceedba7
+LLVM_assert.v12.0.1+0.aarch64-linux-gnu-cxx03.tar.gz/md5/828f9b84f074321b991708afb7320c09
+LLVM_assert.v12.0.1+0.aarch64-linux-gnu-cxx03.tar.gz/sha512/34efda2d8e07b2eecc19dc445a8ecdfdef2d3a17dfcb8ec839c76d1e4cde5b74240cd1d5d2518e312ace497672569b87d6beb08834861ea0aa1d8d3f48b990be
+LLVM_assert.v12.0.1+0.aarch64-linux-gnu-cxx11.tar.gz/md5/ba42d8486199e8b6789805891035ce9d
+LLVM_assert.v12.0.1+0.aarch64-linux-gnu-cxx11.tar.gz/sha512/0e1e1e1a1e4ce7568b22beccbb693001e6c32bc82e45c862287b77f4756d0c97a8e74d6e77fa768dc2a4c4971e2ec31bfe08feeb0aff67f095bb3f93af2b3dde
+LLVM_assert.v12.0.1+0.aarch64-linux-musl-cxx03.tar.gz/md5/95f562abd7fc60627d69ca9c870cd076
+LLVM_assert.v12.0.1+0.aarch64-linux-musl-cxx03.tar.gz/sha512/58556d400148c2e28caadfc74722ff7dab35d1244c700fdd7da8daba9217eef78fe7cbddd94585c799942460e9fef731207ad3fb89b933125006fa58c66fb43a
+LLVM_assert.v12.0.1+0.aarch64-linux-musl-cxx11.tar.gz/md5/d7c76a40a2f25357bac82d27e220f00e
+LLVM_assert.v12.0.1+0.aarch64-linux-musl-cxx11.tar.gz/sha512/4e6408703ef2e6c36c267d316efbdb1a514dbffe08ee69f598801bf2d002fa6d34a7efb4ec8f091bc9f0db5d47c443aee95b1e65138f3f5011c7c7a62ecae05e
+LLVM_assert.v12.0.1+0.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/2ed619623c717da7f6182c1a126d5e65
+LLVM_assert.v12.0.1+0.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/2045fe4551457e350e16f0af6efafd8ff67e9540532535c6951627a72b21786ae69c993ff18b048a613f7f85fede279db9b8734c8e77ddf79a87d08c17f2d029
+LLVM_assert.v12.0.1+0.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/cf0396c08a19433d326d4637cf0373a2
+LLVM_assert.v12.0.1+0.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/c3d5a0c62dbd97084434bb549430fe7c8a27a6c49dea4e141ab84b369507019472aa7f2edafe56f85d828550c7c78398f95b82fbe6358fe96a7fc3fbbb5ed4f9
+LLVM_assert.v12.0.1+0.armv6l-linux-musleabihf-cxx03.tar.gz/md5/198d203c3c469fe625a7878961692108
+LLVM_assert.v12.0.1+0.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/b2c8da74ce54187fafb0cba187576bddb492f03aac5e310340e203204682301feb6604e9f282f22beb90e5e141d7f269b147085f4a099dcd6c1eacd89801b139
+LLVM_assert.v12.0.1+0.armv6l-linux-musleabihf-cxx11.tar.gz/md5/17ddbda46f0e2c756fdecdbc80458662
+LLVM_assert.v12.0.1+0.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/d12850485f037f080285aa78375b6c07fdbdccdb62c2abf68c6a7366a7bf29c0836776e81d3a73bdade71f78d1ed289f3c8c899052ea4e7dda4848e0ff159e0b
+LLVM_assert.v12.0.1+0.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/447fdf5003ddcb12595f657249ef04d6
+LLVM_assert.v12.0.1+0.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/3dff913e74bbc3882adf2bb2c1bf2d7fd936304cd5f81ee9ab8c80544bd851185ef45160a04e7aca2fc4a9b6656f5616b3d301b533d7a3b40132346fd2c71c27
+LLVM_assert.v12.0.1+0.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/a07d85ad41d9acbd1820c3e22cc94a2c
+LLVM_assert.v12.0.1+0.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/f2652066abe9f21f2c9d2e1d21d72a9f6dd576f9da67a5b675278ea3a129ebea2f2964b1badccc6992bbabdb4ee25bcebacfa885fb3485f8596ead48f610e230
+LLVM_assert.v12.0.1+0.armv7l-linux-musleabihf-cxx03.tar.gz/md5/b793f725bf7c1e1e9f534aada1e97b64
+LLVM_assert.v12.0.1+0.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/d801cbad6c58227196372360f8920dbd25e553947b21f30e8e28ba28f39dffb546f492f4802eea1a8d45b08fb6f52cd49552f91668e967424c442e1b6461808e
+LLVM_assert.v12.0.1+0.armv7l-linux-musleabihf-cxx11.tar.gz/md5/cbf17dec00764c1e7d28b636b50f5b6c
+LLVM_assert.v12.0.1+0.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/fe70916536e9587fc3162e356a1a6d567bef10627b4659d529cfecc89ee23d77c7a5fcd78aaa966de475ee77882b58c8d8654c5a0ebf7aa2ac88da9587716ed5
+LLVM_assert.v12.0.1+0.i686-linux-gnu-cxx03.tar.gz/md5/da27739a3fbe68c77ee71eb853560b68
+LLVM_assert.v12.0.1+0.i686-linux-gnu-cxx03.tar.gz/sha512/f5e73904693a63b1647fee476fc4b7445ccb57b053a035489adc3a679b32e34edada461bf00611b6a11f1f31e73785de7f6e62d759fdb2e4bdf4b8ee0b2be792
+LLVM_assert.v12.0.1+0.i686-linux-gnu-cxx11.tar.gz/md5/061cff6ba872358714a921219d49da3e
+LLVM_assert.v12.0.1+0.i686-linux-gnu-cxx11.tar.gz/sha512/2abbfa2949a7700b6d80cca0ced9499638fdf1d531c722727bd07462e81f6ae3b77273c399e1ec439665e09a34fdbe657847f215a3cff629bea784a3190d11f6
+LLVM_assert.v12.0.1+0.i686-linux-musl-cxx03.tar.gz/md5/4f20e6d98ff6c345401b1e33297ea399
+LLVM_assert.v12.0.1+0.i686-linux-musl-cxx03.tar.gz/sha512/e06a309a2204d1e446385ea0580b376545aed503b9b80d5a3f6f9aff4a857acbeac1f3364173b3a38f9f3dadf1f8ea916a0e5876ebb07f557d56f37f63e8f326
+LLVM_assert.v12.0.1+0.i686-linux-musl-cxx11.tar.gz/md5/a440f471f69dd9b5eec203f0ebada4f7
+LLVM_assert.v12.0.1+0.i686-linux-musl-cxx11.tar.gz/sha512/2e74cbf119de31673c5f706b189e2095af963455488fc7bf80eb2bbe2908dc0c2fa4222e72a1360c2c94962c401b82a90562736f815b4ae77265e7bbaa7b430d
+LLVM_assert.v12.0.1+0.i686-w64-mingw32-cxx03.tar.gz/md5/ce9a8962fa72f49ddb3e308c496cc922
+LLVM_assert.v12.0.1+0.i686-w64-mingw32-cxx03.tar.gz/sha512/af85ab008e537d74c6babb994e8e0bccd557ccb28ab96b02f9c3dea39d764e1f2a2b8f6db79bd20d346e1df262c22f2add337a3038d12d6ef8234a4ce8240ea3
+LLVM_assert.v12.0.1+0.i686-w64-mingw32-cxx11.tar.gz/md5/6dcb876e704dac48dab81f6d670d17a9
+LLVM_assert.v12.0.1+0.i686-w64-mingw32-cxx11.tar.gz/sha512/1f0cf33673598d76303b9b88c6e14ae53d048a74df680ae79b443d59b289cb820d090be04533aef43fa501b776dd890a9da13d52dc964f3da1c0c1bf2999191e
+LLVM_assert.v12.0.1+0.powerpc64le-linux-gnu-cxx03.tar.gz/md5/69fb775889a1c37e17f4d39c7d91f7d1
+LLVM_assert.v12.0.1+0.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/cc6746a1ba64464fba170c206a652e8726c1e68c5929cb215ef84c47c79ffb4e8bb224e8521779bf547ab2d984fc4f34e0eb66bf2ec619f7899c2ec103c2bb16
+LLVM_assert.v12.0.1+0.powerpc64le-linux-gnu-cxx11.tar.gz/md5/20f6f2bdc44846e27eb6e48949abee17
+LLVM_assert.v12.0.1+0.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/27d5bfa52012881364f761b99bd2de0f74ba486301f9c753bed47ff151cd33b9484be532f65e1b038da470f7995454c12f75fb5e908f43caa8f0929f266bf14f
+LLVM_assert.v12.0.1+0.x86_64-apple-darwin.tar.gz/md5/4998b04416cecd78260eea1840b919b4
+LLVM_assert.v12.0.1+0.x86_64-apple-darwin.tar.gz/sha512/ec28bd785eea3d2e118e7a58fe787cbf2a40a3e3a45d9ba3c6ef876492f76a00f686881129fdcd34a0547321277a5a75d7c787b957833eaf4a78a02c8ec4b1de
+LLVM_assert.v12.0.1+0.x86_64-linux-gnu-cxx03.tar.gz/md5/3422a9ac5ca936ff35ff0a996acaa181
+LLVM_assert.v12.0.1+0.x86_64-linux-gnu-cxx03.tar.gz/sha512/0988ba7e639edeb83fce221bbefd44e039e9d31af9fc15f74e2b897f8e0bf9a4e6752ae3adf452025e0e2413c4f212abf7e0f76aa4633663ee6889dcea08aa86
+LLVM_assert.v12.0.1+0.x86_64-linux-gnu-cxx11.tar.gz/md5/99d4b687edf92129bb0e113a3efe6f83
+LLVM_assert.v12.0.1+0.x86_64-linux-gnu-cxx11.tar.gz/sha512/cd464b130428e891df2f2cb9fb3995e71e6249138ccd96fd4d1932b96186d145ae555b7d5b3c214ac9974c995a4d288202c17482467b7934fc30514bae594f02
+LLVM_assert.v12.0.1+0.x86_64-linux-musl-cxx03.tar.gz/md5/344d868a03ae76a442a4e06e59584f89
+LLVM_assert.v12.0.1+0.x86_64-linux-musl-cxx03.tar.gz/sha512/1f3dfa1a2999b3cb7a4c4f46a486168231a5dd56b2557f4ca3fd15d40f1b18b9cc32ff69b5ca657c4d5c1752068d351be078780fea8d4dc503a6b56e757798f0
+LLVM_assert.v12.0.1+0.x86_64-linux-musl-cxx11.tar.gz/md5/9024015d75a14278813ce9c65a237336
+LLVM_assert.v12.0.1+0.x86_64-linux-musl-cxx11.tar.gz/sha512/5be1e537de5a0b24203373f21cc7d5878049322218c97baca1ea2da13dadafd03efff1fc817f8e314b032254356d5802e56d744b7f910dab75f41d4c8bd65449
+LLVM_assert.v12.0.1+0.x86_64-unknown-freebsd.tar.gz/md5/5b60dc9dd86fe85ee61352e91197f9bd
+LLVM_assert.v12.0.1+0.x86_64-unknown-freebsd.tar.gz/sha512/6601bc4b76856e98615bd9a41e853f2c94d3e56e91ace9ec7b900f62ba7902b0a4276e21de01186ff2198c445269a2c7622774912a24fed5f8e30615a3c200c1
+LLVM_assert.v12.0.1+0.x86_64-w64-mingw32-cxx03.tar.gz/md5/6f72147189a9fb2de7bc4fcc768a137a
+LLVM_assert.v12.0.1+0.x86_64-w64-mingw32-cxx03.tar.gz/sha512/c3c12a22c1e7dd80a15a9d8e5bf32d1352df943ddab7eaa75c85b26ba34a7f0a1df796e97bbfb1b95a134114d1ff14938fbaa6b6f8ef61188b53cd191ad206b4
+LLVM_assert.v12.0.1+0.x86_64-w64-mingw32-cxx11.tar.gz/md5/5c10619e2da3de264fdf27691aac5c3c
+LLVM_assert.v12.0.1+0.x86_64-w64-mingw32-cxx11.tar.gz/sha512/4da219139e6420926aa4f2bc51171884c60edb35ea22fc5673d43576ecc383cf060f13ddc8ee872ccee628099e7c01d8b4c3ca1205eb7a9b214ce2a2ad7542f5
+LLVM.v12.0.1+0.aarch64-apple-darwin.tar.gz/md5/d2fabb7dd75ccac17875fe838703c6c0
+LLVM.v12.0.1+0.aarch64-apple-darwin.tar.gz/sha512/933a281a212a72e815e94644d28e259c2529868fce5a07bb005109f8b964f4cdd2aa309c65dbf5a45786689e242064ca0bb1d3a01778f438f83e1a566003933f
+LLVM.v12.0.1+0.aarch64-linux-gnu-cxx03.tar.gz/md5/acc3db12b256c4d19309bd8a043c7490
+LLVM.v12.0.1+0.aarch64-linux-gnu-cxx03.tar.gz/sha512/5f85f0f00c1c0db1b0cd816e7e1ce78d99ea91d886c67494489626c40417c72f49a6bdee2cb52b71b369172909481e92ab7d99bedf9a65589e02ac1001695838
+LLVM.v12.0.1+0.aarch64-linux-gnu-cxx11.tar.gz/md5/a0e3e199206f4ac941bf1c668123ddaa
+LLVM.v12.0.1+0.aarch64-linux-gnu-cxx11.tar.gz/sha512/1a905dbf74f8ccce1ebf276b9a9f86478bf4e582be233bfebd6a12f78895ee014ce85b3a54fac382651abd461691643373f634efc41f923e76d02075d096c453
+LLVM.v12.0.1+0.aarch64-linux-musl-cxx03.tar.gz/md5/74d20c4eab17b1b78752ef1288fa72ee
+LLVM.v12.0.1+0.aarch64-linux-musl-cxx03.tar.gz/sha512/19f1f83fae86468045ce0db8e9adcd76d7613b9bcb69bf2f63933020a0508cf6114bffb105c7280881b9504413ec88bb189c244a00ee4bb00a59e781f1ab10ae
+LLVM.v12.0.1+0.aarch64-linux-musl-cxx11.tar.gz/md5/ec0d46c670124a577fec23373d1bf554
+LLVM.v12.0.1+0.aarch64-linux-musl-cxx11.tar.gz/sha512/6478e8060b5bd8f9dabf379a1f967f7e3c5b25382eedb45b5ba95f63b3c9c072c55bbe5ade9be55d27e5dcdd4cdda5cfbc24dff9e8a72ca0d338975c2769a0a2
+LLVM.v12.0.1+0.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/eeac65dd85aa6d66ef6cad2150d0906a
+LLVM.v12.0.1+0.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/e66102d5103980d25781945e54cf5c50b9029b57df052a94b2c4481dcbee0c30cee0ec78f5da523385482204f9d9875575a14af21dc8dbb0cf08127f9a3225e8
+LLVM.v12.0.1+0.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/6ffc46886a2e730bbc7fcfdb76b627db
+LLVM.v12.0.1+0.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/eca9686b54eeda7b539acb8437fb43e2676e72501bf677de103502bb41ab708f0abb759e8be11f1e845da34aa16aa3f365efbafd10c326a1f02978a954b607ae
+LLVM.v12.0.1+0.armv6l-linux-musleabihf-cxx03.tar.gz/md5/4dc0d110eb0953835e7cb624d82e1688
+LLVM.v12.0.1+0.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/d49a4513f3fcd69c7b8521964d25f0747933561b41cb7c40058581d2feeb9f3ee5811784a93fab4784e594612415729835ae086c7b6a3235c0e506759e1d5b90
+LLVM.v12.0.1+0.armv6l-linux-musleabihf-cxx11.tar.gz/md5/19c73dafac806d36cb63c225ec8a0d2b
+LLVM.v12.0.1+0.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/50b4c36d3ea6b6a8570ec85ca02b454b8912c025e0ff28a9232585a0bc54dc90e2ce5ff20db5b4bb01359100cb74aa65517336486fce9abe5c90d5b9440fd0bd
+LLVM.v12.0.1+0.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/4ff58627dc352dc8dbcdddfe15e11dd1
+LLVM.v12.0.1+0.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/f5d1c2c92c5999797a0892d2ed0ff64fd2b9ccaec3f091d17b5c76896bd99faf24b2ee95605772364087e79522bfab54b4ad2d4327e4bdbd410cd6ef2f3488cb
+LLVM.v12.0.1+0.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/4a9e0c6ad9bb5ddb511b246bd1794db7
+LLVM.v12.0.1+0.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/8ed5f76811231f5821fd192e7191187554ee770e6aa59794aef645d9a3fa3ac2b26ab1d0e849520c739171c41e7f00ce8872d868f5efad646f0feb16e2c0255f
+LLVM.v12.0.1+0.armv7l-linux-musleabihf-cxx03.tar.gz/md5/7a03cf38fb4d11ceb2d67c282228f7d0
+LLVM.v12.0.1+0.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/65b84023e2dc8e4da55eca0ffbd0921c18ce3c4e9cf8b95d56442c0cc6bdec25bb1063a273aed390ddbf7839a84588df92ef7152c2bd999984e62f81b03e53b7
+LLVM.v12.0.1+0.armv7l-linux-musleabihf-cxx11.tar.gz/md5/b8abd832fbe5b103c7fc53eb3a62d0c9
+LLVM.v12.0.1+0.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/cf22eadec8131ec287bc72fdfc90e65f81ddd21ccb908c7c0a8aff79b72f17d5429f6a9e0748e9581e82d82d089f433b2bcdfe96d18b3cdbec3019150f91efc9
+LLVM.v12.0.1+0.i686-linux-gnu-cxx03.tar.gz/md5/cb62fed90d93fd1caae4bffa8568a923
+LLVM.v12.0.1+0.i686-linux-gnu-cxx03.tar.gz/sha512/e8459774501520179052ce27d6e28025776e81f90e64c6835671a82882f03c5835f6c0536c3d4742b8bf3456a787378a7883faf1592889d67c6b1861accb95fc
+LLVM.v12.0.1+0.i686-linux-gnu-cxx11.tar.gz/md5/cdd8eac17b0979f25579b5df14c22dac
+LLVM.v12.0.1+0.i686-linux-gnu-cxx11.tar.gz/sha512/51a92d27b676e755cc4ef5762a68ed323b9169bcc6891f1f979dcdceafa5d5a627abfd2a048726fc05bfe8063e678645e52499409143cb91fcc1640ae1a803db
+LLVM.v12.0.1+0.i686-linux-musl-cxx03.tar.gz/md5/292dad78439e6adf32e4c52991e48ee4
+LLVM.v12.0.1+0.i686-linux-musl-cxx03.tar.gz/sha512/7c99a2a768c31a982d773a954109f2e1c6bd7c697e89e9b21a53fbdf91a46688a471cff2652fb7fd7809b0202526f8a94708958f1d1cbc49c000ad88c3c56a21
+LLVM.v12.0.1+0.i686-linux-musl-cxx11.tar.gz/md5/287b98d99f9cc1537206cef8e962babc
+LLVM.v12.0.1+0.i686-linux-musl-cxx11.tar.gz/sha512/a7185740df4ab6e8de6d77c4cb810502fec5b5f4b1f24a5dd87f717ef8d881631fb25842236fb177a8fb193d349e9f0072f94632b080147af6dd451f3064dcce
+LLVM.v12.0.1+0.i686-w64-mingw32-cxx03.tar.gz/md5/bde06b7de8bfe67bd17a3eddeceda181
+LLVM.v12.0.1+0.i686-w64-mingw32-cxx03.tar.gz/sha512/e08c227617f9baccaa3200d82bd98017a41fe4230d3eddce11e9224e63ed7206124931ffd2553d61d5a937213da74f605611f531dd14405de4d43873b9e32afe
+LLVM.v12.0.1+0.i686-w64-mingw32-cxx11.tar.gz/md5/261802abe708e1ba66e31e5247c39c7f
+LLVM.v12.0.1+0.i686-w64-mingw32-cxx11.tar.gz/sha512/779b8b78be912aed4e3ef52ffc9331ea94d223d86dbc470b4690c21869522937e9e52cbe325e7795bc92f1f5a4aaf26f5abfc2f24aa3a0314d5e19d8842d169b
+LLVM.v12.0.1+0.powerpc64le-linux-gnu-cxx03.tar.gz/md5/b5ddd1f4eaead404012fafb404887817
+LLVM.v12.0.1+0.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/4b607174be6f39cecc39f71db661a207ea034192f652782c1a49a5d8efd3eec5488eb8d70554a9b2ab8a3dad74f90b243d5ff8651fd6e366064c06ded6cfb586
+LLVM.v12.0.1+0.powerpc64le-linux-gnu-cxx11.tar.gz/md5/5623d2d0a1f3b1de282d520a4345d9ee
+LLVM.v12.0.1+0.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/ca7c2eda3a33682289b39dcff0d203927faa805b89ca2e452ecc98f3feac86ce0423ed8f53ca2ccde70f661e458e39023bb1bc702f1c3bfbed10e252a1914c1e
+LLVM.v12.0.1+0.x86_64-apple-darwin.tar.gz/md5/eb59f57374df1072421a64f61b53eebe
+LLVM.v12.0.1+0.x86_64-apple-darwin.tar.gz/sha512/4e05f3e0f4eb4f0240c44427b6e6207304e9828bb18be0741bcb794214c64f01b7464bd40f5ace49428f805b08b1b3ffd48b0f2b463f4df1f9441da0d0aa2986
+LLVM.v12.0.1+0.x86_64-linux-gnu-cxx03.tar.gz/md5/a1ea1dd2b8a37e8cf1f50ab1d5a8727d
+LLVM.v12.0.1+0.x86_64-linux-gnu-cxx03.tar.gz/sha512/46423791f1fe0ae41868b6b2cecb4ed2cab818516e384fd74668b9ea60c0142b839ed559259e8af4aeef182d5179a001138f0f7ab8c0f2f71bfba1d087b5c8a0
+LLVM.v12.0.1+0.x86_64-linux-gnu-cxx11.tar.gz/md5/0284dff55154d6d35a9c143e4b4da8ae
+LLVM.v12.0.1+0.x86_64-linux-gnu-cxx11.tar.gz/sha512/82bfc6ce434cdf0a8ef8df9a3af97bde370e14007bf42837446ab4561ea7d9336ff77853a2a5e6d8675f74d0739bff3729f3625e8a75bbc0aa469b11df156fcf
+LLVM.v12.0.1+0.x86_64-linux-musl-cxx03.tar.gz/md5/9bbbb46e2b4cb35a038bc96e3f7c4621
+LLVM.v12.0.1+0.x86_64-linux-musl-cxx03.tar.gz/sha512/97a5ab75fdff62d85c5996286ffc30c757ca83d965ca7a9442b0a7be57d2daf4a0913442ed8db39b9712702ff21e4002df7df01e971c64e032767584f90febb7
+LLVM.v12.0.1+0.x86_64-linux-musl-cxx11.tar.gz/md5/3fbaf503543a175c681ff5a78e54683a
+LLVM.v12.0.1+0.x86_64-linux-musl-cxx11.tar.gz/sha512/1a9b2774e8265f1d90b9eac051ff9841ae912bc9bef9b8871551b3a6dba0fb76ca87433ee2f5bac99c27d642afc54248cde5b32e3d4ed35e86d5ed104fc803ce
+LLVM.v12.0.1+0.x86_64-unknown-freebsd.tar.gz/md5/e7661c57e4f198442b5f7142bdde26c7
+LLVM.v12.0.1+0.x86_64-unknown-freebsd.tar.gz/sha512/41380a4603d5b78035e8a3550826a69887ab0f0336384c7c6aa9d123c5aa68fbc780ff8142403a2e5b69d3bacc2a93f439bf3217335542a14380d13812bbe1d3
+LLVM.v12.0.1+0.x86_64-w64-mingw32-cxx03.tar.gz/md5/89400c38523fb8d13064beac9dc7dd20
+LLVM.v12.0.1+0.x86_64-w64-mingw32-cxx03.tar.gz/sha512/6de8dfaded2b61fa3780b08efce53252b8f4521fe2913fa278e2c17e3db988996de8d45c685357ec12ba770d321d93f39ed48909b5eb72c26987a0c516857e86
+LLVM.v12.0.1+0.x86_64-w64-mingw32-cxx11.tar.gz/md5/ad4454540db96568b4b5f4fd6ff3d782
+LLVM.v12.0.1+0.x86_64-w64-mingw32-cxx11.tar.gz/sha512/3a22f3ccd38201eda44c8fdc37495eb257027ccc73a1f08c8b6f3b90db218a69363c08b610b2590e7bc33c1aceeb9caf62a8f424599d2a3258c70cc215893489
diff --git a/deps/checksums/llvmunwind b/deps/checksums/llvmunwind
index c2876364d4066a..fd15b697754ded 100644
--- a/deps/checksums/llvmunwind
+++ b/deps/checksums/llvmunwind
@@ -1,2 +1,36 @@
-llvmunwind-11.0.1.tar.xz/md5/b030a6d5807d797e505e4fbd32a36c2a
-llvmunwind-11.0.1.tar.xz/sha512/9cfa1eae720a99ac10d05aa4d1e1b205da5c78841aafd6022a87d1272c821a43402309dfa42d8863bc6dea330ab2c0917d62284b572abb56641aa80e56a9be69
+llvmunwind-12.0.1.tar.xz/md5/4ec327cee517fdb1f6a20e83748e2c7b
+llvmunwind-12.0.1.tar.xz/sha512/847b6ba03010a43f4fdbfdc49bf16d18fd18474d01584712e651b11191814bf7c1cf53475021d9ee447ed78413202b4ed97973d7bdd851d3e49f8d06f55a7af4
+LLVMLibUnwind.v12.0.1+0.aarch64-apple-darwin.tar.gz/md5/b95ad4844e649bf46db43683b55b9f4f
+LLVMLibUnwind.v12.0.1+0.aarch64-apple-darwin.tar.gz/sha256/
+LLVMLibUnwind.v12.0.1+0.aarch64-apple-darwin.tar.gz/md5/b95ad4844e649bf46db43683b55b9f4f
+LLVMLibUnwind.v12.0.1+0.aarch64-apple-darwin.tar.gz/sha512/15e0996aebe6db91fe58121001aa7ea4b23685ead3c26b5d89afae34b535e34b4e801a971f4854d8e1a1fbc805cece06272470622eef863e225358113a127913
+LLVMLibUnwind.v12.0.1+0.aarch64-linux-gnu.tar.gz/md5/6d8783dc9b86c9884e0877f0d8ac4167
+LLVMLibUnwind.v12.0.1+0.aarch64-linux-gnu.tar.gz/sha512/d3b0c81498220d77e4f3cc684fb2cc0653792c381207390e695ac30bc74249f96a333a406b2cebdaca14e0b0a27b188cba6209bb5c1cbbb5c184d5626dbdc7a0
+LLVMLibUnwind.v12.0.1+0.aarch64-linux-musl.tar.gz/md5/052a35e879d52244e4b0804be875a38f
+LLVMLibUnwind.v12.0.1+0.aarch64-linux-musl.tar.gz/sha512/d1b34fb97f9928e046d3131a050454710a93d38e60287b7e3c92f179f436586d3230cf90b0ca0eb8a3f9ef89fef7b1ffd7d52871645dfa233a8b07ca87ea2ee4
+LLVMLibUnwind.v12.0.1+0.armv6l-linux-gnueabihf.tar.gz/md5/1ad96a03a5dde506b5c05773b1849ec4
+LLVMLibUnwind.v12.0.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/82306fb7b920fa7c71bd53b23d6915e7f256e8da9679cc926a53bb0d879f1f4469f43efe556ca32c9ef59e27b435572c7b39859090652635db4eeefdec0d1685
+LLVMLibUnwind.v12.0.1+0.armv6l-linux-musleabihf.tar.gz/md5/6a24fcd3a4dc3b1a98bb7963b1bb4930
+LLVMLibUnwind.v12.0.1+0.armv6l-linux-musleabihf.tar.gz/sha512/9ba6b83ccec061a1e5260c807dc8afd6e18799431b25a7e65b97662cc4db02509d02ea07fe12025d80914cec7383624b1c8fc9add46511c668e184ede263ac52
+LLVMLibUnwind.v12.0.1+0.armv7l-linux-gnueabihf.tar.gz/md5/09f1bfcf58a4124561553ab5005f9538
+LLVMLibUnwind.v12.0.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/b0907cb857131183ffc338780c6c6dd1d48bf0ba61c3da1b8f20cf9a943373173b621cf9b2e8f1fbc657059a896b84aa025e6d4f0f1d1e8b623fac3e96541765
+LLVMLibUnwind.v12.0.1+0.armv7l-linux-musleabihf.tar.gz/md5/19158bcfae716b26f924d67c4e719342
+LLVMLibUnwind.v12.0.1+0.armv7l-linux-musleabihf.tar.gz/sha512/a90be57990b6699cb737ba96904e94e1f082601ca9d01e670f025b5500f526980741921c9cf672accab78cb5327714ab6ecdbb875174088f0773ebb627a98819
+LLVMLibUnwind.v12.0.1+0.i686-linux-gnu.tar.gz/md5/ba75556eb96b2bcdaf73ff68386d3bc3
+LLVMLibUnwind.v12.0.1+0.i686-linux-gnu.tar.gz/sha512/612fb765695b7aae11ef29608eedf8b959f60c021287a67b03a2a0f57a5814001ffa9b261c9d60d5f3d0582c06c2b41f75fd3afb66a045a248bd43d29e304c97
+LLVMLibUnwind.v12.0.1+0.i686-linux-musl.tar.gz/md5/2fcbceeb1bfde29be0cbca8bb6718bfe
+LLVMLibUnwind.v12.0.1+0.i686-linux-musl.tar.gz/sha512/58f281cfc70b3f8a59cf4faa7732824637c811ddc5ea6a058f294f4c3ed4fa6c8ddab5c007567b439f2854635cf4fd146284059bfbc73e7006000ced9383f705
+LLVMLibUnwind.v12.0.1+0.i686-w64-mingw32.tar.gz/md5/153c028d97dceb6924414a7a9a137e1e
+LLVMLibUnwind.v12.0.1+0.i686-w64-mingw32.tar.gz/sha512/7ae1f197600eabde9036ae58623de34a6d25636d7861777e324eb97902f65e26c6f3775e757178f8914b0cb6c2e925413f5ffc6abc9b6138470dc9e67a17f212
+LLVMLibUnwind.v12.0.1+0.powerpc64le-linux-gnu.tar.gz/md5/c08a6cf3e1baf156eb05003ed4e9ebe9
+LLVMLibUnwind.v12.0.1+0.powerpc64le-linux-gnu.tar.gz/sha512/f74e44986622329990842cb3ff549ff9254c81863d8bee468b0e58b7621067e7e7f7f18e4cbeafad6a05e0c107323de6828a78dc7afbcd7cd1892383ff417968
+LLVMLibUnwind.v12.0.1+0.x86_64-apple-darwin.tar.gz/md5/caf151150e56827be09acca6964d2b18
+LLVMLibUnwind.v12.0.1+0.x86_64-apple-darwin.tar.gz/sha512/cb3e7aa71367ec4a115bccc2e8ac6bd5d9f22b3935b3889eee1fbf7303c5f553d7d3108977bc1f6c9b6917a6ed9e10bff211fd56b8169233ceae287b112894c2
+LLVMLibUnwind.v12.0.1+0.x86_64-linux-gnu.tar.gz/md5/d95874cbf6f8b55bc314c3968a6a4563
+LLVMLibUnwind.v12.0.1+0.x86_64-linux-gnu.tar.gz/sha512/4986a8d9cc9d8761a99a4f02d017b424484233d4cbe2d4f49ccd371591384b1b8d1c4d31cb908505b86b00f2b164568e57751dd949d91af203ee4a582971798a
+LLVMLibUnwind.v12.0.1+0.x86_64-linux-musl.tar.gz/md5/89077d871e15425b1f4c2451fb19a1b2
+LLVMLibUnwind.v12.0.1+0.x86_64-linux-musl.tar.gz/sha512/b65a218b05ade2e2d1582188897b036a4596d09cf65558f178c49c1a1a62b7d992b1d99fbe86a027dc83b614f178e6061f3dfb695b18a8e2b6bf76779b741d96
+LLVMLibUnwind.v12.0.1+0.x86_64-unknown-freebsd.tar.gz/md5/54ac594b4c8e7f261034a8829dad5e34
+LLVMLibUnwind.v12.0.1+0.x86_64-unknown-freebsd.tar.gz/sha512/a43756afd92081e6dd7244d162862fc318b41ca110a5e8be6e4ee2d8fdfd8fb0f79961ae55e48913e055779791bd1c0ecd34fd59281fb66b3c4f24a1f44128f0
+LLVMLibUnwind.v12.0.1+0.x86_64-w64-mingw32.tar.gz/md5/83cf8fc2a085a73b8af4245a82b7d32f
+LLVMLibUnwind.v12.0.1+0.x86_64-w64-mingw32.tar.gz/sha512/297a5c7b33bd3f57878871eccb3b9879ea5549639523a1b9db356b710cafb232906a74d668315340d60ba0c5087d3400f14ab92c3704e32e062e6b546abf7df6
diff --git a/deps/checksums/openblas b/deps/checksums/openblas
index 722bd059bfd85f..597c5d5aa58682 100644
--- a/deps/checksums/openblas
+++ b/deps/checksums/openblas
@@ -1,94 +1,94 @@
-OpenBLAS.v0.3.13+3.aarch64-apple-darwin-libgfortran5.tar.gz/md5/8604b202881c267dc73330e81722c2d4
-OpenBLAS.v0.3.13+3.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/9adc2e3234d3eeb6b0d970498b4b80427b32252fb76f9b28245c84a589cb20349dde2ff2822a26302bbb97ddcfbfa4ffbf98d6881f3c1684a6acce1fe3c3bd8f
-OpenBLAS.v0.3.13+3.aarch64-linux-gnu-libgfortran3.tar.gz/md5/33e09a2f2d4abac4b272faff4d85ff23
-OpenBLAS.v0.3.13+3.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/e3faf2f1de1244a6715c2cca032944b2ed0ff03c43bc617d5243738b3b02e4a87c93545beae7d3d5fae829389b20b303c2f3c59617031b3858438b03e19e1182
-OpenBLAS.v0.3.13+3.aarch64-linux-gnu-libgfortran4.tar.gz/md5/e3f9326cbcf0c1475c3b15498ce7dbc9
-OpenBLAS.v0.3.13+3.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/fcb338fbf314e9421c711d84078948829936c710335e210d9f5d5a9c3e281f37d50abd6912543857ab325b8d6af7cef3266ea7ef3b74eb46b63b59ccb0c34882
-OpenBLAS.v0.3.13+3.aarch64-linux-gnu-libgfortran5.tar.gz/md5/b60363440ca1f92ec312bb0f3be60f08
-OpenBLAS.v0.3.13+3.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/45be2d0259ddd67c1f7b53d60dd3af74cb51f2bd83aa308a14e03488452563d0099521101d37ba37e44e1f80981608f1cc8f05539e90b3b7bb53735098b49bee
-OpenBLAS.v0.3.13+3.aarch64-linux-musl-libgfortran3.tar.gz/md5/f3d5076cc0d15a7516dfcb94214b91e5
-OpenBLAS.v0.3.13+3.aarch64-linux-musl-libgfortran3.tar.gz/sha512/6015eb06ce1bb9b63434cd1561e9866a89b30afb104300089b0745a7beabc390415b033f317d687e10894c0b5f73794798ca50d2eeb27e10d03ce5eb4808682e
-OpenBLAS.v0.3.13+3.aarch64-linux-musl-libgfortran4.tar.gz/md5/579def3457520102a98d4114686b2a19
-OpenBLAS.v0.3.13+3.aarch64-linux-musl-libgfortran4.tar.gz/sha512/c8887e1f0331cee22c02e96fa829f82f9fa571f97fcaacee5714da113345ef779c987e4565a09ab84104db7fb08ae741cee118da7e05ee86ebe72c55d36ed245
-OpenBLAS.v0.3.13+3.aarch64-linux-musl-libgfortran5.tar.gz/md5/ba72ad025cbf670c15ce092403d6a40c
-OpenBLAS.v0.3.13+3.aarch64-linux-musl-libgfortran5.tar.gz/sha512/ba1fbb3e70d63490cc94502651babd71c88f0aa430a334308e9abe5f3fdcb41ba1652fd9cfedb88ba19da460589318bae816f50e8fad917d87f3542d9e15e71b
-OpenBLAS.v0.3.13+3.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/998bcfd194dc81449e7cfdd553128547
-OpenBLAS.v0.3.13+3.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/34e7730cf5dd58887566890a5ca7571989be29c0d824b6374ff6a06db20218945c868e8b224e9fa8dfc2388e9d3f9f1bba051202cb2ebcf5f0c37d985433d0d9
-OpenBLAS.v0.3.13+3.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/6d3548f1992a6104e2a929b726b9d4e1
-OpenBLAS.v0.3.13+3.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/ae7604b788ac0a1210be4c8cfdd1428426c8d8c13839fb2a970c3f3265daa978487c2bd572d83ff926a13f0f01afe49aa8230452002242622e915990b20856f3
-OpenBLAS.v0.3.13+3.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/f9001a75f9b953bcc80be5b2a429d31b
-OpenBLAS.v0.3.13+3.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/8afec322b5922242e225273ec4787a98e1fde9f6c9ebb785fa0e1a579327d7e9a3f7095f0891f4da83e0e75b63224d812984f62930310a27b9da95bdfb883028
-OpenBLAS.v0.3.13+3.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/daa9a1fb3c4424688b46aec8fd4cee51
-OpenBLAS.v0.3.13+3.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/6fb73a80b571c2f40a39f512b0626da62a6e10b1ded8e6f816796a9121a03ce84dd1a6a852a89d7815edf867332c60d03843030405d8d5ec0bc4be5e4b5034af
-OpenBLAS.v0.3.13+3.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/a01df5e91de4c24963367ffcfc12dc49
-OpenBLAS.v0.3.13+3.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/893a5ad53ec1d5d0917b95ea424936778276db7bacfd7f807551de299ad5b886552fe681ea374043156daf763434d78129e5e8e783ef56d360651de8f610b1e5
-OpenBLAS.v0.3.13+3.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/9d32ba768cb1f3723153a0921f83ddd2
-OpenBLAS.v0.3.13+3.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/1b7b22c5d5827ea542e1a275af1807303614f62c50a3f799ddd099e8546adf7c75ee7771fe627904c28d78bd1ed2a9ee16f11e1e63e2e02567ab725a5b4a723e
-OpenBLAS.v0.3.13+3.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/9471ef8b127fae0d7cd76a876da0e2f1
-OpenBLAS.v0.3.13+3.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/53e83a7f1abcb2465d2440e6ffcc8981b67908d1920bdd627fa7d09e78388a0d90d6e06881f2f8b187755ca61a144f95e0839358b1890d914b117e13701dc00e
-OpenBLAS.v0.3.13+3.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/4d9125d5a35b37b5bf411257748209b2
-OpenBLAS.v0.3.13+3.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/419f20c048c2a60b1f75ea095ede7c7079a4711e07850612b916d395b9b9e7014c01328269a13b1025718f7e2fa8f384343d5797e78941b091f806711800ac59
-OpenBLAS.v0.3.13+3.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/b05d4c436a309f862f0345e6d770e712
-OpenBLAS.v0.3.13+3.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/e136fe5ca3016715d657f792a832099f4e4b8f29b7e5e18b83b4706c9e1f421c9457f50dbf99b940b9663f3316677796eb38c6c391ed2135f308bee1ac59389f
-OpenBLAS.v0.3.13+3.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/a9c01d75a82bad9cdb640402dcad81d9
-OpenBLAS.v0.3.13+3.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/6143d3e2fc094d837c6d37047496aa3a4f4c5d01f3fb72e47c681c144b6af02c6b0310932910c0f3aac9a7e19c2bc0c14e0f792fc2e9193aaa1448bce6ccb84d
-OpenBLAS.v0.3.13+3.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/c59422761f5b7d9696a4c1c505787f41
-OpenBLAS.v0.3.13+3.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/22984386ea44e0e07889d89208556cc65af038b34e95f199aacb1499dc7c1e8086c7e2c05179bbd9723f0e40e791a348bbb8db6634c954efe7f79e4676fe36f2
-OpenBLAS.v0.3.13+3.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/29e90223144b14a357f42e44724c11d8
-OpenBLAS.v0.3.13+3.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/37b36ce6fe8fd14df505da7a0f57bdf5884fe5b030902a95794923a69d30eba7bc93a2da662db5648aa90bc95a88bd95625b3eb2e90a67fc9349f8f0e930ef64
-OpenBLAS.v0.3.13+3.i686-linux-gnu-libgfortran3.tar.gz/md5/28791e1d052d430c42220c18b0d6df61
-OpenBLAS.v0.3.13+3.i686-linux-gnu-libgfortran3.tar.gz/sha512/679d10e7ae4c2a8d38943ef20eb075ed9bc2ecb74dd4fb8e476c67b07659ce2fab39fb3bd30034dd7ed2d4d4103597a394e868642a92095de00188e373e38cd7
-OpenBLAS.v0.3.13+3.i686-linux-gnu-libgfortran4.tar.gz/md5/ca5dca3237f0b718e8f8603c3cb1e472
-OpenBLAS.v0.3.13+3.i686-linux-gnu-libgfortran4.tar.gz/sha512/914ab098759866ab3cd368b136ca6d2382a597ef426422314229a5f39344530e7c89f248c011eb183b8bde68e8d2dbd51ed7d0b6b1438c63322a100470378271
-OpenBLAS.v0.3.13+3.i686-linux-gnu-libgfortran5.tar.gz/md5/d0fdd71df8ad278b1054fe2a2e133807
-OpenBLAS.v0.3.13+3.i686-linux-gnu-libgfortran5.tar.gz/sha512/5c0cbbe3992b60a5c51760257f2eebf54113db0ad93de3c8f5f9ed1aa64ccb2a5485ccfea9e127ca473d910cddccaa5e278f93c90328fe770c78557c46f7d57b
-OpenBLAS.v0.3.13+3.i686-linux-musl-libgfortran3.tar.gz/md5/215c7e2db8890d637cc63707b13dc0c6
-OpenBLAS.v0.3.13+3.i686-linux-musl-libgfortran3.tar.gz/sha512/710e60446d05ad5f61e24f14769144ee621729973769f1e970df2fe330f821379af549f9dfbac6bb5034477443c063e0f7bcfe7cfe848f37ea4a9e44fc0af602
-OpenBLAS.v0.3.13+3.i686-linux-musl-libgfortran4.tar.gz/md5/27a879f4c90c1509365f7b4712ac7313
-OpenBLAS.v0.3.13+3.i686-linux-musl-libgfortran4.tar.gz/sha512/188fe3ad0b214d4b75f53f63c39183e609f34f8e0d70cce221bd46d3fac659318479413d65f66238f01c4d3c699224ac5d0e007bbfd27bdea527d36a37c22017
-OpenBLAS.v0.3.13+3.i686-linux-musl-libgfortran5.tar.gz/md5/83547926ba627e621437f7843a8ddac7
-OpenBLAS.v0.3.13+3.i686-linux-musl-libgfortran5.tar.gz/sha512/425e8534e5efdf1c6e10ba63ead9e57e687f0a61892d64de0a36f0beaf845cbd9ad914d564fba5d57a343316c5e6d768af991a15c40cad0b0d00304f4d05f6b8
-OpenBLAS.v0.3.13+3.i686-w64-mingw32-libgfortran3.tar.gz/md5/6f1994558d66532e3723d5a4d0bb01ad
-OpenBLAS.v0.3.13+3.i686-w64-mingw32-libgfortran3.tar.gz/sha512/52de4b2a43e512496a14c8d01987df0d75f78eef9ae273dace45ede477a04ee47ad70535566dd50bf9ea0c752625f8ad57e70d7dbee8f4e123fdb08fd4729b7b
-OpenBLAS.v0.3.13+3.i686-w64-mingw32-libgfortran4.tar.gz/md5/4a74f60ac55729a0a94d3f664a42ea59
-OpenBLAS.v0.3.13+3.i686-w64-mingw32-libgfortran4.tar.gz/sha512/94d77add6def979b01da61026994174f0b9f4a6a4f3d57a3994fcc4091e69b6dcb5dca004ca75fa7fc93ca256847094c99eb97b5ccf274d6412608f0ccd06c2b
-OpenBLAS.v0.3.13+3.i686-w64-mingw32-libgfortran5.tar.gz/md5/6e01d0fb2859715b6821d216431325af
-OpenBLAS.v0.3.13+3.i686-w64-mingw32-libgfortran5.tar.gz/sha512/667ee31f42614a6e99ffbc67668a320aba2b315684c812c3dda18929f2dc1199c56b39cbf012970950a5e5ac12b6f3d4ad6cf126448074b6b0fbe5dd665ce915
-OpenBLAS.v0.3.13+3.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/e117edbb8c90a7d998c45a6764dc4354
-OpenBLAS.v0.3.13+3.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/906f73b660439e0f453bd81d6b32cd59f954556a202d1ff7fc28c44d0b3eccfeb94f926eb91515b7b2047c342c152de25d007e3dc5d9b5646368d35c8d491cc7
-OpenBLAS.v0.3.13+3.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/9c2d20df24b2fbe8a46bff4c2892be4f
-OpenBLAS.v0.3.13+3.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/916340447ef790e0fc80a37ad6b1bc173bcda7176204dac70a62d49176b08a2d90862cbe9587c6704548cdce71f41424d35746e2e73fea19841d26da02afd233
-OpenBLAS.v0.3.13+3.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/8bbad84a47b5aa9ef3ac7621b5faa190
-OpenBLAS.v0.3.13+3.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/35fadabc402fd38fdcabf91da9d22745d92fe25d434276f79075ed0574ffe6a55964a77a8f50a22a0e5d0aaffbee17b1d5e415d7fd7f4b3f0b09fed3b64256b6
-OpenBLAS.v0.3.13+3.x86_64-apple-darwin-libgfortran3.tar.gz/md5/0813317427a6a7a49306b0e448c5a5fb
-OpenBLAS.v0.3.13+3.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/50686a37ff7511e786e8544b64d94c02a84f95d75e3dfb477aa3e504c6404ca9e3679add9a8491228e62343cd8e493933399dec654b016b431127171bf94cbcc
-OpenBLAS.v0.3.13+3.x86_64-apple-darwin-libgfortran4.tar.gz/md5/ab6bd7d9c409102bf586a8aed4ebd13a
-OpenBLAS.v0.3.13+3.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/bd9d1d1901004f775df626fb640d8631ccb4de985b5964fe62cf0058e419184f2d9250e20c6341add1add892d2011e0d21d3823536a2b647f7ca78f60db489c8
-OpenBLAS.v0.3.13+3.x86_64-apple-darwin-libgfortran5.tar.gz/md5/e369283ebd3ebd30fa05204ca53a5cf7
-OpenBLAS.v0.3.13+3.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/24d19a0fc08421493713e3a61b7c9519346d8237e0f74f74fe98bd34cdd0031d084cef7d46f43c53bfb7f24bf7e7bd0899f03df7dd167c4762b0f9c3d4173289
-OpenBLAS.v0.3.13+3.x86_64-linux-gnu-libgfortran3.tar.gz/md5/8a4063223249acc7be6b0fff2e7204b7
-OpenBLAS.v0.3.13+3.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/13ca28a88c9f5e61f1716a7244b9085b3e463a658bc414b2d3a16413ab448799081a3d92c7e67c71b702ab5337b5c9399f9a070c0e38d5ceb06526d4739129cf
-OpenBLAS.v0.3.13+3.x86_64-linux-gnu-libgfortran4.tar.gz/md5/84cba3ec45759145675c96c269e48240
-OpenBLAS.v0.3.13+3.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/a66872e004eeabedc3963d9cb341df2ceb46da66f97a6103fedc7b900adff9b63503b2fed87565cdb972112d881cf668a72a4a666fb16e75abaaee3e8df720d3
-OpenBLAS.v0.3.13+3.x86_64-linux-gnu-libgfortran5.tar.gz/md5/f71a6b64bca5a25132db4626749020bd
-OpenBLAS.v0.3.13+3.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/03a0315a48aaa4424adebc6670e1ec1c2819836eccfb8b8f327f66d8ec482b4ab45b76f1fe8cc6eefe408b33106fa0389690457e047ebbb08c1c8aa66281e87b
-OpenBLAS.v0.3.13+3.x86_64-linux-musl-libgfortran3.tar.gz/md5/98baf7e5285880268bdf86d5c4fe694b
-OpenBLAS.v0.3.13+3.x86_64-linux-musl-libgfortran3.tar.gz/sha512/263f4d98fc89c8d7593707b10859c369bf72bd80aeff26f2f7e9484b4360c9faa28ace33ae3cd5a5f0c4870d59c8412e9be3fac212af5c17689bd609d2054481
-OpenBLAS.v0.3.13+3.x86_64-linux-musl-libgfortran4.tar.gz/md5/6113194122b3fce00fe129af2ae48526
-OpenBLAS.v0.3.13+3.x86_64-linux-musl-libgfortran4.tar.gz/sha512/fd2b93ded2be5a2850cdfe55a02465f41b0bf1f8b5a0d7341ea475de08f870c45ee320d15492a526fd6f4490e2b944898c974a0dcc1a0294e3e585593e4ad734
-OpenBLAS.v0.3.13+3.x86_64-linux-musl-libgfortran5.tar.gz/md5/902b38c5eba17b699763936f6f7c4875
-OpenBLAS.v0.3.13+3.x86_64-linux-musl-libgfortran5.tar.gz/sha512/34eeaa9cc9fb7e9b1a2151f98b6d2bf36a6c0edc87e02fd3aa322547be4ea195f2c1739b90a9027dfbc9512f4af9e8d76e6d78c1a1f8cce9173aebcedf47c550
-OpenBLAS.v0.3.13+3.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/7b01325206d4e9ebc9d8169ab19cafa0
-OpenBLAS.v0.3.13+3.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/81a34637bd98073e64ecd11682e2ecfa7906f1032cb2549ea72adc83d471790baf3b6c43f6247a33469c7c275fe444f488f0a04139d131fb4a52919a25f05ab0
-OpenBLAS.v0.3.13+3.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/6df828a5f9cb90e6d897e7934a6d447c
-OpenBLAS.v0.3.13+3.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/5a703ce4e1360ec76c782237e827cd99ad65696a659cfd1dbeb9ad8d1b751c4943e3edde4c53c2f1c17afdcde83bdfae60f6116a3602a96129f16f77e2faf5bc
-OpenBLAS.v0.3.13+3.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/2c9cb0175741399c21f7a685795833b3
-OpenBLAS.v0.3.13+3.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/6f4b51bfa30310c85c813b03e6fa44acae7479935cb9fdc977286519bb00b3860f1ea502a3c2072884739dd108103dcfb9ee167a9d31d1b31fca7f087fc7dc7e
-OpenBLAS.v0.3.13+3.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/ed4b72c7f57927c3b89efcbd7e9bb21b
-OpenBLAS.v0.3.13+3.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/c6663ec291ad053523f6a1d4c5fcbed83c7a412f534db90e0f690582e5ec19c7bcc65c903322e7e2a75a453caf5cdd9251ba7a83de5d44b8cd45ca022c6f7fdb
-OpenBLAS.v0.3.13+3.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/960dca2ea7e3d72ddbe918ed61787f25
-OpenBLAS.v0.3.13+3.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/a78af513c6b6b5a0fd7fa5f5b5c598d4724025eb12e57653559c772092095b0c8b2e50ea174205d28aef9c3c503999674882c3771f02a2683bdfa62abb1f4474
-OpenBLAS.v0.3.13+3.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/d3a0f811ae8e70033c9c75c9684dba2e
-OpenBLAS.v0.3.13+3.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/0db4a8aba989d291b9ffbb8856f0922b38f84e6d321ef9ffdb56b455dacbf32252109c505e8f1d37f7e75c20d839ae3490a9353bbcfaefe7bab7fbef3b0a0c1f
-openblas-d2b11c47774b9216660e76e2fc67e87079f26fa1.tar.gz/md5/d50510c8b60b477b1ece4abafe66bf4d
-openblas-d2b11c47774b9216660e76e2fc67e87079f26fa1.tar.gz/sha512/95ce349353bea5e9c30455410b92fa9ebc833e58163a53a65b396c76fddf19147a323a3c69eea5892354488c67b027e20dc9b03754a0988f3bc8a4e9ad446b8b
+OpenBLAS.v0.3.17+0.aarch64-apple-darwin-libgfortran5.tar.gz/md5/6d1adab725ba1f1e63a773610120c415
+OpenBLAS.v0.3.17+0.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/004b7b9babaeded60e2d4d24bc07f6c5edf04ee6564cf9b2ee5f39e72859a2100df637631d79cbb4be26b740141d15c9794c443d2798850dbf1cdaa47c1ee1ed
+OpenBLAS.v0.3.17+0.aarch64-linux-gnu-libgfortran3.tar.gz/md5/d18088e102948fe95b13ef8a0b1420fc
+OpenBLAS.v0.3.17+0.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/855860b44af6c9c9b40f3e07e6c6db2b54714a211788bee377becd9da58f17bffe8118c6317326b47cad6a8525a2d55f355fde1d4f7ecf75731cd48032deae21
+OpenBLAS.v0.3.17+0.aarch64-linux-gnu-libgfortran4.tar.gz/md5/730771a8901aa54e412551f4c3c14fed
+OpenBLAS.v0.3.17+0.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/5beb259e1e7e3edb5b60dcbdc97683c265accaaea3c0e22e688d09cb82ded75413e7ed6ac75f1d87465191baa387f012b50a2ddf921732ff5642b5d0e379ff3d
+OpenBLAS.v0.3.17+0.aarch64-linux-gnu-libgfortran5.tar.gz/md5/b7d37884a729c0cad1f90407e6357649
+OpenBLAS.v0.3.17+0.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/befc096f478947c4361ffe29256ab8f65f0d8d07a0932caa16ee43339422a8af9b79ce9a7bc2268ecc0edde57a874fe086dfd08a758d310572c95a2baff24d22
+OpenBLAS.v0.3.17+0.aarch64-linux-musl-libgfortran3.tar.gz/md5/f9fa013c02ed7dd0b8cee2c3dce69807
+OpenBLAS.v0.3.17+0.aarch64-linux-musl-libgfortran3.tar.gz/sha512/a6707805d4d449f779a96d8e92483d094c604045e2f1e1c9ad40e30e6d38642b9bb232c4bc41e27d12eabb280cb5c3107240b6f5230502116bb2d4ae3b31172f
+OpenBLAS.v0.3.17+0.aarch64-linux-musl-libgfortran4.tar.gz/md5/e355b1382d91b69c40278c955d0207f2
+OpenBLAS.v0.3.17+0.aarch64-linux-musl-libgfortran4.tar.gz/sha512/67e6e82f5fd5139f3d8e62eb7b93440da1c4e78acd3662e31c631a9f15219bc1e713428973a128d9f9993941966eda4ae6c711ebed7d19d331b1b66e8f1e2c8a
+OpenBLAS.v0.3.17+0.aarch64-linux-musl-libgfortran5.tar.gz/md5/d64af394c32a8e85490a6f82fea2bcfc
+OpenBLAS.v0.3.17+0.aarch64-linux-musl-libgfortran5.tar.gz/sha512/91bcedc5d0bb806a487446049c604f3c149ff7f1ac28c9f24468986181bd10e57ac78613b8426f4626d634752b1de6c5f953ce99c3570451e3f3ec44bffa42da
+OpenBLAS.v0.3.17+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/c4e7eb9669ed6882c4dba5f691e67ab2
+OpenBLAS.v0.3.17+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/50f9053ba121c9c593fe08114562ba0d11a1c8bc46eecb376dbab06c57741d046de281ebc9499d5a87a7682782d3429730d262d7ae05d2c54ac7bb71635a4aa2
+OpenBLAS.v0.3.17+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/0c57cf0f5e6b097e7c7b5162e3d3fa93
+OpenBLAS.v0.3.17+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/ee1b0842276366f2e5e6d5a600fd9e3aa6915f3c96db0770be7fd9fbf9b9fab7f709a8087abf9f7ae1f291178730fcd5791c928255e8ad8d9b818b62a366f95d
+OpenBLAS.v0.3.17+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/6514e09f96619e183bcecf196ee03356
+OpenBLAS.v0.3.17+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/b2e091f720009e970f62f53846b840a6e1106ce17db709bc940308f1c4f0bae02ef312aab3511e62147f1ecd6121a3ce8263d94931acb53122442f78e1859160
+OpenBLAS.v0.3.17+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/7e61f954bd743ac2c36cc18d0e956b80
+OpenBLAS.v0.3.17+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/d4589f1b3f199e50ea37ad0dde8906cffe5bbc559005b646e2f69b9023159a2f20d7b0eb8b53bd623fc9c71a4f785bb3e040f9c6aa9f850424388c6c8641bbd9
+OpenBLAS.v0.3.17+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/c3f6f6a85b8e28d8a9bc5cc9d8609045
+OpenBLAS.v0.3.17+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/0315eb6fc19e42b1d2aaca492fcbcb646b628af40885e579c232979a8325abd181f88ec879cc04bd91d2a724de04e0c8988d3e2b5045d5c775a5833d39a65166
+OpenBLAS.v0.3.17+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/c1224f944d53902038f590cbb3298943
+OpenBLAS.v0.3.17+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/3083f9943ede3b3ab3ddd2d24ec835878ee5d85d43c7b0473f0b531e874eddbf87e69e378aea8f07f719bd1850f5da96d96c8c52a58b4af7c9ccb8916b93ad38
+OpenBLAS.v0.3.17+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/3b15317daaee8186e8e35acc5176452d
+OpenBLAS.v0.3.17+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/90cba606ee49d4d1ccd971df87668cf43d808fc98ea66bafc791362d38ae01f284d5a01ffe266ba0333f5dcc7dfbe81807526ffa43009a12020f38b2e0ef042f
+OpenBLAS.v0.3.17+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/48c09596f1e5f92d42db46398582a7e4
+OpenBLAS.v0.3.17+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/07bca40a4381b2e846d01bc09fa81b4aaf2e03ade52fe11df47f9c9442d03743f33edcd6415ca9c8a1557cf996fac5ec7905a60b3a013fa8c2b3b5488df13fca
+OpenBLAS.v0.3.17+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/1678aae19e570cdc7cb22d4bd8f67c81
+OpenBLAS.v0.3.17+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/5530e95398b372e78aab45ec6e3d59408ed0321e3da487046d58a359399e0b624cc7aa1ab788ecf6d36f2fbb44bf7ed26fd4d1eb0aebb414886b0279491d09ea
+OpenBLAS.v0.3.17+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/2fa577470606aa8c97156a3ba44fe65b
+OpenBLAS.v0.3.17+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/ac57336720b6b79512d08423de65803c8b2d854a67adf5e3766628d1d67b7fc153cb6cdd463b3d801b6cef6bd38a2174a861c25825073a6c221b96ddfc7914e3
+OpenBLAS.v0.3.17+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/93d929aac8e7e3851be1d23f7d589b0d
+OpenBLAS.v0.3.17+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/5be22d3aa0b5e2a2d6fed79e2669c89630b236c506d1c6ed55dfa1158903c298b827583a326a413f1ea5ae336de6d3d508b76f72e3e1e0729643b86543192f7b
+OpenBLAS.v0.3.17+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/f5efd57ea0e7acf8c47e7759759f8294
+OpenBLAS.v0.3.17+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/1d51b3521c184a39bd5471ba8b0fced85c51d9d69580e18468b26d77a94d050e8cb9e2cac0ca57a598cd90e428b75b2002497101bb7c148a28c9f1998dc09ce8
+OpenBLAS.v0.3.17+0.i686-linux-gnu-libgfortran3.tar.gz/md5/c6b1957bbda6aec3941a54614aef5c8f
+OpenBLAS.v0.3.17+0.i686-linux-gnu-libgfortran3.tar.gz/sha512/261db11d44856431a714713fd06f8de67fee40849a693c862958a8eee157b7c99ad9a928bfff263413cbc157331fabf42e1139d9ca122903d25eb750ac873d6a
+OpenBLAS.v0.3.17+0.i686-linux-gnu-libgfortran4.tar.gz/md5/778227c00545c094e2db0116764bfc4b
+OpenBLAS.v0.3.17+0.i686-linux-gnu-libgfortran4.tar.gz/sha512/98b66a03a2e7a4abce03dc5f8d5bd10908d2caf84efdde35df2a2ac026c1287feafbec13ac10c42161e5cd21e02005fe56cb86ef0fd5ffbef260a965e913b1be
+OpenBLAS.v0.3.17+0.i686-linux-gnu-libgfortran5.tar.gz/md5/c8a81d90002d980b169e14d3f45de420
+OpenBLAS.v0.3.17+0.i686-linux-gnu-libgfortran5.tar.gz/sha512/907bcbb1560e18b175b9d4251e634e288433b113d91e27780ca1307d6678ac67e7c8068ae675793475ce8004d1de06b522d6203a241c1cece85efc3f54e55d0a
+OpenBLAS.v0.3.17+0.i686-linux-musl-libgfortran3.tar.gz/md5/dd84709d38c98ef8590293022b4e793b
+OpenBLAS.v0.3.17+0.i686-linux-musl-libgfortran3.tar.gz/sha512/667206ba5f9d57bab2b7355c151dad8deb51d728854652a41274f092a8415509c80de4f199ae7532750fbd92024b2a09aeaf02f3e468edb67e15e06745f57c41
+OpenBLAS.v0.3.17+0.i686-linux-musl-libgfortran4.tar.gz/md5/7d7e4d19dbfa8df28c3bd2fdf3159e25
+OpenBLAS.v0.3.17+0.i686-linux-musl-libgfortran4.tar.gz/sha512/0afbb8b13838df1d65390062d57ab4821598a57b9e2bdd1e2fad2429f7379ef3a3732c35fd4b48912dcbe913d7f092cbc780e1f57fdcf6e0c17a1c09824493b5
+OpenBLAS.v0.3.17+0.i686-linux-musl-libgfortran5.tar.gz/md5/e824b8e0b9ff9801825cdca46fbdf400
+OpenBLAS.v0.3.17+0.i686-linux-musl-libgfortran5.tar.gz/sha512/72f64d83c74ba1e14fc920e3532ad1f999315e4c735faeac8cb435427c07e1bdcd61c3e68740853822d789931ffa72194785df3dab9734604d66af509653a2dc
+OpenBLAS.v0.3.17+0.i686-w64-mingw32-libgfortran3.tar.gz/md5/0590c643b7d3fd97ce9cbd0ae1eaef58
+OpenBLAS.v0.3.17+0.i686-w64-mingw32-libgfortran3.tar.gz/sha512/070303eded91ef484140068f1291dbebac763e1336fdb860affe4246c16e156751a7e05e51591f3e39d1a4331d84a53a009f45b49be56e6372ec81a7630c8d39
+OpenBLAS.v0.3.17+0.i686-w64-mingw32-libgfortran4.tar.gz/md5/e1e5bd791931b93f4b9da96619796479
+OpenBLAS.v0.3.17+0.i686-w64-mingw32-libgfortran4.tar.gz/sha512/09cb091c7a065697d72320da18fb29a8fa9c49d97047a03f1a881add304d5613829083a472ea082cc3779ada82f138eaaffae9fd5233a47ac77777460465e058
+OpenBLAS.v0.3.17+0.i686-w64-mingw32-libgfortran5.tar.gz/md5/f78f0e4c205ead8826d93b35268ef579
+OpenBLAS.v0.3.17+0.i686-w64-mingw32-libgfortran5.tar.gz/sha512/cb5eb6c2d037d46cbcf7540114a03a1a41a97682460fce7156bfa576bc3c75dfc16d4c8b6974ec677b429fd5063932e9f9611f8c7064ed585764999c11fb1b02
+OpenBLAS.v0.3.17+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/82368cc4d60d37483f40d447bf850af0
+OpenBLAS.v0.3.17+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/9ca84d891a0323f6bf4ac757eaff2305e66097bebda346280b2a7390fe091ab0c0ad8770e8b32c7cdd273a162f36ee43aedd94536a543205987767713b2915e3
+OpenBLAS.v0.3.17+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/58a3b9eeaa72bb9eeaa83eb6bafa49cb
+OpenBLAS.v0.3.17+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/4a653a13749a580c1b0f907dfc1597f24338913183a04f252a7ed00c7c5e32b48246fe4e0028a717fabd1129fa81e2aa42bcb23122541c81d2397e383f430bcb
+OpenBLAS.v0.3.17+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/9335da7abeb89cefb6872560f76b5091
+OpenBLAS.v0.3.17+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/8406ef5799d367c85e33f51c502f4d145930da64f78fe254611a425fd68952d3d71ce62b27232535fb7429b05ff2dc95eb2174e63d93d54aab52714355ff99b7
+OpenBLAS.v0.3.17+0.x86_64-apple-darwin-libgfortran3.tar.gz/md5/552724d8d498de5343f95acf2cc43c4a
+OpenBLAS.v0.3.17+0.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/6c71a9ba1f0008ea90ba637fddd79fa6c331e52473ca5521655956d9a5059657d550092760855840eb52060b5c33020839771bad23c061d81d47c1fdaf28397b
+OpenBLAS.v0.3.17+0.x86_64-apple-darwin-libgfortran4.tar.gz/md5/611d1d1522197db50deb468a6882dd2b
+OpenBLAS.v0.3.17+0.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/392c080e557584771b875ea7bddf5d9e8d77d3571f9c5a22cab92b749c7e48444bdb17c909745dfc92859bcfb8b9613abd71b4996fe389291905aa2cb8255e34
+OpenBLAS.v0.3.17+0.x86_64-apple-darwin-libgfortran5.tar.gz/md5/3d45861e231ba250feab838ae24e2b12
+OpenBLAS.v0.3.17+0.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/8315f5832fc3653a1b57ec8a1d9551066ee506e85344ee571b9c93648986f71c35c3bae51f9a8e80a8268501e9810974863975f9c12211c3c08da165bc0c1592
+OpenBLAS.v0.3.17+0.x86_64-linux-gnu-libgfortran3.tar.gz/md5/102b3ca4e488f62a3489ef2d4714e65e
+OpenBLAS.v0.3.17+0.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/5075492ae2336d261c9549263a37611d51bb82d6ee1bef4e67bd6cf2927d621a4a21a6cf3a6d878bad1ad7e10d0900e74e6d020ba0c3f9b651650fe9e5efb0c2
+OpenBLAS.v0.3.17+0.x86_64-linux-gnu-libgfortran4.tar.gz/md5/782e81b4d7893fd93443cc20e5adb876
+OpenBLAS.v0.3.17+0.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/425142132b0e330bb12a86c54779ea87ad90ae05d7e44121e19855412b47e629a53b5aa45a9550f6266a7a8e9194f4f89ef070e1b95bdab9716b0f79369bb9c2
+OpenBLAS.v0.3.17+0.x86_64-linux-gnu-libgfortran5.tar.gz/md5/243da9baba80b24bad684e74f01b844c
+OpenBLAS.v0.3.17+0.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/6ddab469dffaface8f2ab6b8707b132c7e8c86f3f6354ea53a12e606ad22278fc6bcafdafda678bd5e31778dda5b4000ae36b3e2096ac1d0cfefedea0549e98e
+OpenBLAS.v0.3.17+0.x86_64-linux-musl-libgfortran3.tar.gz/md5/04b662adab755a56e394de2acdd3e6ff
+OpenBLAS.v0.3.17+0.x86_64-linux-musl-libgfortran3.tar.gz/sha512/f566fc0d492736cbb9c58b6166378a50f9e0c2b6e3f5dafd841ce55042cc87f8bf4fd4a4771a8301dff013b1151da50907109e91f15eed3cadbb59e2d59cce4e
+OpenBLAS.v0.3.17+0.x86_64-linux-musl-libgfortran4.tar.gz/md5/a50ad857c166df8ad8e7ef130ca4847d
+OpenBLAS.v0.3.17+0.x86_64-linux-musl-libgfortran4.tar.gz/sha512/44e10a9f57ec6023a9cce96f9b7b069ae6127abe8afdd673fc86e99aa86eb2c394401d4fd29dfb3708de19e6cc723c797cd42bc6259e663c25179c13a78083ef
+OpenBLAS.v0.3.17+0.x86_64-linux-musl-libgfortran5.tar.gz/md5/cd30d36e87bfb2b3c682e12fd23896f6
+OpenBLAS.v0.3.17+0.x86_64-linux-musl-libgfortran5.tar.gz/sha512/7f19efc20c3498be51d7a6c892ec74cf9183068d1bd15190800cc93ee84267d1f71cdac3f3684fbf8a9e2fffed5dd1c4bf69204e8533ddf4ed91a82bf8608405
+OpenBLAS.v0.3.17+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/618d5c45e8e5b83a6a4905cdc1a2ea6c
+OpenBLAS.v0.3.17+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/9b0af80e94a450098a771eaee55f714845e3f522f38d420ad34c8b79f03889384acb2cdfd563c5a01102e91af98c3e781bf499c9aabac93d58b14665a9158553
+OpenBLAS.v0.3.17+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/86c97dca50aedd3cf2d94c3f85c9d40a
+OpenBLAS.v0.3.17+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/db6c7b5c8a364e0ed42d65953dd05ac5ee436006de7495a00c4224bc91a6def8513542f766217c86a44214580f37e58f6e739a8b24312288ebd2169acd2213c6
+OpenBLAS.v0.3.17+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/dcbd865a7cccaec4fc187784138a785b
+OpenBLAS.v0.3.17+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/5d16120bea378d60fb6494421c24c9c5a02e78c1e38c7d1d1ffa6cc857805926420f0bd9678ba749809f15ac2a5c3f89a4f0a1ae015008b8f6be54c774ee6078
+OpenBLAS.v0.3.17+0.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/1cd64551545735e15e19144636f79550
+OpenBLAS.v0.3.17+0.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/3d00581f9f7a557f906fb93b755c22794ea4033d09ae0b3241fcac3275d924a6abec0353f46ddf9bb1910a702922b106a744275d70c4a41964469c227802dff8
+OpenBLAS.v0.3.17+0.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/5884cbad0a794115cf686092aadfc740
+OpenBLAS.v0.3.17+0.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/397ca83c411b58ccdab97c6714042540a92e889d8d40350d2b6daf332526c7d28fbd8956aedaa3509e138fb7577edabf750f74b19ca8f821493bc95324dc77bf
+OpenBLAS.v0.3.17+0.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/e21769fbfb75901961290b8c4bf9269a
+OpenBLAS.v0.3.17+0.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/b28c9d6379f084f7a08222d7907ef5174fd52a6a13b189254fdea9911dd6db45e45924c9f24e00ec2e801669f50460d993288f0047b019a2ffa3bbb0fb4a5637
+openblas-d909f9f3d4fc4ccff36d69f178558df154ba1002.tar.gz/md5/4acd59865ca8b50c823bef1354148930
+openblas-d909f9f3d4fc4ccff36d69f178558df154ba1002.tar.gz/sha512/227ee7decccf9bdd2e5754757f590e32ada95b576db9eddc2c74ef06d35aba1db9438acaf57750184baacac741917f7f5ad9f15991d31314480db371fe59cc17
diff --git a/deps/checksums/pcre b/deps/checksums/pcre
index 05a06f9844ddfc..f7e1fa0c1a3ba2 100644
--- a/deps/checksums/pcre
+++ b/deps/checksums/pcre
@@ -1,3 +1,5 @@
+pcre2-10.36.tar.bz2/md5/bd7e7421ff3fa2e2d5429229ecfad095
+pcre2-10.36.tar.bz2/sha512/fc2a920562c80c3d31cedd94028fab55314ae0fb168cac7178f286c344a11fc514939edc3b83b8e0b57c872db4e595fd5530fd1d4b8c779be629553e9ec965a3
 PCRE2.v10.36.0+2.aarch64-apple-darwin.tar.gz/md5/12ac3bee39df3a79f868f6463964953b
 PCRE2.v10.36.0+2.aarch64-apple-darwin.tar.gz/sha512/a1a1312931deb7f742f80886188babcf9c179ed3f156626fb23d92633fde896d1ee9b2d72cd99ae4a1f8048971b6d939e9b0b10c455d4eeec24b265968593486
 PCRE2.v10.36.0+2.aarch64-linux-gnu.tar.gz/md5/32240ccddee3040aeedcbe69ea52fcad
@@ -30,5 +32,3 @@ PCRE2.v10.36.0+2.x86_64-unknown-freebsd.tar.gz/md5/97410029c0b6ed5f7fb0d14e1f121
 PCRE2.v10.36.0+2.x86_64-unknown-freebsd.tar.gz/sha512/229e910759da2959ddef83ca89e05a050c266b8e755c85dfce6a786658be541911c3b78a0fca7dfdee1b41fbbdccf57da75cf9fe45fd2821dba8d2aaeabfd538
 PCRE2.v10.36.0+2.x86_64-w64-mingw32.tar.gz/md5/39827564bca329768e0380bd79b869fe
 PCRE2.v10.36.0+2.x86_64-w64-mingw32.tar.gz/sha512/4579049b99fca3334d726b0ca1f07524d1643a758e375b5b02b8f294ba7d9c2a4130da1a1523de29033233a8848105b3cb660e15bb4a759593405d805ee99883
-pcre2-10.36.tar.bz2/md5/bd7e7421ff3fa2e2d5429229ecfad095
-pcre2-10.36.tar.bz2/sha512/fc2a920562c80c3d31cedd94028fab55314ae0fb168cac7178f286c344a11fc514939edc3b83b8e0b57c872db4e595fd5530fd1d4b8c779be629553e9ec965a3
diff --git a/deps/checksums/suitesparse b/deps/checksums/suitesparse
index 98bf43c0c3a923..b34ee2a852f0ee 100644
--- a/deps/checksums/suitesparse
+++ b/deps/checksums/suitesparse
@@ -1,34 +1,36 @@
-SuiteSparse-5.8.1.tar.gz/md5/c414679bbc9432a3def01b31ad921140
-SuiteSparse-5.8.1.tar.gz/sha512/3d986157acd39968216880c2de30c54d72e0addd7318a04948c1b842ad9c876bcde32f292504d4540a4baa998a368dfc0db000606a2d00f61760a92d5b8c1cb8
-SuiteSparse.v5.8.1+0.aarch64-apple-darwin.tar.gz/md5/57fa5164c7ee989ff95571037c0dfcc0
-SuiteSparse.v5.8.1+0.aarch64-apple-darwin.tar.gz/sha512/a12ac3ec77ac0b78595bc247a4fc37b9136e9a5095abf061b3fade60130b153d61b5a3c602be87c7334660b67f3539075975a9d4a1545d8ee5fd62c734941032
-SuiteSparse.v5.8.1+0.aarch64-linux-gnu.tar.gz/md5/a932269a0fb499d931867acc9505e49b
-SuiteSparse.v5.8.1+0.aarch64-linux-gnu.tar.gz/sha512/2983c095533f7d8c2fa81779bf907e9024b1bfef58e934a4d524d767911a41f3006babc20530b3b23854d87adce02eb5e7e8a98d4ad3d853b03abc64cd8d5e6b
-SuiteSparse.v5.8.1+0.aarch64-linux-musl.tar.gz/md5/ccc8c1b5378f208374c45ceb6c2b38a1
-SuiteSparse.v5.8.1+0.aarch64-linux-musl.tar.gz/sha512/eb357385de23ce864dc15892100372075aeb2ea2dcdc091a24d746d22278593391ff30cfb94b4c68ab66b03414f13b50614312f46b27f972b977541050b96630
-SuiteSparse.v5.8.1+0.armv6l-linux-gnueabihf.tar.gz/md5/ed3ea07e0996126bfac86e3abee78fb5
-SuiteSparse.v5.8.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/d9de5fb0c0627376d0145ef238a1910f8b0d2037180dcf32caf2bae32600521641abcf20d24d725326fa9bbb52581bb6a4bfaccad292de875cef54b3d721d733
-SuiteSparse.v5.8.1+0.armv6l-linux-musleabihf.tar.gz/md5/a71b0a47cbc98144f52229e10f4cd20d
-SuiteSparse.v5.8.1+0.armv6l-linux-musleabihf.tar.gz/sha512/1c61f4ec94ea8adbd2aef50a6f42dd767325400579ff91019c7a5d2555fac5abf17a05201b22b1ff389ce795267daffbef2d961eb46125348ad64032133797b9
-SuiteSparse.v5.8.1+0.armv7l-linux-gnueabihf.tar.gz/md5/bd585f667a26df26db98ffab82d3e40f
-SuiteSparse.v5.8.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/1a7b1d0fb7cc5c308a10384a8a8c48cb789ab40d31693722feb01d14ee2781cacc74f0d3082f2e7149805d303002e05e70ca7c9ce715fdc93cbfdc92b84ff03f
-SuiteSparse.v5.8.1+0.armv7l-linux-musleabihf.tar.gz/md5/960379f40ad66b94004c7201a0a0bd13
-SuiteSparse.v5.8.1+0.armv7l-linux-musleabihf.tar.gz/sha512/99745a0f4422f9ecb86a6c246cf0a1e5f02151adb5f17b600515b7fa220cdb3e7d709f769270af1665d693955500e27e0662ea97069affe209c4a50e4381ec92
-SuiteSparse.v5.8.1+0.i686-linux-gnu.tar.gz/md5/55ad27f4ac55e6c759160742904958ae
-SuiteSparse.v5.8.1+0.i686-linux-gnu.tar.gz/sha512/84b14edf20a9fa9be0fec034866bf469ae7b07924e28fee6d5c4fa5c70826b4ac68c0c7abd5496b76f342c93d1b85f2c2f601ede71c5fba484f6776aa0ad04ab
-SuiteSparse.v5.8.1+0.i686-linux-musl.tar.gz/md5/db6ec8eb4aee46f158abe36e593d9c20
-SuiteSparse.v5.8.1+0.i686-linux-musl.tar.gz/sha512/9ecdd62891730f9b587a3f6debfc859c51bc1e6fc7a925339442f25c9c9be587e5df90572f87236fc79302dd2435990d6d054a494ff3498fb31d83b0be11d0f9
-SuiteSparse.v5.8.1+0.i686-w64-mingw32.tar.gz/md5/379dbd76bc5b33b2f00e25b93f15d7c1
-SuiteSparse.v5.8.1+0.i686-w64-mingw32.tar.gz/sha512/91b4a4b3d5fd710510bba2ffa7b19bdf2860fb4f6c1946dcb421e4143097e7b77ee3bc780ed89726152f2c5e72d4e897e5eaa152b1d97176ed9d0e4fb2cdb6c6
-SuiteSparse.v5.8.1+0.powerpc64le-linux-gnu.tar.gz/md5/13e5e383de77448d7fca6802a27e2fc9
-SuiteSparse.v5.8.1+0.powerpc64le-linux-gnu.tar.gz/sha512/412611fd82923e042001774cbc7951e8d6777843b308f6b25cf361c05b44d1dafc1639e7526b0871ec637f15ac8fcd247e9201ba0d75bfe0bf0900a9dace1001
-SuiteSparse.v5.8.1+0.x86_64-apple-darwin.tar.gz/md5/d266370cc574c8ca8b2b8a94d6b877f4
-SuiteSparse.v5.8.1+0.x86_64-apple-darwin.tar.gz/sha512/2377d662f91d01229e3c44ec1c5624b6361e823ff90d625acc3bab8f68ada1b3b7b67f4e56c8896f890e5780bd2fcb494b5a1f91c9c7803715eac2a2a4bb938f
-SuiteSparse.v5.8.1+0.x86_64-linux-gnu.tar.gz/md5/2545d16665a42f36fbbad9198a9668e5
-SuiteSparse.v5.8.1+0.x86_64-linux-gnu.tar.gz/sha512/66cf497a002b566a681531873da0b5dc3b4f79414068b7ae53845325afb2f56a55069ff5ba5f47de0eb961af4df18cf8495019150f163213d32aa5b12c511ce3
-SuiteSparse.v5.8.1+0.x86_64-linux-musl.tar.gz/md5/8a8ade173d751514e03a6987aee56ca1
-SuiteSparse.v5.8.1+0.x86_64-linux-musl.tar.gz/sha512/7393063fd2f88bf5dbe1378d6d34206bde446f1ed971d2dd334c3cca96075334f4f8ca69f7d406d532a631fd8a76d739df27dab7928e596a1d38ba9c96fbc9d6
-SuiteSparse.v5.8.1+0.x86_64-unknown-freebsd.tar.gz/md5/674bbae00649ff495aed5fb57de80966
-SuiteSparse.v5.8.1+0.x86_64-unknown-freebsd.tar.gz/sha512/4a1ee55b55149549cbe022b3354bb136b7bfbbbcef8254c2ec91aad8c921c689dbedd208003c8805c320e5d3319e9b36945c117eaac4a634f31e76ff3e579f3d
-SuiteSparse.v5.8.1+0.x86_64-w64-mingw32.tar.gz/md5/f52e3758656acf8a4342f8a34584c266
-SuiteSparse.v5.8.1+0.x86_64-w64-mingw32.tar.gz/sha512/a9a335bae4b588147bf11b301d11e4d3663011c3f7bd449382b09bba3eeb108501ef1244477f11c408d463bc61c2eff613a182bcc56ff2acae630d84483bb724
+SuiteSparse-5.10.1.tar.gz/md5/68bb912f3cf3d2b01f30ebafef690302
+SuiteSparse-5.10.1.tar.gz/sha512/8f85c6d63b76cba95707dfa732c51200df7794cb4c2599dbd92100475747b8d02b05089a47096e85c60b89bc852a8e768e0670f24902a82d29494a80ccf2bb5f
+SuiteSparse-b15c39be53f7823c721c1f8a7c036105e2baa04a.tar.gz/md5/412d21908e1d00de43c158b372025430
+SuiteSparse-b15c39be53f7823c721c1f8a7c036105e2baa04a.tar.gz/sha512/c2cbb59daeee6493ccf741fbc1baad92f444f0f2320e84a102c7fec8fc30532acb5a295edd19dfa662f63d14b2bdaadbec5f81e64d946fc35ffb1dd6ffd80abc
+SuiteSparse.v5.10.1+0.aarch64-apple-darwin.tar.gz/md5/b9392f8e71c0c40d37489e7b2071c5ad
+SuiteSparse.v5.10.1+0.aarch64-apple-darwin.tar.gz/sha512/109d67cb009e3b2931b94d63cbdaaee29d60dc190b731ebe3737181cd48d913b8a1333043c67be8179c73e4d3ae32ed1361ab4e34312c0f42e4b29f8a7afda3e
+SuiteSparse.v5.10.1+0.aarch64-linux-gnu.tar.gz/md5/1b2651ede4a74cd57f65505a65093314
+SuiteSparse.v5.10.1+0.aarch64-linux-gnu.tar.gz/sha512/753f986a749d139f9a6baedac059d8ed8efdd716ed28eacdbf00e6ebe863b4e17467f01a9693dcb39571d38b4b5c4c1375dbb790b88a7e704116e3fe83f7ff3e
+SuiteSparse.v5.10.1+0.aarch64-linux-musl.tar.gz/md5/051ff9bbbc95c57d58563df8a2c8eedd
+SuiteSparse.v5.10.1+0.aarch64-linux-musl.tar.gz/sha512/855979ed8d6290c529d9c9e82944fb15c88f9d9d8da7db1fa2fc34efb0ed985fc6554312882107f26956f2a18ae985918909cd834e068b874906c21a0f53b6c9
+SuiteSparse.v5.10.1+0.armv6l-linux-gnueabihf.tar.gz/md5/dbc5fb4844077084663612af26e180ce
+SuiteSparse.v5.10.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/b906f7275ab58006acd52927e7e04c79eec59b5f28e9d7e5d5b8556c0eedd54cfff87e494373702c205afa2384ee6b0f2bb5e811fd440b1b50d5c9eee1b47b99
+SuiteSparse.v5.10.1+0.armv6l-linux-musleabihf.tar.gz/md5/7770d256e76d5ce1484c3781508cc3ed
+SuiteSparse.v5.10.1+0.armv6l-linux-musleabihf.tar.gz/sha512/4f1d46cc8da5a7eff665b4bb96f9e21319f39231f98a6164d8c3d654d5b6f93c3e4477f55a39a80b7f8125a78d690cc5a1cc58f29143ba4c109a4182d7fa2110
+SuiteSparse.v5.10.1+0.armv7l-linux-gnueabihf.tar.gz/md5/ee1fa978bcfb264842749f915bbefd77
+SuiteSparse.v5.10.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/9592a42f6474fd89eea1144f62ecc2a23796ad251173a9c36ccbc9bc18dd88687ce49f51528974f56b5652e2ab15f0aa41634513f2cc0b3c54259de3b68350bd
+SuiteSparse.v5.10.1+0.armv7l-linux-musleabihf.tar.gz/md5/30f708421b92158c7741c82576e9047b
+SuiteSparse.v5.10.1+0.armv7l-linux-musleabihf.tar.gz/sha512/d8793d48757dbb62aa7a21c215b6d6e63a26ce4ba740f1f7f42a3e485ad3d9628744f021ad9cc96e29c8c88bfb2f02ea92865c26b971ca739d3c05c7f28875d9
+SuiteSparse.v5.10.1+0.i686-linux-gnu.tar.gz/md5/9018b6168b9a687bab0c9a9cbf45afba
+SuiteSparse.v5.10.1+0.i686-linux-gnu.tar.gz/sha512/308a92f441af6855517c40c6871b4935251677c05cc082c21fd1249e0137b635fa524f60cad61c7524026301a6de7ffea0ad1f4b9a4d9d6e3ced3f332a6719d4
+SuiteSparse.v5.10.1+0.i686-linux-musl.tar.gz/md5/99143f8d6de4f071ffa19942252b6dec
+SuiteSparse.v5.10.1+0.i686-linux-musl.tar.gz/sha512/9fb719fffea03296dfac8bc221bafc3ed8f7791749eca6c4b00265994de1be5d242e7e5184693603c745b39c4538feb11ab283204e0e33df2745f904cf0c7252
+SuiteSparse.v5.10.1+0.i686-w64-mingw32.tar.gz/md5/d049c943fbda2c8380dea33e16569275
+SuiteSparse.v5.10.1+0.i686-w64-mingw32.tar.gz/sha512/174768464432b991ecff88d5e5126caca83672fb5173115de59bc2387ef8aa75a56d3e84957fce625fabaf50ba462549f2ea828aea7258be7513835b7fea2e31
+SuiteSparse.v5.10.1+0.powerpc64le-linux-gnu.tar.gz/md5/f01f7e134f8ee77849f3a46e773c1ff2
+SuiteSparse.v5.10.1+0.powerpc64le-linux-gnu.tar.gz/sha512/dc0339f2b35f05d49fbd1dcf1822c774a07af122fabc8e00eb7435dc53fcf82b3c1ec24e2bb41b1a58d3f8ab8903830eb7ece19dc6fce3f5e73d90a3dc3c4194
+SuiteSparse.v5.10.1+0.x86_64-apple-darwin.tar.gz/md5/02975a8670660c5e79eab0a70b051a0b
+SuiteSparse.v5.10.1+0.x86_64-apple-darwin.tar.gz/sha512/e55685ed7a63318c5baa326795503f13f031e0a617c045c972d5c89252ab51e7325e2b0425ca10dfbd59e79c5b4200545f5a4944fddd376e7610b6ebf74ded14
+SuiteSparse.v5.10.1+0.x86_64-linux-gnu.tar.gz/md5/6c111d315fb25c529710722bd5ae6af0
+SuiteSparse.v5.10.1+0.x86_64-linux-gnu.tar.gz/sha512/c971aed91bd695a0f7f735f58ddcb075d32b9522a8a50a30ad383ba5ce2c8e572fec97644e6cb85745206f4e5da72d7865d9a9724eb63ce3c04e90a4eedc90c9
+SuiteSparse.v5.10.1+0.x86_64-linux-musl.tar.gz/md5/7c98daf0edfad31764c3078e6351b521
+SuiteSparse.v5.10.1+0.x86_64-linux-musl.tar.gz/sha512/2c4b3cae1bd8d1ce62dae6aeca3ffbf90c26a1b01c0da4fb7761d6fe4293b8fad0b6fbfd5f930cefe6ccaef7546a482022ff2f50dc59ecf17c5c0dfc6a5961f5
+SuiteSparse.v5.10.1+0.x86_64-unknown-freebsd.tar.gz/md5/aeca88a7bc3f9d239c61084996ce9182
+SuiteSparse.v5.10.1+0.x86_64-unknown-freebsd.tar.gz/sha512/0bee1ee07c3883fe28dd322c40195be9adb757d6dab3eb1730d7b0ff65dd4517520047696ccdda4ca618e671d898cdb45b787094594e142cb4b176549a74200b
+SuiteSparse.v5.10.1+0.x86_64-w64-mingw32.tar.gz/md5/63e449554eee134757e3d50ca8b5f47d
+SuiteSparse.v5.10.1+0.x86_64-w64-mingw32.tar.gz/sha512/95b58df4fe7520e2b526f9e3b199253909992789cd24ecca814ddb9a0c0bb37ff93c1de40239e5295a8503613cdb2431a87f0a70a3d657d94d4661f1778797f2
diff --git a/deps/csl.mk b/deps/csl.mk
index 095d4ceec4a1e3..9f95c00f3cfe77 100644
--- a/deps/csl.mk
+++ b/deps/csl.mk
@@ -1,5 +1,3 @@
-ifeq ($(USE_BINARYBUILDER_CSL),0)
-
 # Interrogate the fortran compiler (which is always GCC based) on where it is keeping its libraries
 STD_LIB_PATH := $(shell LANG=C $(FC) -print-search-dirs | grep '^programs: =' | sed -e "s/^programs: =//")
 STD_LIB_PATH += :$(shell LANG=C $(FC) -print-search-dirs | grep '^libraries: =' | sed -e "s/^libraries: =//")
@@ -12,6 +10,44 @@ define pathsearch
 $(firstword $(wildcard $(addsuffix /$(1),$(subst :, ,$(2)))))
 endef
 
+# CSL bundles lots of system compiler libraries, and while it is quite bleeding-edge
+# as compared to what most distros ship, if someone tries to build an older branch,
+# the version of CSL that ships with that branch may become relatively old.  This is
+# not a problem for code that is built in BB, but when we build Julia with the system
+# compiler, that compiler uses the version of `libstdc++` that it is bundled with,
+# and we can get linker errors when trying to run that `julia` executable with the
+# `libstdc++` that comes from the (now old) BB-built CSL.
+#
+# To fix this, we take note when the system `libstdc++.so` is newer than whatever we
+# would get from CSL (by searching for a `GLIBCXX_3.4.X` symbol that does not exist
+# in our CSL, but would in a newer one), and default to `USE_BINARYBUILDER_CSL=0` in
+# this case.
+CSL_NEXT_GLIBCXX_VERSION=GLIBCXX_3\.4\.30|GLIBCXX_3\.5\.|GLIBCXX_4\.
+# ^ extended-regex alternation: it must reach `grep -E` quoted, or the shell treats `|` as a pipe.
+# First, check to see if BB is disabled on a global setting
+ifeq ($(USE_BINARYBUILDER),0)
+USE_BINARYBUILDER_CSL ?= 0
+else
+# If it's not, check to see if it's disabled by a USE_SYSTEM_xxx flag
+ifeq ($(USE_SYSTEM_CSL),1)
+USE_BINARYBUILDER_CSL ?= 0
+else
+# If it's not, probe the compiler's `libstdc++`.  No `$(eval ...)` here: it always expands to "".  NOTE(review): confirm `libstdc++` matches the installed file name.
+LIBSTDCXX_PATH := $(call pathsearch,libstdc++,$(STD_LIB_PATH))
+ifneq (,$(and $(LIBSTDCXX_PATH),$(shell objdump -p $(LIBSTDCXX_PATH) | grep -E '$(CSL_NEXT_GLIBCXX_VERSION)')))
+# Found `libstdc++`, grepped it for strings and found a `GLIBCXX` symbol
+# that is newer than whatever we have in CSL.  Default to not using BB.
+USE_BINARYBUILDER_CSL ?= 0
+else
+# Either we didn't find `libstdc++` (e.g. we're using `clang`), or we
+# found it and couldn't find the new symbol in it (it's older than what
+# BB provides, so let's use BB instead)
+USE_BINARYBUILDER_CSL ?= 1
+endif
+endif
+endif
+
+ifeq ($(USE_BINARYBUILDER_CSL),0)
 define copy_csl
 install-csl: | $$(build_shlibdir) $$(build_shlibdir)/$(1)
 $$(build_shlibdir)/$(1): | $$(build_shlibdir)
@@ -19,48 +55,34 @@ $$(build_shlibdir)/$(1): | $$(build_shlibdir)
 	[ -n "$$$${SRC_LIB}" ] && cp $$$${SRC_LIB} $$(build_shlibdir)
 endef
 
-ifeq ($(OS),WINNT)
-define gen_libname
-$$(if $(2),lib$(1)-$(2).$(SHLIB_EXT),lib$(1).$(SHLIB_EXT))
-endef
-else ifeq ($(OS),Darwin)
-define gen_libname
-$$(if $(2),lib$(1).$(2).$(SHLIB_EXT),lib$(1).$(SHLIB_EXT))
-endef
-else
-define gen_libname
-$$(if $(2),lib$(1).$(SHLIB_EXT).$(2),lib$(1).$(SHLIB_EXT))
-endef
-endif
-
 # libgfortran has multiple names; we're just going to copy any version we can find
 # Since we're only looking in the location given by `$(FC)` this should only succeed for one.
-$(eval $(call copy_csl,$(call gen_libname,gfortran,3)))
-$(eval $(call copy_csl,$(call gen_libname,gfortran,4)))
-$(eval $(call copy_csl,$(call gen_libname,gfortran,5)))
+$(eval $(call copy_csl,$(call versioned_libname,libgfortran,3)))
+$(eval $(call copy_csl,$(call versioned_libname,libgfortran,4)))
+$(eval $(call copy_csl,$(call versioned_libname,libgfortran,5)))
 
 # These are all libraries that we should always have
-$(eval $(call copy_csl,$(call gen_libname,quadmath,0)))
-$(eval $(call copy_csl,$(call gen_libname,stdc++,6)))
-$(eval $(call copy_csl,$(call gen_libname,ssp,0)))
-$(eval $(call copy_csl,$(call gen_libname,atomic,1)))
-$(eval $(call copy_csl,$(call gen_libname,gomp,1)))
+$(eval $(call copy_csl,$(call versioned_libname,libquadmath,0)))
+$(eval $(call copy_csl,$(call versioned_libname,libstdc++,6)))
+$(eval $(call copy_csl,$(call versioned_libname,libssp,0)))
+$(eval $(call copy_csl,$(call versioned_libname,libatomic,1)))
+$(eval $(call copy_csl,$(call versioned_libname,libgomp,1)))
 
 ifeq ($(OS),WINNT)
 # Windwos has special gcc_s names
 ifeq ($(ARCH),i686)
-$(eval $(call copy_csl,$(call gen_libname,gcc_s_sjlj,1)))
+$(eval $(call copy_csl,$(call versioned_libname,libgcc_s_sjlj,1)))
 else
-$(eval $(call copy_csl,$(call gen_libname,gcc_s_seh,1)))
+$(eval $(call copy_csl,$(call versioned_libname,libgcc_s_seh,1)))
 endif
 else
-$(eval $(call copy_csl,$(call gen_libname,gcc_s,1)))
+$(eval $(call copy_csl,$(call versioned_libname,libgcc_s,1)))
 endif
 # winpthread is only Windows, pthread is only others
 ifeq ($(OS),WINNT)
-$(eval $(call copy_csl,$(call gen_libname,winpthread,1)))
+$(eval $(call copy_csl,$(call versioned_libname,libwinpthread,1)))
 else
-$(eval $(call copy_csl,$(call gen_libname,pthread,0)))
+$(eval $(call copy_csl,$(call versioned_libname,libpthread,0)))
 endif
 
 get-csl:
diff --git a/deps/libsuitesparse.mk b/deps/libsuitesparse.mk
new file mode 100644
index 00000000000000..42ae7807bb4ec8
--- /dev/null
+++ b/deps/libsuitesparse.mk
@@ -0,0 +1,112 @@
+## LIBSUITESPARSE ##
+
+# Extra preprocessor defines handed to the UMFPACK, CHOLMOD and SPQR builds
+# when Julia is configured with USE_BLAS64=1.
+ifeq ($(USE_BLAS64), 1)
+UMFPACK_CONFIG := -DLONGBLAS='long long' -DSUN64
+CHOLMOD_CONFIG := -DLONGBLAS='long long' -DSUN64
+SPQR_CONFIG := -DLONGBLAS='long long' -DSUN64
+endif
+
+# CHOLMOD is always built with -DNPARTITION so that it does not link
+# against libmetis.
+CHOLMOD_CONFIG += -DNPARTITION
+
+ifneq ($(USE_BINARYBUILDER_LIBSUITESPARSE), 1)
+
+LIBSUITESPARSE_PROJECTS := AMD BTF CAMD CCOLAMD COLAMD CHOLMOD LDL KLU UMFPACK RBio SPQR
+LIBSUITESPARSE_LIBS := $(addsuffix .*$(SHLIB_EXT)*,suitesparseconfig amd btf camd ccolamd colamd cholmod klu ldl umfpack rbio spqr)
+
+SUITE_SPARSE_LIB := $(LDFLAGS) -L"$(abspath $(BUILDDIR))/SuiteSparse-$(LIBSUITESPARSE_VER)/lib"
+ifeq ($(OS), Darwin)
+SUITE_SPARSE_LIB += $(RPATH_ESCAPED_ORIGIN)
+endif
+LIBSUITESPARSE_MFLAGS := CC="$(CC)" CXX="$(CXX)" F77="$(FC)" AR="$(AR)" RANLIB="$(RANLIB)" BLAS="-L$(build_shlibdir) -lblastrampoline" LAPACK="-L$(build_shlibdir) -lblastrampoline" \
+	  LDFLAGS="$(SUITE_SPARSE_LIB)" CFOPENMP="" CUDA=no CUDA_PATH="" \
+	  UMFPACK_CONFIG="$(UMFPACK_CONFIG)" CHOLMOD_CONFIG="$(CHOLMOD_CONFIG)" SPQR_CONFIG="$(SPQR_CONFIG)"
+ifeq ($(OS),WINNT)
+LIBSUITESPARSE_MFLAGS += UNAME=Windows
+else
+LIBSUITESPARSE_MFLAGS += UNAME=$(OS)
+endif
+
+$(SRCCACHE)/SuiteSparse-$(LIBSUITESPARSE_VER).tar.gz: | $(SRCCACHE)
+	$(JLDOWNLOAD) $@ https://github.com/DrTimothyAldenDavis/SuiteSparse/archive/v$(LIBSUITESPARSE_VER).tar.gz
+
+$(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/source-extracted: $(SRCCACHE)/SuiteSparse-$(LIBSUITESPARSE_VER).tar.gz
+	$(JLCHECKSUM) $<
+	mkdir -p $(dir $@)
+	$(TAR) -C $(dir $@) --strip-components 1 -zxf $<
+	echo 1 > $@
+
+checksum-libsuitesparse: $(SRCCACHE)/SuiteSparse-$(LIBSUITESPARSE_VER).tar.gz
+	$(JLCHECKSUM) $<
+
+$(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/SuiteSparse-shlib.patch-applied: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/source-extracted
+	cd $(dir $@) && patch -p1 < $(SRCDIR)/patches/SuiteSparse-shlib.patch
+	echo 1 > $@
+$(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/SuiteSparse-shlib.patch-applied
+
+$(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled: | $(build_prefix)/manifest/blastrampoline
+
+$(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/source-extracted
+	$(MAKE) -C $(dir $<)SuiteSparse_config library config $(LIBSUITESPARSE_MFLAGS)
+	$(INSTALL_NAME_CMD)libsuitesparseconfig.$(SHLIB_EXT) $(dir $<)lib/libsuitesparseconfig.$(SHLIB_EXT)
+	for PROJ in $(LIBSUITESPARSE_PROJECTS); do \
+		$(MAKE) -C $(dir $<)$${PROJ} library $(LIBSUITESPARSE_MFLAGS) || exit 1; \
+		$(INSTALL_NAME_CMD)lib`echo $${PROJ} | tr A-Z a-z`.$(SHLIB_EXT) $(dir $<)lib/lib`echo $${PROJ} | tr A-Z a-z`.$(SHLIB_EXT) || exit 1; \
+	done
+	echo 1 > $@
+
+ifeq ($(OS),WINNT) # environment prefix for the check recipe so freshly built libraries are found
+LIBSUITESPARSE_SHLIB_ENV:=PATH="$(abspath $(BUILDDIR))/SuiteSparse-$(LIBSUITESPARSE_VER)/lib:$(build_bindir):$(PATH)"
+else
+LIBSUITESPARSE_SHLIB_ENV:=LD_LIBRARY_PATH="$(build_shlibdir)"
+endif # `:=` expands while the makefile is parsed, where $< is empty, so the lib dir is spelled out
+$(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-checked: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled
+	for PROJ in $(LIBSUITESPARSE_PROJECTS); do \
+		$(LIBSUITESPARSE_SHLIB_ENV) $(MAKE) -C $(dir $<)$${PROJ} default $(LIBSUITESPARSE_MFLAGS) || exit 1; \
+	done
+	echo 1 > $@
+
+# Text written to the manifest: the version, the name of the uninstall macro
+# (`manual_libsuitesparse`, defined at the end of this file) and the installed library globs.
+UNINSTALL_libsuitesparse := $(LIBSUITESPARSE_VER) manual_libsuitesparse $(LIBSUITESPARSE_LIBS)
+
+$(build_prefix)/manifest/libsuitesparse: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled | $(build_prefix)/manifest $(build_shlibdir)
+	for lib in $(LIBSUITESPARSE_LIBS); do \
+		cp -a $(dir $<)lib/lib$${lib} $(build_shlibdir) || exit 1; \
+	done
+	echo $(UNINSTALL_libsuitesparse) > $@
+
+clean-libsuitesparse: uninstall-libsuitesparse
+	-rm $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled
+	-rm -fr $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/lib
+	-rm -fr $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/include
+	-$(MAKE) -C $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER) clean
+
+distclean-libsuitesparse:
+	-rm -rf $(SRCCACHE)/SuiteSparse-$(LIBSUITESPARSE_VER).tar.gz \
+		$(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)
+
+get-libsuitesparse: $(SRCCACHE)/SuiteSparse-$(LIBSUITESPARSE_VER).tar.gz
+extract-libsuitesparse: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/source-extracted
+configure-libsuitesparse: extract-libsuitesparse
+compile-libsuitesparse: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled
+fastcheck-libsuitesparse: #none
+check-libsuitesparse: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-checked
+install-libsuitesparse: $(build_prefix)/manifest/libsuitesparse
+
+else # USE_BINARYBUILDER_LIBSUITESPARSE
+
+$(eval $(call bb-install,libsuitesparse,LIBSUITESPARSE,false))
+
+# libsuitesparse depends on blastrampoline
+compile-libsuitesparse: | $(build_prefix)/manifest/blastrampoline
+endif
+
+define manual_libsuitesparse
+uninstall-libsuitesparse:
+	-rm $(build_prefix)/manifest/libsuitesparse
+	-rm $(addprefix $(build_shlibdir)/lib,$3)
+endef
diff --git a/deps/libuv.version b/deps/libuv.version
index 339cba4441875c..045f329a0c9f39 100644
--- a/deps/libuv.version
+++ b/deps/libuv.version
@@ -1,2 +1,2 @@
-LIBUV_BRANCH=julia-uv2-1.39.0
-LIBUV_SHA1=fb3e3364c33ae48c827f6b103e05c3f0e78b79a9
+LIBUV_BRANCH=julia-uv2-1.42.0
+LIBUV_SHA1=c6869fba163a1e04af64ede438a8fd0191e75e9e
diff --git a/deps/llvm.mk b/deps/llvm.mk
index 461f04b6b25034..c12c8c0eaa119a 100644
--- a/deps/llvm.mk
+++ b/deps/llvm.mk
@@ -457,77 +457,6 @@ $$(LLVM_BUILDDIR_withtype)/build-compiled: $$(LLVM_SRC_DIR)/$1.patch-applied
 LLVM_PATCH_PREV := $$(LLVM_SRC_DIR)/$1.patch-applied
 endef
 
-ifeq ($(LLVM_VER_SHORT),8.0)
-$(eval $(call LLVM_PATCH,llvm-D27629-AArch64-large_model_6.0.1))
-$(eval $(call LLVM_PATCH,llvm8-D34078-vectorize-fdiv))
-$(eval $(call LLVM_PATCH,llvm-7.0-D44650)) # mingw32 build fix
-$(eval $(call LLVM_PATCH,llvm-6.0-DISABLE_ABI_CHECKS))
-$(eval $(call LLVM_PATCH,llvm7-D50010-VNCoercion-ni))
-$(eval $(call LLVM_PATCH,llvm-8.0-D50167-scev-umin))
-$(eval $(call LLVM_PATCH,llvm7-windows-race))
-$(eval $(call LLVM_PATCH,llvm-D57118-powerpc)) # remove for 9.0
-$(eval $(call LLVM_PATCH,llvm-exegesis-mingw)) # mingw build
-$(eval $(call LLVM_PATCH,llvm-test-plugin-mingw)) # mingw build
-$(eval $(call LLVM_PATCH,llvm-8.0-D66401-mingw-reloc)) # remove for 9.0
-$(eval $(call LLVM_PATCH,llvm7-revert-D44485))
-$(eval $(call LLVM_PATCH,llvm-8.0-D63688-wasm-isLocal)) # remove for 9.0
-$(eval $(call LLVM_PATCH,llvm-8.0-D55758-tablegen-cond)) # remove for 9.0
-$(eval $(call LLVM_PATCH,llvm-8.0-D59389-refactor-wmma)) # remove for 9.0
-$(eval $(call LLVM_PATCH,llvm-8.0-D59393-mma-ptx63-fix)) # remove for 9.0
-$(eval $(call LLVM_PATCH,llvm-8.0-D66657-codegen-degenerate)) # remove for 10.0
-$(eval $(call LLVM_PATCH,llvm-8.0-D71495-vectorize-freduce)) # remove for 10.0
-$(eval $(call LLVM_PATCH,llvm-8.0-D75072-SCEV-add-type))
-$(eval $(call LLVM_PATCH,llvm-8.0-D65174-limit-merge-stores)) # remove for 10.0
-$(eval $(call LLVM_PATCH,llvm-julia-tsan-custom-as))
-endif # LLVM_VER 8.0
-
-ifeq ($(LLVM_VER_SHORT),9.0)
-$(eval $(call LLVM_PATCH,llvm-D27629-AArch64-large_model_6.0.1))
-$(eval $(call LLVM_PATCH,llvm8-D34078-vectorize-fdiv))
-$(eval $(call LLVM_PATCH,llvm-7.0-D44650)) # mingw32 build fix
-$(eval $(call LLVM_PATCH,llvm-6.0-DISABLE_ABI_CHECKS))
-$(eval $(call LLVM_PATCH,llvm9-D50010-VNCoercion-ni))
-$(eval $(call LLVM_PATCH,llvm-exegesis-mingw)) # mingw build
-$(eval $(call LLVM_PATCH,llvm-test-plugin-mingw)) # mingw build
-$(eval $(call LLVM_PATCH,llvm7-revert-D44485))
-$(eval $(call LLVM_PATCH,llvm-8.0-D66657-codegen-degenerate)) # remove for 10.0
-$(eval $(call LLVM_PATCH,llvm-8.0-D71495-vectorize-freduce)) # remove for 10.0
-$(eval $(call LLVM_PATCH,llvm-D75072-SCEV-add-type))
-$(eval $(call LLVM_PATCH,llvm-9.0-D65174-limit-merge-stores)) # remove for 10.0
-$(eval $(call LLVM_PATCH,llvm9-D71443-PPC-MC-redef-symbol)) # remove for 10.0
-$(eval $(call LLVM_PATCH,llvm-9.0-D78196)) # remove for 11.0
-$(eval $(call LLVM_PATCH,llvm-julia-tsan-custom-as))
-$(eval $(call LLVM_PATCH,llvm-9.0-D85499)) # landed as D85553
-$(eval $(call LLVM_PATCH,llvm-D80101)) # remove for LLVM 12
-$(eval $(call LLVM_PATCH,llvm-D84031)) # remove for LLVM 12
-$(eval $(call LLVM_PATCH,llvm-rGb498303066a6-gcc11-header-fix)) # remove for LLVM 12
-endif # LLVM_VER 9.0
-
-ifeq ($(LLVM_VER_SHORT),10.0)
-$(eval $(call LLVM_PATCH,llvm-D27629-AArch64-large_model_6.0.1))
-$(eval $(call LLVM_PATCH,llvm8-D34078-vectorize-fdiv))
-$(eval $(call LLVM_PATCH,llvm-7.0-D44650)) # mingw32 build fix
-$(eval $(call LLVM_PATCH,llvm-6.0-DISABLE_ABI_CHECKS))
-$(eval $(call LLVM_PATCH,llvm9-D50010-VNCoercion-ni))
-$(eval $(call LLVM_PATCH,llvm-exegesis-mingw)) # mingw build
-$(eval $(call LLVM_PATCH,llvm-test-plugin-mingw)) # mingw build
-$(eval $(call LLVM_PATCH,llvm7-revert-D44485))
-$(eval $(call LLVM_PATCH,llvm-D75072-SCEV-add-type))
-$(eval $(call LLVM_PATCH,llvm-10.0-PPC_SELECT_CC)) # delete for LLVM 11
-$(eval $(call LLVM_PATCH,llvm-10.0-PPC-LI-Elimination)) # delete for LLVM 11
-$(eval $(call LLVM_PATCH,llvm-julia-tsan-custom-as))
-$(eval $(call LLVM_PATCH,llvm-D80101)) # remove for LLVM 12
-$(eval $(call LLVM_PATCH,llvm-D84031)) # remove for LLVM 12
-$(eval $(call LLVM_PATCH,llvm-10-D85553)) # remove for LLVM 12
-$(eval $(call LLVM_PATCH,llvm-10-r_aarch64_prel32)) # remove for LLVM 11
-$(eval $(call LLVM_PATCH,llvm-10-r_ppc_rel)) # remove for LLVM 11
-$(eval $(call LLVM_PATCH,llvm-10-unique_function_clang-sa))
-ifeq ($(BUILD_LLVM_CLANG),1)
-$(eval $(call LLVM_PATCH,llvm-D88630-clang-cmake))
-endif
-$(eval $(call LLVM_PATCH,llvm-rGb498303066a6-gcc11-header-fix)) # remove for LLVM 12
-endif # LLVM_VER 10.0
-
 ifeq ($(LLVM_VER_SHORT),11.0)
 ifeq ($(LLVM_VER_PATCH), 0)
 $(eval $(call LLVM_PATCH,llvm-D27629-AArch64-large_model_6.0.1)) # remove for LLVM 12
@@ -552,7 +481,7 @@ ifeq ($(LLVM_VER_PATCH), 0)
 $(eval $(call LLVM_PATCH,llvm-11-D85313-debuginfo-empty-arange)) # remove for LLVM 12
 $(eval $(call LLVM_PATCH,llvm-11-D90722-rtdyld-absolute-relocs)) # remove for LLVM 12
 endif # LLVM_VER 11.0.0
-$(eval $(call LLVM_PATCH,llvm-invalid-addrspacecast-sink)) # upstreamed as D92210
+$(eval $(call LLVM_PATCH,llvm-invalid-addrspacecast-sink)) # Still being upstreamed as D92210
 $(eval $(call LLVM_PATCH,llvm-11-D92906-ppc-setjmp)) # remove for LLVM 12
 $(eval $(call LLVM_PATCH,llvm-11-PR48458-X86ISelDAGToDAG)) # remove for LLVM 12
 $(eval $(call LLVM_PATCH,llvm-11-D93092-ppc-knownbits)) # remove for LLVM 12
@@ -561,7 +490,7 @@ $(eval $(call LLVM_PATCH,llvm-11-ppc-half-ctr)) # remove for LLVM 12
 $(eval $(call LLVM_PATCH,llvm-11-ppc-sp-from-bp)) # remove for LLVM 12
 $(eval $(call LLVM_PATCH,llvm-rGb498303066a6-gcc11-header-fix)) # remove for LLVM 12
 $(eval $(call LLVM_PATCH,llvm-11-D94813-mergeicmps))
-$(eval $(call LLVM_PATCH,llvm-11-D94980-CTR-half))
+$(eval $(call LLVM_PATCH,llvm-11-D94980-CTR-half)) # remove for LLVM 12
 $(eval $(call LLVM_PATCH,llvm-11-D94058-sext-atomic-ops)) # remove for LLVM 12
 $(eval $(call LLVM_PATCH,llvm-11-D96283-dagcombine-half)) # remove for LLVM 12
 $(eval $(call LLVM_PROJ_PATCH,llvm-11-AArch64-FastIsel-bug))
@@ -570,6 +499,23 @@ $(eval $(call LLVM_PROJ_PATCH,llvm-11-D97571-AArch64-loh)) # remove for LLVM 13
 $(eval $(call LLVM_PROJ_PATCH,llvm-11-aarch64-addrspace)) # remove for LLVM 13
 endif # LLVM_VER 11.0
 
+ifeq ($(LLVM_VER_SHORT),12.0)
+$(eval $(call LLVM_PATCH,llvm7-revert-D44485)) # Needs upstreaming
+$(eval $(call LLVM_PATCH,llvm-12-D75072-SCEV-add-type))
+$(eval $(call LLVM_PATCH,llvm-julia-tsan-custom-as))
+ifeq ($(BUILD_LLVM_CLANG),1)
+$(eval $(call LLVM_PATCH,llvm-D88630-clang-cmake))
+endif
+$(eval $(call LLVM_PATCH,llvm-invalid-addrspacecast-sink)) # Still being upstreamed as D92210
+$(eval $(call LLVM_PATCH,llvm-11-D93154-globalisel-as))
+$(eval $(call LLVM_PATCH,llvm-11-D94813-mergeicmps))
+$(eval $(call LLVM_PROJ_PATCH,llvm-11-AArch64-FastIsel-bug))
+$(eval $(call LLVM_PATCH,llvm-12-D97435-AArch64-movaddrreg))
+$(eval $(call LLVM_PROJ_PATCH,llvm-11-D97571-AArch64-loh)) # remove for LLVM 13
+$(eval $(call LLVM_PROJ_PATCH,llvm-11-aarch64-addrspace)) # remove for LLVM 13
+$(eval $(call LLVM_PROJ_PATCH,llvm-12-fde-symbols-aarch64)) # remove for LLVM 13
+$(eval $(call LLVM_PROJ_PATCH,llvm-12-force-eh_frame-aarch64)) # remove for LLVM 13
+endif # LLVM_VER 12.0
 
 # Add a JL prefix to the version map. DO NOT REMOVE
 ifneq ($(LLVM_VER), svn)
@@ -610,7 +556,7 @@ LLVM_INSTALL = \
     cp -r $$(LLVM_SRC_DIR)/utils/lit $2$$(build_depsbindir)/ && \
     $$(CMAKE) -DCMAKE_INSTALL_PREFIX="$2$$(build_prefix)" -P cmake_install.cmake
 ifeq ($(OS), WINNT)
-LLVM_INSTALL += && cp $2$$(build_shlibdir)/LLVM.dll $2$$(build_depsbindir)
+LLVM_INSTALL += && cp $2$$(build_shlibdir)/libLLVM.dll $2$$(build_depsbindir)
 endif
 ifeq ($(OS),Darwin)
 # https://github.com/JuliaLang/julia/issues/29981
@@ -657,10 +603,14 @@ else # USE_BINARYBUILDER_LLVM
 ifeq ($(LLVM_ASSERTIONS), 1)
 LLVM_JLL_DOWNLOAD_NAME := libLLVM_assert
 LLVM_JLL_VER := $(LLVM_ASSERT_JLL_VER)
+LLVM_TOOLS_JLL_DOWNLOAD_NAME := LLVM_assert
+LLVM_TOOLS_JLL_VER := $(LLVM_TOOLS_ASSERT_JLL_VER)
 endif
 
 $(eval $(call bb-install,llvm,LLVM,false,true))
 $(eval $(call bb-install,clang,CLANG,false,true))
 $(eval $(call bb-install,llvm-tools,LLVM_TOOLS,false,true))
 
+install-clang install-llvm-tools: install-llvm
+
 endif # USE_BINARYBUILDER_LLVM
diff --git a/deps/openblas.mk b/deps/openblas.mk
index e555e613c2ebc0..6eb0e43c4d4762 100644
--- a/deps/openblas.mk
+++ b/deps/openblas.mk
@@ -103,12 +103,12 @@ $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-ofast-power.patch-applied: $(BUILDDIR)/
 		patch -p1 -f < $(SRCDIR)/patches/openblas-ofast-power.patch
 	echo 1 > $@
 
-$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-exshift.patch-applied: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-ofast-power.patch-applied
+$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/neoverse-generic-kernels.patch-applied: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-ofast-power.patch-applied
 	cd $(BUILDDIR)/$(OPENBLAS_SRC_DIR) && \
 		patch -p1 -f < $(SRCDIR)/patches/openblas-exshift.patch
 	echo 1 > $@
 
-$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-configured: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-exshift.patch-applied
+$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-configured: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/neoverse-generic-kernels.patch-applied
 	echo 1 > $@
 
 $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-compiled: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-configured
diff --git a/deps/openblas.version b/deps/openblas.version
index 7814f788417671..346e75dac614bc 100644
--- a/deps/openblas.version
+++ b/deps/openblas.version
@@ -1,2 +1,2 @@
-OPENBLAS_BRANCH=v0.3.13
-OPENBLAS_SHA1=d2b11c47774b9216660e76e2fc67e87079f26fa1
+OPENBLAS_BRANCH=v0.3.17
+OPENBLAS_SHA1=d909f9f3d4fc4ccff36d69f178558df154ba1002
diff --git a/deps/patches/SuiteSparse-winclang.patch b/deps/patches/SuiteSparse-winclang.patch
deleted file mode 100644
index 8bfb64dc278393..00000000000000
--- a/deps/patches/SuiteSparse-winclang.patch
+++ /dev/null
@@ -1,14 +0,0 @@
---- SuiteSparse_config/SuiteSparse_config.h	2015-07-15 03:26:41.000000000 +0000
-+++ SuiteSparse_config/SuiteSparse_config.h	2016-07-01 00:55:57.157465600 +0000
-@@ -54,7 +54,11 @@
- #ifdef _WIN64
- 
- #define SuiteSparse_long __int64
-+#ifdef _MSVC_VER
- #define SuiteSparse_long_max _I64_MAX
-+#else
-+#define SuiteSparse_long_max LLONG_MAX
-+#endif
- #define SuiteSparse_long_idd "I64d"
- 
- #else
diff --git a/deps/patches/llvm-10-r_aarch64_prel32.patch b/deps/patches/llvm-10-r_aarch64_prel32.patch
deleted file mode 100644
index 7236574b3311d6..00000000000000
--- a/deps/patches/llvm-10-r_aarch64_prel32.patch
+++ /dev/null
@@ -1,66 +0,0 @@
-From c530dd687328d4208f91e62b600d25ec6e7f0f39 Mon Sep 17 00:00:00 2001
-From: Fangrui Song <i@maskray.me>
-Date: Fri, 17 Jul 2020 23:49:15 -0700
-Subject: [PATCH 2/2] [RelocationResolver] Support R_AARCH64_PREL32
-
-Code from D83800 by Yichao Yu
----
- llvm/lib/Object/RelocationResolver.cpp  |  6 ++++++
- llvm/test/DebugInfo/AArch64/eh-frame.ll | 21 +++++++++++++++++++++
- 2 files changed, 27 insertions(+)
- create mode 100644 llvm/test/DebugInfo/AArch64/eh-frame.ll
-
-diff --git llvm/lib/Object/RelocationResolver.cpp llvm/lib/Object/RelocationResolver.cpp
-index eedb236f83d..80339ebf7b9 100644
---- llvm/lib/Object/RelocationResolver.cpp
-+++ llvm/lib/Object/RelocationResolver.cpp
-@@ -62,6 +62,8 @@ static bool supportsAArch64(uint64_t Type) {
-   switch (Type) {
-   case ELF::R_AARCH64_ABS32:
-   case ELF::R_AARCH64_ABS64:
-+  case ELF::R_AARCH64_PREL32:
-+  case ELF::R_AARCH64_PREL64:
-     return true;
-   default:
-     return false;
-@@ -74,6 +76,10 @@ static uint64_t resolveAArch64(RelocationRef R, uint64_t S, uint64_t A) {
-     return (S + getELFAddend(R)) & 0xFFFFFFFF;
-   case ELF::R_AARCH64_ABS64:
-     return S + getELFAddend(R);
-+  case ELF::R_AARCH64_PREL32:
-+    return (S + getELFAddend(R) - R.getOffset()) & 0xFFFFFFFF;
-+  case ELF::R_AARCH64_PREL64:
-+    return S + getELFAddend(R) - R.getOffset();
-   default:
-     llvm_unreachable("Invalid relocation type");
-   }
-diff --git llvm/test/DebugInfo/AArch64/eh-frame.ll llvm/test/DebugInfo/AArch64/eh-frame.ll
-new file mode 100644
-index 00000000000..9651159271e
---- /dev/null
-+++ llvm/test/DebugInfo/AArch64/eh-frame.ll
-@@ -0,0 +1,21 @@
-+; RUN: llc -filetype=obj -mtriple=aarch64 %s -o %t.o
-+; RUN: llvm-readobj -r %t.o | FileCheck %s --check-prefix=REL32
-+; RUN: llvm-dwarfdump --eh-frame %t.o 2>&1 | FileCheck %s
-+
-+; REL32:      R_AARCH64_PREL32 .text 0x0
-+; REL32-NEXT: R_AARCH64_PREL32 .text 0x4
-+
-+; CHECK-NOT:  warning:
-+; CHECK: FDE cie=00000000 pc=00000000...00000004
-+;; TODO Take relocation into consideration
-+; CHECK: FDE cie=00000000 pc=00000000...00000004
-+
-+define void @foo() {
-+entry:
-+  ret void
-+}
-+
-+define void @bar() {
-+entry:
-+  ret void
-+}
--- 
-2.28.0
-
diff --git a/deps/patches/llvm-10-r_ppc_rel.patch b/deps/patches/llvm-10-r_ppc_rel.patch
deleted file mode 100644
index f86ee75bb845ea..00000000000000
--- a/deps/patches/llvm-10-r_ppc_rel.patch
+++ /dev/null
@@ -1,116 +0,0 @@
-From b7f604447cbd76c803ccff5c0ff1b699b9c1504e Mon Sep 17 00:00:00 2001
-From: Fangrui Song <i@maskray.me>
-Date: Fri, 17 Jul 2020 23:29:50 -0700
-Subject: [PATCH 1/2] [RelocationResolver] Support R_PPC_REL32 &
- R_PPC64_REL{32,64}
-
-This suppresses `failed to compute relocation: R_PPC_REL32, Invalid data was encountered while parsing the file`
-and its 64-bit variants when running llvm-dwarfdump on a PowerPC object file with .eh_frame
-
-Unfortunately it is difficult to test the computation:
-DWARFDataExtractor::getEncodedPointer does not use the relocated value
-and even if it does, we need to teach llvm-dwarfdump --eh-frame to do
-some linker job to report a reasonable address.
----
- llvm/lib/Object/RelocationResolver.cpp  | 20 +++++++++++--
- llvm/test/DebugInfo/PowerPC/eh-frame.ll | 39 +++++++++++++++++++++++++
- 2 files changed, 57 insertions(+), 2 deletions(-)
- create mode 100644 llvm/test/DebugInfo/PowerPC/eh-frame.ll
-
-diff --git llvm/lib/Object/RelocationResolver.cpp llvm/lib/Object/RelocationResolver.cpp
-index 31478be7899..eedb236f83d 100644
---- llvm/lib/Object/RelocationResolver.cpp
-+++ llvm/lib/Object/RelocationResolver.cpp
-@@ -131,6 +131,8 @@ static bool supportsPPC64(uint64_t Type) {
-   switch (Type) {
-   case ELF::R_PPC64_ADDR32:
-   case ELF::R_PPC64_ADDR64:
-+  case ELF::R_PPC64_REL32:
-+  case ELF::R_PPC64_REL64:
-     return true;
-   default:
-     return false;
-@@ -143,6 +145,10 @@ static uint64_t resolvePPC64(RelocationRef R, uint64_t S, uint64_t A) {
-     return (S + getELFAddend(R)) & 0xFFFFFFFF;
-   case ELF::R_PPC64_ADDR64:
-     return S + getELFAddend(R);
-+  case ELF::R_PPC64_REL32:
-+    return (S + getELFAddend(R) - R.getOffset()) & 0xFFFFFFFF;
-+  case ELF::R_PPC64_REL64:
-+    return S + getELFAddend(R) - R.getOffset();
-   default:
-     llvm_unreachable("Invalid relocation type");
-   }
-@@ -238,12 +244,22 @@ static uint64_t resolveX86(RelocationRef R, uint64_t S, uint64_t A) {
- }
- 
- static bool supportsPPC32(uint64_t Type) {
--  return Type == ELF::R_PPC_ADDR32;
-+  switch (Type) {
-+  case ELF::R_PPC_ADDR32:
-+  case ELF::R_PPC_REL32:
-+    return true;
-+  default:
-+    return false;
-+  }
- }
- 
- static uint64_t resolvePPC32(RelocationRef R, uint64_t S, uint64_t A) {
--  if (R.getType() == ELF::R_PPC_ADDR32)
-+  switch (R.getType()) {
-+  case ELF::R_PPC_ADDR32:
-     return (S + getELFAddend(R)) & 0xFFFFFFFF;
-+  case ELF::R_PPC_REL32:
-+    return (S + getELFAddend(R) - R.getOffset()) & 0xFFFFFFFF;
-+  }
-   llvm_unreachable("Invalid relocation type");
- }
- 
-diff --git llvm/test/DebugInfo/PowerPC/eh-frame.ll llvm/test/DebugInfo/PowerPC/eh-frame.ll
-new file mode 100644
-index 00000000000..3a8f7df6b61
---- /dev/null
-+++ llvm/test/DebugInfo/PowerPC/eh-frame.ll
-@@ -0,0 +1,39 @@
-+; RUN: llc -filetype=obj -mtriple=powerpc %s -o %t32.o
-+; RUN: llvm-readobj -r %t32.o | FileCheck %s --check-prefix=PPC_REL
-+; RUN: llvm-dwarfdump --eh-frame %t32.o 2>&1 | FileCheck %s --check-prefix=PPC
-+
-+; PPC_REL:      R_PPC_REL32 .text 0x0
-+; PPC_REL-NEXT: R_PPC_REL32 .text 0x4
-+
-+; PPC-NOT: warning:
-+; PPC: FDE cie=00000000 pc=00000000...00000004
-+;; TODO Take relocation into consideration
-+; PPC: FDE cie=00000000 pc=00000000...00000004
-+
-+; RUN: llc -filetype=obj -mtriple=ppc64 %s -o %t64.o
-+; RUN: llvm-readobj -r %t64.o | FileCheck %s --check-prefix=PPC64_REL
-+; RUN: llvm-dwarfdump --eh-frame %t64.o 2>&1 | FileCheck %s --check-prefix=PPC64
-+
-+; PPC64_REL:      R_PPC64_REL32 .text 0x0
-+; PPC64_REL-NEXT: R_PPC64_REL32 .text 0x10
-+
-+; PPC64-NOT: warning:
-+; PPC64: FDE cie=00000000 pc=00000000...00000010
-+; PPC64: FDE cie=00000000 pc=00000000...00000010
-+
-+; RUN: llc -filetype=obj -mtriple=ppc64le -code-model=large %s -o %t64l.o
-+; RUN: llvm-readobj -r %t64l.o | FileCheck %s --check-prefix=PPC64L_REL
-+; RUN: llvm-dwarfdump --eh-frame %t64l.o 2>&1 | FileCheck %s --check-prefix=PPC64
-+
-+; PPC64L_REL:      R_PPC64_REL64 .text 0x0
-+; PPC64L_REL-NEXT: R_PPC64_REL64 .text 0x10
-+
-+define void @foo() {
-+entry:
-+  ret void
-+}
-+
-+define void @bar() {
-+entry:
-+  ret void
-+}
--- 
-2.28.0
-
diff --git a/deps/patches/llvm-10.0-PPC-LI-Elimination.patch b/deps/patches/llvm-10.0-PPC-LI-Elimination.patch
deleted file mode 100644
index f47a2c97f0e372..00000000000000
--- a/deps/patches/llvm-10.0-PPC-LI-Elimination.patch
+++ /dev/null
@@ -1,161 +0,0 @@
-From 5423496713e84dea2650e1703821de620f934573 Mon Sep 17 00:00:00 2001
-From: Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
-Date: Thu, 9 Apr 2020 21:34:30 -0500
-Subject: [PATCH] [PowerPC] Bail out of redundant LI elimination on an implicit
- kill
-
-The transformation currently does not differentiate between explicit
-and implicit kills. However, it is not valid to later simply clear
-an implicit kill flag since the kill could be due to a call or return.
-
-Fixes: https://bugs.llvm.org/show_bug.cgi?id=45374
----
- .../lib/Target/PowerPC/PPCPreEmitPeephole.cpp |  10 ++
- .../remove-redundant-li-skip-imp-kill.mir     | 114 ++++++++++++++++++
- 2 files changed, 124 insertions(+)
- create mode 100644 llvm/test/CodeGen/PowerPC/remove-redundant-li-skip-imp-kill.mir
-
-diff --git llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
-index a4b4bf2973d..4ea714ff15f 100644
---- llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
-+++ llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
-@@ -109,6 +109,16 @@ namespace {
-           // Track the operand that kill Reg. We would unset the kill flag of
-           // the operand if there is a following redundant load immediate.
-           int KillIdx = AfterBBI->findRegisterUseOperandIdx(Reg, true, TRI);
-+
-+          // We can't just clear implicit kills, so if we encounter one, stop
-+          // looking further.
-+          if (KillIdx != -1 && AfterBBI->getOperand(KillIdx).isImplicit()) {
-+            LLVM_DEBUG(dbgs()
-+                       << "Encountered an implicit kill, cannot proceed: ");
-+            LLVM_DEBUG(AfterBBI->dump());
-+            break;
-+          }
-+
-           if (KillIdx != -1) {
-             assert(!DeadOrKillToUnset && "Shouldn't kill same register twice");
-             DeadOrKillToUnset = &AfterBBI->getOperand(KillIdx);
-diff --git llvm/test/CodeGen/PowerPC/remove-redundant-li-skip-imp-kill.mir llvm/test/CodeGen/PowerPC/remove-redundant-li-skip-imp-kill.mir
-new file mode 100644
-index 00000000000..78091d027ce
---- /dev/null
-+++ llvm/test/CodeGen/PowerPC/remove-redundant-li-skip-imp-kill.mir
-@@ -0,0 +1,114 @@
-+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-+# RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -run-pass \
-+# RUN:   ppc-pre-emit-peephole %s -o - | FileCheck %s
-+--- |
-+  ; ModuleID = 'a.ll'
-+  source_filename = "a.ll"
-+  target datalayout = "e-m:e-i64:64-n32:64"
-+
-+  ; Function Attrs: nounwind
-+  define dso_local signext i32 @b(i32 signext %a, i32* nocapture %b) local_unnamed_addr #0 {
-+  entry:
-+    %call = tail call signext i32 @g(i32 signext %a)
-+    store i32 %call, i32* %b, align 4
-+    %call1 = tail call signext i32 @g(i32 signext %a)
-+    ret i32 %call1
-+  }
-+
-+  ; Function Attrs: nounwind
-+  declare signext i32 @g(i32 signext) local_unnamed_addr #0
-+
-+  ; Function Attrs: nounwind
-+  declare void @llvm.stackprotector(i8*, i8**) #0
-+
-+  attributes #0 = { nounwind }
-+
-+...
-+---
-+name:            b
-+alignment:       16
-+exposesReturnsTwice: false
-+legalized:       false
-+regBankSelected: false
-+selected:        false
-+failedISel:      false
-+tracksRegLiveness: true
-+hasWinCFI:       false
-+registers:       []
-+liveins:
-+  - { reg: '$x3', virtual-reg: '' }
-+  - { reg: '$x4', virtual-reg: '' }
-+frameInfo:
-+  isFrameAddressTaken: false
-+  isReturnAddressTaken: false
-+  hasStackMap:     false
-+  hasPatchPoint:   false
-+  stackSize:       64
-+  offsetAdjustment: 0
-+  maxAlignment:    1
-+  adjustsStack:    true
-+  hasCalls:        true
-+  stackProtector:  ''
-+  maxCallFrameSize: 32
-+  cvBytesOfCalleeSavedRegisters: 0
-+  hasOpaqueSPAdjustment: false
-+  hasVAStart:      false
-+  hasMustTailInVarArgFunc: false
-+  localFrameSize:  0
-+  savePoint:       ''
-+  restorePoint:    ''
-+fixedStack:
-+  - { id: 0, type: spill-slot, offset: -80, size: 8, alignment: 16, stack-id: default,
-+      callee-saved-register: '$x30', callee-saved-restored: true, debug-info-variable: '',
-+      debug-info-expression: '', debug-info-location: '' }
-+  - { id: 1, type: spill-slot, offset: -88, size: 8, alignment: 8, stack-id: default,
-+      callee-saved-register: '$x29', callee-saved-restored: true, debug-info-variable: '',
-+      debug-info-expression: '', debug-info-location: '' }
-+stack:           []
-+callSites:       []
-+constants:       []
-+machineFunctionInfo: {}
-+body:             |
-+  bb.0.entry:
-+    liveins: $x3, $x4, $x29, $x30
-+
-+    ; CHECK-LABEL: name: b
-+    ; CHECK: liveins: $x3, $x4, $x29, $x30
-+    ; CHECK: $x0 = MFLR8 implicit $lr8
-+    ; CHECK: STD killed $x29, -24, $x1 :: (store 8 into %fixed-stack.0)
-+    ; CHECK: STD killed $x30, -16, $x1 :: (store 8 into %fixed-stack.1, align 16)
-+    ; CHECK: STD killed $x0, 16, $x1
-+    ; CHECK: $x1 = STDU $x1, -64, $x1
-+    ; CHECK: $x30 = OR8 killed $x4, $x4
-+    ; CHECK: dead $r4 = LI 10, implicit-def $x4
-+    ; CHECK: $x29 = OR8 $x3, $x3
-+    ; CHECK: BL8_NOP @g, csr_ppc64_r2_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit $x2, implicit-def $r1, implicit-def $x3
-+    ; CHECK: STW8 killed renamable $x3, 0, killed renamable $x30 :: (store 4 into %ir.b)
-+    ; CHECK: $x3 = OR8 killed $x29, $x29
-+    ; CHECK: BL8_NOP @g, csr_ppc64_r2_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1, implicit-def $x3
-+    ; CHECK: $x1 = ADDI8 $x1, 64
-+    ; CHECK: $x0 = LD 16, $x1
-+    ; CHECK: $x30 = LD -16, $x1 :: (load 8 from %fixed-stack.1, align 16)
-+    ; CHECK: $x29 = LD -24, $x1 :: (load 8 from %fixed-stack.0)
-+    ; CHECK: MTLR8 killed $x0, implicit-def $lr8
-+    ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
-+    $x0 = MFLR8 implicit $lr8
-+    STD killed $x29, -24, $x1 :: (store 8 into %fixed-stack.1)
-+    STD killed $x30, -16, $x1 :: (store 8 into %fixed-stack.0, align 16)
-+    STD killed $x0, 16, $x1
-+    $x1 = STDU $x1, -64, $x1
-+    $x30 = OR8 killed $x4, $x4
-+    dead $r4 = LI 10, implicit-def $x4
-+    $x29 = OR8 $x3, $x3
-+    BL8_NOP @g, csr_ppc64_r2_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit $x2, implicit-def $r1, implicit-def $x3
-+    STW8 killed renamable $x3, 0, killed renamable $x30 :: (store 4 into %ir.b)
-+    $x3 = OR8 killed $x29, $x29
-+    BL8_NOP @g, csr_ppc64_r2_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1, implicit-def $x3
-+    $x1 = ADDI8 $x1, 64
-+    $x0 = LD 16, $x1
-+    $x30 = LD -16, $x1 :: (load 8 from %fixed-stack.0, align 16)
-+    $x29 = LD -24, $x1 :: (load 8 from %fixed-stack.1)
-+    MTLR8 killed $x0, implicit-def $lr8
-+    BLR8 implicit $lr8, implicit $rm, implicit killed $x3
-+
-+...
--- 
-2.26.0
-
diff --git a/deps/patches/llvm-10.0-PPC_SELECT_CC.patch b/deps/patches/llvm-10.0-PPC_SELECT_CC.patch
deleted file mode 100644
index a56b20eb064f04..00000000000000
--- a/deps/patches/llvm-10.0-PPC_SELECT_CC.patch
+++ /dev/null
@@ -1,135 +0,0 @@
-From 4765db99fa35257a4209e2976903d81087a3f0d7 Mon Sep 17 00:00:00 2001
-From: Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
-Date: Thu, 9 Apr 2020 13:53:02 -0500
-Subject: [PATCH] [PowerPC] Don't assert on SELECT_CC with i1 type
-
-When we try to select a SELECT_CC on Power9, we check if it can be matched to a
-SETB instruction. In that function, we assert that the output type is i32/i64.
-This is unnecessary as it is perfectly reasonable to have an i1 SELECT_CC.
-Change that from an assert to an early exit condition.
-Fixes: https://bugs.llvm.org/show_bug.cgi?id=45448
----
- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp |  7 +-
- llvm/test/CodeGen/PowerPC/pr45448.ll        | 90 +++++++++++++++++++++
- 2 files changed, 92 insertions(+), 5 deletions(-)
- create mode 100644 llvm/test/CodeGen/PowerPC/pr45448.ll
-
-diff --git llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
-index 776ec52e260..9230ce4118b 100644
---- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
-+++ llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
-@@ -4241,13 +4241,10 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
-   SDValue TrueRes = N->getOperand(2);
-   SDValue FalseRes = N->getOperand(3);
-   ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
--  if (!TrueConst)
-+  if (!TrueConst || (N->getSimpleValueType(0) != MVT::i64 &&
-+                     N->getSimpleValueType(0) != MVT::i32))
-     return false;
- 
--  assert((N->getSimpleValueType(0) == MVT::i64 ||
--          N->getSimpleValueType(0) == MVT::i32) &&
--         "Expecting either i64 or i32 here.");
--
-   // We are looking for any of:
-   // (select_cc lhs, rhs,  1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
-   // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
-diff --git llvm/test/CodeGen/PowerPC/pr45448.ll llvm/test/CodeGen/PowerPC/pr45448.ll
-new file mode 100644
-index 00000000000..eb0a61cb075
---- /dev/null
-+++ llvm/test/CodeGen/PowerPC/pr45448.ll
-@@ -0,0 +1,90 @@
-+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
-+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
-+; RUN: FileCheck %s
-+define hidden void @julia_tryparse_internal_45896() #0 {
-+; CHECK-LABEL: julia_tryparse_internal_45896:
-+; CHECK:       # %bb.0: # %top
-+; CHECK-NEXT:    ld r3, 0(r3)
-+; CHECK-NEXT:    cmpldi r3, 0
-+; CHECK-NEXT:    beq cr0, .LBB0_3
-+; CHECK-NEXT:  # %bb.1: # %top
-+; CHECK-NEXT:    cmpldi r3, 10
-+; CHECK-NEXT:    beq cr0, .LBB0_4
-+; CHECK-NEXT:  # %bb.2: # %top
-+; CHECK-NEXT:  .LBB0_3: # %fail194
-+; CHECK-NEXT:  .LBB0_4: # %L294
-+; CHECK-NEXT:    bc 12, 4*cr5+lt, .LBB0_6
-+; CHECK-NEXT:  # %bb.5: # %L294
-+; CHECK-NEXT:    bc 4, 4*cr5+lt, .LBB0_7
-+; CHECK-NEXT:  .LBB0_6: # %L1057.preheader
-+; CHECK-NEXT:  .LBB0_7: # %L670
-+; CHECK-NEXT:    lis r5, 4095
-+; CHECK-NEXT:    ori r5, r5, 65533
-+; CHECK-NEXT:    sldi r5, r5, 4
-+; CHECK-NEXT:    cmpdi r3, 0
-+; CHECK-NEXT:    sradi r4, r3, 63
-+; CHECK-NEXT:    mulhdu r3, r3, r5
-+; CHECK-NEXT:    maddld r6, r4, r5, r3
-+; CHECK-NEXT:    crnor 4*cr5+gt, eq, eq
-+; CHECK-NEXT:    cmpld r6, r3
-+; CHECK-NEXT:    mulld r3, r4, r5
-+; CHECK-NEXT:    cmpldi cr1, r3, 0
-+; CHECK-NEXT:    crandc 4*cr5+lt, lt, 4*cr1+eq
-+; CHECK-NEXT:    mulhdu. r3, r4, r5
-+; CHECK-NEXT:    bc 4, 4*cr5+gt, .LBB0_10
-+; CHECK-NEXT:  # %bb.8: # %L670
-+; CHECK-NEXT:    crorc 4*cr5+lt, 4*cr5+lt, eq
-+; CHECK-NEXT:    bc 4, 4*cr5+lt, .LBB0_10
-+; CHECK-NEXT:  # %bb.9: # %L917
-+; CHECK-NEXT:  .LBB0_10: # %L994
-+top:
-+  %0 = load i64, i64* undef, align 8
-+  %1 = icmp ne i64 %0, 0
-+  %2 = sext i64 %0 to i128
-+  switch i64 %0, label %pass195 [
-+    i64 10, label %L294
-+    i64 16, label %L294.fold.split
-+    i64 0, label %fail194
-+  ]
-+
-+L294.fold.split:                                  ; preds = %top
-+  unreachable
-+
-+L294:                                             ; preds = %top
-+  %3 = add nsw i32 0, -48
-+  %4 = zext i32 %3 to i128
-+  %5 = add i128 %4, 0
-+  switch i32 undef, label %L670 [
-+    i32 -1031471104, label %L1057.preheader
-+    i32 536870912, label %L1057.preheader
-+  ]
-+
-+L670:                                             ; preds = %L294
-+  br label %L898
-+
-+L1057.preheader:                                  ; preds = %L294, %L294
-+  unreachable
-+
-+L898:                                             ; preds = %L670
-+  %umul = call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %2, i128 %5)
-+  %umul.ov = extractvalue { i128, i1 } %umul, 1
-+  %value_phi102 = and i1 %1, %umul.ov
-+  %6 = or i1 %value_phi102, false
-+  br i1 %6, label %L917, label %L994
-+
-+L917:                                             ; preds = %L898
-+  unreachable
-+
-+L994:                                             ; preds = %L898
-+  unreachable
-+
-+fail194:                                          ; preds = %top
-+  unreachable
-+
-+pass195:                                          ; preds = %top
-+  unreachable
-+}
-+
-+; Function Attrs: nounwind readnone speculatable willreturn
-+declare { i128, i1 } @llvm.umul.with.overflow.i128(i128, i128) #1
--- 
-2.26.0
-
diff --git a/deps/patches/llvm-8.0-D75072-SCEV-add-type.patch b/deps/patches/llvm-12-D75072-SCEV-add-type.patch
similarity index 79%
rename from deps/patches/llvm-8.0-D75072-SCEV-add-type.patch
rename to deps/patches/llvm-12-D75072-SCEV-add-type.patch
index 6418eca5d28b69..c74cd70b20e65a 100644
--- a/deps/patches/llvm-8.0-D75072-SCEV-add-type.patch
+++ b/deps/patches/llvm-12-D75072-SCEV-add-type.patch
@@ -1,10 +1,8 @@
-From f11f45a45ce8b90c798dd939d2782205e4291360 Mon Sep 17 00:00:00 2001
+From 4827d22b3e297b82c7689f0fb06b38e67d92b578 Mon Sep 17 00:00:00 2001
 From: Keno Fischer <keno@juliacomputing.com>
-Date: Fri, 6 Mar 2020 10:29:20 -0500
+Date: Wed, 21 Apr 2021 12:25:07 -0400
 Subject: [PATCH] [SCEV] Record NI types in add exprs
 
-Summary:
-(Rebased to LLVM 8 from the original LLVM 9 patch)
 This fixes a case where loop-reduce introduces ptrtoint/inttoptr for
 non-integral address space pointers. Over the past several years, we
 have gradually improved the SCEVExpander to actually do something
@@ -70,28 +68,22 @@ However, given the practical considerations above, that's a project
 for another time. For now, simply allowing the existing pointer-diff
 pattern for non-integral pointers seems to work ok.
 
-Reviewers: sanjoy, reames, vtjnash, vchuravy
-
-Subscribers: hiraditya, javed.absar, llvm-commits
-
-Tags: #llvm, #julialang
-
 Differential Revision: https://reviews.llvm.org/D75072
 ---
- llvm/include/llvm/Analysis/ScalarEvolution.h  | 26 +++++--
- .../Analysis/ScalarEvolutionExpressions.h     | 70 ++++++++++++++++---
- llvm/lib/Analysis/ScalarEvolution.cpp         | 44 +++++++++---
- .../LoopStrengthReduce/nonintegral.ll         | 35 +++++++++-
- 4 files changed, 150 insertions(+), 25 deletions(-)
+ llvm/include/llvm/Analysis/ScalarEvolution.h  | 21 ++++-
+ .../Analysis/ScalarEvolutionExpressions.h     | 81 ++++++++++++++++---
+ llvm/lib/Analysis/ScalarEvolution.cpp         | 41 +++++++---
+ .../LoopStrengthReduce/nonintegral.ll         | 35 +++++++-
+ 4 files changed, 155 insertions(+), 23 deletions(-)
 
 diff --git llvm/include/llvm/Analysis/ScalarEvolution.h llvm/include/llvm/Analysis/ScalarEvolution.h
-index 5286f6a220e..f27fceb70d2 100644
+index b3f199de2cfa..d98fbeb5dcf7 100644
 --- llvm/include/llvm/Analysis/ScalarEvolution.h
 +++ llvm/include/llvm/Analysis/ScalarEvolution.h
-@@ -116,6 +116,19 @@ public:
+@@ -120,6 +120,19 @@ public:
      NoWrapMask = (1 << 3) - 1
    };
-
+ 
 +  /// HasNonIntegralPointerFlag are bitfield indices into SubclassData.
 +  ///
 +  /// When constructing SCEV expressions for LLVM expressions with non-integral
@@ -105,12 +97,12 @@ index 5286f6a220e..f27fceb70d2 100644
 +    FlagHasNIPointers = (1 << 3)
 +  };
 +
-   explicit SCEV(const FoldingSetNodeIDRef ID, unsigned SCEVTy)
-       : FastID(ID), SCEVType(SCEVTy) {}
-   SCEV(const SCEV &) = delete;
-@@ -138,6 +138,10 @@ public:
-   /// Return true if the specified scev is negated, but not a constant.
-   bool isNonConstantNegative() const;
+   explicit SCEV(const FoldingSetNodeIDRef ID, SCEVTypes SCEVTy,
+                 unsigned short ExpressionSize)
+       : FastID(ID), SCEVType(SCEVTy), ExpressionSize(ExpressionSize) {}
+@@ -156,6 +169,10 @@ public:
+     return ExpressionSize;
+   }
  
 +  bool hasNonIntegralPointers() const {
 +    return SubclassData & FlagHasNIPointers;
@@ -119,11 +111,29 @@ index 5286f6a220e..f27fceb70d2 100644
    /// Print out the internal representation of this scalar to the specified
    /// stream.  This should really only be used for debugging purposes.
    void print(raw_ostream &OS) const;
+@@ -745,7 +762,7 @@ public:
+                                         const BasicBlock *ExitingBlock);
+ 
+   /// The terms "backedge taken count" and "exit count" are used
+-  /// interchangeably to refer to the number of times the backedge of a loop 
++  /// interchangeably to refer to the number of times the backedge of a loop
+   /// has executed before the loop is exited.
+   enum ExitCountKind {
+     /// An expression exactly describing the number of times the backedge has
+@@ -758,7 +775,7 @@ public:
+   };
+ 
+   /// Return the number of times the backedge executes before the given exit
+-  /// would be taken; if not exactly computable, return SCEVCouldNotCompute. 
++  /// would be taken; if not exactly computable, return SCEVCouldNotCompute.
+   /// For a single exit loop, this value is equivelent to the result of
+   /// getBackedgeTakenCount.  The loop is guaranteed to exit (via *some* exit)
+   /// before the backedge is executed (ExitCount + 1) times.  Note that there
 diff --git llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
-index 876d68438ef..b9ea23c0086 100644
+index 37e675f08afc..6e532b22f5b3 100644
 --- llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
 +++ llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
-@@ -181,6 +184,13 @@ class Type;
+@@ -228,6 +228,13 @@ class Type;
        return getNoWrapFlags(FlagNW) != FlagAnyWrap;
      }
  
@@ -137,30 +147,35 @@ index 876d68438ef..b9ea23c0086 100644
      /// Methods for support type inquiry through isa, cast, and dyn_cast:
      static bool classof(const SCEV *S) {
        return S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr ||
-@@ -215,24 +220,54 @@ class Type;
-   class SCEVAddExpr : public SCEVCommutativeExpr {
-     friend class ScalarEvolution;
+@@ -264,19 +271,16 @@ class Type;
+ 
+     Type *Ty;
  
 +  protected:
-     SCEVAddExpr(const FoldingSetNodeIDRef ID,
-                 const SCEV *const *O, size_t N)
-       : SCEVCommutativeExpr(ID, scAddExpr, O, N) {}
+     SCEVAddExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N)
+         : SCEVCommutativeExpr(ID, scAddExpr, O, N) {
+-      auto *FirstPointerTypedOp = find_if(operands(), [](const SCEV *Op) {
+-        return Op->getType()->isPointerTy();
+-      });
+-      if (FirstPointerTypedOp != operands().end())
+-        Ty = (*FirstPointerTypedOp)->getType();
+-      else
+-        Ty = getOperand(0)->getType();
++
+     }
  
    public:
--    Type *getType() const {
--      // Use the type of the last operand, which is likely to be a pointer
--      // type, if there is one. This doesn't usually matter, but it can help
--      // reduce casts when the expressions are expanded.
--      return getOperand(getNumOperands() - 1)->getType();
-+    /// Returns the type of the add expression, by looking either at the last
-+    /// operand or deferring to the SCEVAddNIExpr subclass for non-integral
-+    /// pointers.
+-    Type *getType() const { return Ty; }
++    // Returns the type of the add expression, by looking either at the last operand
++    // or deferring to the SCEVAddNIExpr subclass.
 +    Type *getType() const;
-+
-+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
-+    static bool classof(const SCEV *S) { return S->getSCEVType() == scAddExpr; }
-+  };
-+
+ 
+     /// Methods for support type inquiry through isa, cast, and dyn_cast:
+     static bool classof(const SCEV *S) {
+@@ -284,6 +288,46 @@ class Type;
+     }
+   };
+ 
 +  /// This node represents an addition of some number of SCEVs, one which
 +  /// is a non-integral pointer type, requiring us to know the type exactly for
 +  /// correctness.
@@ -172,18 +187,17 @@ index 876d68438ef..b9ea23c0086 100644
 +                  PointerType *NIType)
 +        : SCEVAddExpr(ID, O, N), NIType(NIType) {
 +      SubclassData |= FlagHasNIPointers;
-     }
- 
++    }
++
 +  public:
 +    Type *getType() const { return NIType; }
 +
-     /// Methods for support type inquiry through isa, cast, and dyn_cast:
-     static bool classof(const SCEV *S) {
--      return S->getSCEVType() == scAddExpr;
++    /// Methods for support type inquiry through isa, cast, and dyn_cast:
++    static bool classof(const SCEV *S) {
 +      return S->getSCEVType() == scAddExpr && S->hasNonIntegralPointers();
-     }
-   };
- 
++    }
++  };
++
 +  inline Type *SCEVAddExpr::getType() const {
 +    // In general, use the type of the last operand, which is likely to be a
 +    // pointer type, if there is one. This doesn't usually matter, but it can
@@ -192,13 +206,20 @@ index 876d68438ef..b9ea23c0086 100644
 +    // that stores that type explicitly.
 +    if (hasNonIntegralPointers())
 +      return cast<SCEVAddNIExpr>(this)->getType();
-+    return getOperand(getNumOperands() - 1)->getType();
++
++    auto *FirstPointerTypedOp = find_if(operands(), [](const SCEV *Op) {
++      return Op->getType()->isPointerTy();
++    });
++    if (FirstPointerTypedOp != operands().end())
++      return (*FirstPointerTypedOp)->getType();
++    else
++      return  getOperand(0)->getType();
 +  }
 +
    /// This node represents multiplication of some number of SCEVs.
    class SCEVMulExpr : public SCEVCommutativeExpr {
      friend class ScalarEvolution;
-@@ -242,6 +273,18 @@ class Type;
+@@ -293,6 +337,18 @@ class Type;
        : SCEVCommutativeExpr(ID, scMulExpr, O, N) {}
  
    public:
@@ -217,16 +238,16 @@ index 876d68438ef..b9ea23c0086 100644
      /// Methods for support type inquiry through isa, cast, and dyn_cast:
      static bool classof(const SCEV *S) {
        return S->getSCEVType() == scMulExpr;
-@@ -467,9 +690,12 @@ class Type;
+@@ -531,9 +587,12 @@ class Type;
      /// instances owned by a ScalarEvolution.
      SCEVUnknown *Next;
  
 -    SCEVUnknown(const FoldingSetNodeIDRef ID, Value *V,
 -                ScalarEvolution *se, SCEVUnknown *next) :
--      SCEV(ID, scUnknown), CallbackVH(V), SE(se), Next(next) {}
+-      SCEV(ID, scUnknown, 1), CallbackVH(V), SE(se), Next(next) {}
 +    SCEVUnknown(const FoldingSetNodeIDRef ID, Value *V, ScalarEvolution *se,
 +                SCEVUnknown *next, bool ValueIsNIPtr)
-+        : SCEV(ID, scUnknown), CallbackVH(V), SE(se), Next(next) {
++        : SCEV(ID, scUnknown, 1), CallbackVH(V), SE(se), Next(next) {
 +      if (ValueIsNIPtr)
 +        SubclassData |= FlagHasNIPointers;
 +    }
@@ -234,10 +255,10 @@ index 876d68438ef..b9ea23c0086 100644
      // Implement CallbackVH.
      void deleted() override;
 diff --git llvm/lib/Analysis/ScalarEvolution.cpp llvm/lib/Analysis/ScalarEvolution.cpp
-index cd74815a895..09e98345d0f 100644
+index fe9d8297d679..1fa7b8ce1451 100644
 --- llvm/lib/Analysis/ScalarEvolution.cpp
 +++ llvm/lib/Analysis/ScalarEvolution.cpp
-@@ -354,12 +354,13 @@ Type *SCEV::getType() const {
+@@ -389,12 +389,13 @@ Type *SCEV::getType() const {
    case scSignExtend:
      return cast<SCEVCastExpr>(this)->getType();
    case scAddRecExpr:
@@ -252,18 +273,7 @@ index cd74815a895..09e98345d0f 100644
    case scAddExpr:
      return cast<SCEVAddExpr>(this)->getType();
    case scUDivExpr:
-@@ -2419,8 +2420,9 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
-   }
- 
-   // Limit recursion calls depth.
--  if (Depth > MaxArithDepth)
-+  if (Depth > MaxArithDepth) {
-     return getOrCreateAddExpr(Ops, Flags);
-+  }
- 
-   // Okay, check to see if the same value occurs in the operand list more than
-   // once.  If so, merge them together into an multiply expression.  Since we
-@@ -2761,16 +2763,27 @@ ScalarEvolution::getOrCreateAddExpr(ArrayRef<const SCEV *> Ops,
+@@ -2679,16 +2680,27 @@ ScalarEvolution::getOrCreateAddExpr(ArrayRef<const SCEV *> Ops,
                                      SCEV::NoWrapFlags Flags) {
    FoldingSetNodeID ID;
    ID.AddInteger(scAddExpr);
@@ -295,7 +305,7 @@ index cd74815a895..09e98345d0f 100644
      UniqueSCEVs.InsertNode(S, IP);
      addToLoopUseLists(S);
    }
-@@ -2783,8 +2763,10 @@ ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops,
+@@ -2701,8 +2713,10 @@ ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops,
                                         const Loop *L, SCEV::NoWrapFlags Flags) {
    FoldingSetNodeID ID;
    ID.AddInteger(scAddRecExpr);
@@ -307,15 +317,15 @@ index cd74815a895..09e98345d0f 100644
    ID.AddPointer(L);
    void *IP = nullptr;
    SCEVAddRecExpr *S =
-@@ -2798,6 +2813,7 @@ ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops,
+@@ -2716,6 +2730,7 @@ ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops,
      addToLoopUseLists(S);
    }
-   S->setNoWrapFlags(Flags);
+   setNoWrapFlags(S, Flags);
 +  S->setHasNIPtr(Ops[0]->hasNonIntegralPointers());
    return S;
  }
  
-@@ -2806,8 +2822,11 @@ ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops,
+@@ -2724,8 +2739,11 @@ ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops,
                                      SCEV::NoWrapFlags Flags) {
    FoldingSetNodeID ID;
    ID.AddInteger(scMulExpr);
@@ -328,7 +338,7 @@ index cd74815a895..09e98345d0f 100644
    void *IP = nullptr;
    SCEVMulExpr *S =
      static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
-@@ -2820,6 +2839,7 @@ ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops,
+@@ -2738,6 +2756,7 @@ ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops,
      addToLoopUseLists(S);
    }
    S->setNoWrapFlags(Flags);
@@ -336,20 +346,20 @@ index cd74815a895..09e98345d0f 100644
    return S;
  }
  
-@@ -3631,8 +3591,11 @@ const SCEV *ScalarEvolution::getMinMaxExpr(unsigned Kind,
-   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+@@ -3615,8 +3634,11 @@ const SCEV *ScalarEvolution::getMinMaxExpr(SCEVTypes Kind,
+     return ExistingSCEV;
    const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
    std::uninitialized_copy(Ops.begin(), Ops.end(), O);
--  SCEV *S = new (SCEVAllocator) SCEVMinMaxExpr(
-+  SCEVMinMaxExpr *S = new (SCEVAllocator) SCEVMinMaxExpr(
-       ID.Intern(SCEVAllocator), static_cast<SCEVTypes>(Kind), O, Ops.size());
+-  SCEV *S = new (SCEVAllocator)
++  SCEVMinMaxExpr *S = new (SCEVAllocator)
+       SCEVMinMaxExpr(ID.Intern(SCEVAllocator), Kind, O, Ops.size());
 +  // For MinMaxExprs it's sufficient to see if the first Op has NI data, as the
 +  // operands all need to be of the same type.
 +  S->setHasNIPtr(Ops[0]->hasNonIntegralPointers());
+ 
    UniqueSCEVs.InsertNode(S, IP);
    addToLoopUseLists(S);
-   return S;
-@@ -3708,8 +3731,9 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) {
+@@ -3716,8 +3738,9 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) {
             "Stale SCEVUnknown in uniquing map!");
      return S;
    }
@@ -361,7 +371,7 @@ index cd74815a895..09e98345d0f 100644
    UniqueSCEVs.InsertNode(S, IP);
    return S;
 diff --git llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll
-index 5648e3aa74a..6936521f3a6 100644
+index 5648e3aa74af..6936521f3a64 100644
 --- llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll
 +++ llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll
 @@ -2,7 +2,7 @@
@@ -411,5 +421,5 @@ index 5648e3aa74a..6936521f3a6 100644
 +!1 = distinct !{!1, !2}
 +!2 = !{!"llvm.loop.isvectorized", i32 1}
 -- 
-2.25.1
+2.31.1
 
diff --git a/deps/patches/llvm-12-D97435-AArch64-movaddrreg.patch b/deps/patches/llvm-12-D97435-AArch64-movaddrreg.patch
new file mode 100644
index 00000000000000..01d49a85f1007d
--- /dev/null
+++ b/deps/patches/llvm-12-D97435-AArch64-movaddrreg.patch
@@ -0,0 +1,164 @@
+From 3adadbab531e0d7dc17499a6570b129e87f00c77 Mon Sep 17 00:00:00 2001
+From: Keno Fischer <keno@juliacomputing.com>
+Date: Wed, 21 Apr 2021 12:38:40 -0400
+Subject: [PATCH] [Aarch64] Correct register class for pseudo instructions
+
+This constrains the Mov* and similar pseudo instructions to take
+GPR64common register classes rather than GPR64. GPR64 includes XZR,
+which is invalid here, because these pseudo instructions expand
+into an adrp/add pair sharing a destination register. XZR is invalid
+on add and attempting to encode it will instead increment the stack
+pointer causing crashes (downstream report at [1]). The test case
+there reproduces on LLVM11, but I do not have a test case that
+reaches this code path on main, since it is being masked by
+improved dead code elimination introduced in D91513. Nevertheless,
+this seems like a good thing to fix in case there are other cases
+that dead code elimination doesn't clean up (e.g. if `optnone` is
+used and the optimization is skipped).
+
+I think it would be worth auditing uses of GPR64 in pseudo
+instructions to see if there are any similar issues, but I do not
+have a high enough view of the backend or knowledge of the
+Aarch64 architecture to do this quickly.
+
+[1] https://github.com/JuliaLang/julia/issues/39818
+
+Reviewed By: t.p.northover
+
+Differential Revision: https://reviews.llvm.org/D97435
+---
+ .../AArch64/AArch64ExpandPseudoInsts.cpp      |  1 +
+ llvm/lib/Target/AArch64/AArch64InstrInfo.td   | 32 +++++++++----------
+ .../GlobalISel/select-blockaddress.mir        |  5 +--
+ .../select-jump-table-brjt-constrain.mir      |  2 +-
+ .../GlobalISel/select-jump-table-brjt.mir     |  2 +-
+ 5 files changed, 22 insertions(+), 20 deletions(-)
+
+diff --git llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+index e57650ae60b1..612fbeb5f531 100644
+--- llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
++++ llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+@@ -886,6 +886,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
+   case AArch64::MOVaddrEXT: {
+     // Expand into ADRP + ADD.
+     Register DstReg = MI.getOperand(0).getReg();
++    assert(DstReg != AArch64::XZR);
+     MachineInstrBuilder MIB1 =
+         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
+             .add(MI.getOperand(1));
+diff --git llvm/lib/Target/AArch64/AArch64InstrInfo.td llvm/lib/Target/AArch64/AArch64InstrInfo.td
+index 171d3dbaa814..6fe0bd1ef168 100644
+--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
++++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
+@@ -656,40 +656,40 @@ let isReMaterializable = 1, isCodeGenOnly = 1 in {
+ // removed, along with the AArch64Wrapper node.
+ 
+ let AddedComplexity = 10 in
+-def LOADgot : Pseudo<(outs GPR64:$dst), (ins i64imm:$addr),
+-                     [(set GPR64:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
++def LOADgot : Pseudo<(outs GPR64common:$dst), (ins i64imm:$addr),
++                     [(set GPR64common:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
+               Sched<[WriteLDAdr]>;
+ 
+ // The MOVaddr instruction should match only when the add is not folded
+ // into a load or store address.
+ def MOVaddr
+-    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
+-             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
++    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
++             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
+                                             tglobaladdr:$low))]>,
+       Sched<[WriteAdrAdr]>;
+ def MOVaddrJT
+-    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
+-             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
++    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
++             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
+                                              tjumptable:$low))]>,
+       Sched<[WriteAdrAdr]>;
+ def MOVaddrCP
+-    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
+-             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
++    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
++             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
+                                              tconstpool:$low))]>,
+       Sched<[WriteAdrAdr]>;
+ def MOVaddrBA
+-    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
+-             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
++    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
++             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
+                                              tblockaddress:$low))]>,
+       Sched<[WriteAdrAdr]>;
+ def MOVaddrTLS
+-    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
+-             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
++    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
++             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
+                                             tglobaltlsaddr:$low))]>,
+       Sched<[WriteAdrAdr]>;
+ def MOVaddrEXT
+-    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
+-             [(set GPR64:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
++    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
++             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
+                                             texternalsym:$low))]>,
+       Sched<[WriteAdrAdr]>;
+ // Normally AArch64addlow either gets folded into a following ldr/str,
+@@ -697,8 +697,8 @@ def MOVaddrEXT
+ // might appear without either of them, so allow lowering it into a plain
+ // add.
+ def ADDlowTLS
+-    : Pseudo<(outs GPR64:$dst), (ins GPR64:$src, i64imm:$low),
+-             [(set GPR64:$dst, (AArch64addlow GPR64:$src,
++    : Pseudo<(outs GPR64sp:$dst), (ins GPR64sp:$src, i64imm:$low),
++             [(set GPR64sp:$dst, (AArch64addlow GPR64sp:$src,
+                                             tglobaltlsaddr:$low))]>,
+       Sched<[WriteAdr]>;
+ 
+diff --git llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir
+index 45012f23de62..70cb802ed3a3 100644
+--- llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir
++++ llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir
+@@ -30,9 +30,10 @@ registers:
+ body:             |
+   ; CHECK-LABEL: name: test_blockaddress
+   ; CHECK: bb.0 (%ir-block.0):
+-  ; CHECK:   [[MOVaddrBA:%[0-9]+]]:gpr64 = MOVaddrBA target-flags(aarch64-page) blockaddress(@test_blockaddress, %ir-block.block), target-flags(aarch64-pageoff, aarch64-nc) blockaddress(@test_blockaddress, %ir-block.block)
++  ; CHECK:   [[MOVaddrBA:%[0-9]+]]:gpr64common = MOVaddrBA target-flags(aarch64-page) blockaddress(@test_blockaddress, %ir-block.block), target-flags(aarch64-pageoff, aarch64-nc) blockaddress(@test_blockaddress, %ir-block.block)
+   ; CHECK:   [[MOVaddr:%[0-9]+]]:gpr64common = MOVaddr target-flags(aarch64-page) @addr, target-flags(aarch64-pageoff, aarch64-nc) @addr
+-  ; CHECK:   STRXui [[MOVaddrBA]], [[MOVaddr]], 0 :: (store 8 into @addr)
++  ; CHECK:   [[COPY:%[0-9]+]]:gpr64 = COPY [[MOVaddrBA]]
++  ; CHECK:   STRXui [[COPY]], [[MOVaddr]], 0 :: (store 8 into @addr)
+   ; CHECK:   BR [[MOVaddrBA]]
+   ; CHECK: bb.1.block (address-taken):
+   ; CHECK:   RET_ReallyLR
+diff --git llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt-constrain.mir llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt-constrain.mir
+index 440a03173c83..59b8dea2d0ce 100644
+--- llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt-constrain.mir
++++ llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt-constrain.mir
+@@ -30,7 +30,7 @@ body:             |
+   ; CHECK:   Bcc 8, %bb.3, implicit $nzcv
+   ; CHECK: bb.1:
+   ; CHECK:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+-  ; CHECK:   [[MOVaddrJT:%[0-9]+]]:gpr64 = MOVaddrJT target-flags(aarch64-page) %jump-table.0, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0
++  ; CHECK:   [[MOVaddrJT:%[0-9]+]]:gpr64common = MOVaddrJT target-flags(aarch64-page) %jump-table.0, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0
+   ; CHECK:   early-clobber %6:gpr64, early-clobber %7:gpr64sp = JumpTableDest32 [[MOVaddrJT]], [[SUBREG_TO_REG]], %jump-table.0
+   ; CHECK:   BR %6
+   ; CHECK: bb.2:
+diff --git llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt.mir llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt.mir
+index 6b84c6d10843..b8c9a6c881da 100644
+--- llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt.mir
++++ llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt.mir
+@@ -65,7 +65,7 @@ body:             |
+   ; CHECK: bb.1.entry:
+   ; CHECK:   successors: %bb.3(0x2aaaaaab), %bb.4(0x2aaaaaab), %bb.2(0x2aaaaaab)
+   ; CHECK:   [[COPY2:%[0-9]+]]:gpr32 = COPY $wzr
+-  ; CHECK:   [[MOVaddrJT:%[0-9]+]]:gpr64 = MOVaddrJT target-flags(aarch64-page) %jump-table.0, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0
++  ; CHECK:   [[MOVaddrJT:%[0-9]+]]:gpr64common = MOVaddrJT target-flags(aarch64-page) %jump-table.0, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0
+   ; CHECK:   early-clobber %18:gpr64, early-clobber %19:gpr64sp = JumpTableDest32 [[MOVaddrJT]], [[SUBREG_TO_REG]], %jump-table.0
+   ; CHECK:   BR %18
+   ; CHECK: bb.2.sw.bb:
+-- 
+2.31.1
+
diff --git a/deps/patches/llvm-12-fde-symbols-aarch64.patch b/deps/patches/llvm-12-fde-symbols-aarch64.patch
new file mode 100644
index 00000000000000..c62e65765ae212
--- /dev/null
+++ b/deps/patches/llvm-12-fde-symbols-aarch64.patch
@@ -0,0 +1,158 @@
+From 7133a3d3b0bd639d36d9d40f1135159442ab73c7 Mon Sep 17 00:00:00 2001
+From: Cody Tapscott <cody+github@tapscott.me>
+Date: Mon, 24 May 2021 15:11:39 -0700
+Subject: [PATCH] Do not patch FDE symbols in RuntimeDyld, on targets that use
+ non-absolute symbol relocations in `.eh_frame`
+
+Since processFDE adds a delta to the values in the FDE, it assumes that the relocations for the .eh_frame section have not been applied by RuntimeDyld. It expects instead that only the relocation addend has been written to the symbol locations, and that the section-to-section offset needs to be added.
+
+However, there are platform differences that interfere with this:
+1) X86-64 has DwarfFDESymbolsUseAbsDiff enabled in its AsmInfo, causing an absolute symbol to be emitted for the FDE pcStart.  Absolute symbols are skipped as a relocation by RuntimeDyld, so the processFDE function in RuntimeDyldMachO.cpp calculates the relocation correctly.
+2) AArch64 has DwarfFDESymbolsUseAbsDiff disabled, so a relocation is emitted in the eh_frame section. Since this isn't absolute, the relocation is applied by RuntimeDyld. This means that processFDE ends up adding an additional section-to-section offset to the pcStart field, generating an incorrect FDE.
+
+Differential Revision: https://reviews.llvm.org/D103052
+---
+ .../RuntimeDyld/RuntimeDyldMachO.cpp          | 37 +++++++++++--------
+ .../RuntimeDyld/RuntimeDyldMachO.h            |  8 +++-
+ .../Targets/RuntimeDyldMachOAArch64.h         |  2 +
+ .../RuntimeDyld/Targets/RuntimeDyldMachOARM.h |  2 +
+ .../Targets/RuntimeDyldMachOI386.h            |  2 +
+ .../Targets/RuntimeDyldMachOX86_64.h          |  2 +
+ 6 files changed, 35 insertions(+), 18 deletions(-)
+
+diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
+index 9ca76602ea18..e61bfd1bd31c 100644
+--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
++++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
+@@ -272,9 +272,9 @@ RuntimeDyldMachOCRTPBase<Impl>::finalizeLoad(const ObjectFile &Obj,
+ }
+ 
+ template <typename Impl>
+-unsigned char *RuntimeDyldMachOCRTPBase<Impl>::processFDE(uint8_t *P,
+-                                                          int64_t DeltaForText,
+-                                                          int64_t DeltaForEH) {
++unsigned char *RuntimeDyldMachOCRTPBase<Impl>::patchFDERelocations(uint8_t *P,
++                                                                   int64_t DeltaForText,
++                                                                   int64_t DeltaForEH) {
+   typedef typename Impl::TargetPtrT TargetPtrT;
+ 
+   LLVM_DEBUG(dbgs() << "Processing FDE: Delta for text: " << DeltaForText
+@@ -324,19 +324,24 @@ void RuntimeDyldMachOCRTPBase<Impl>::registerEHFrames() {
+       continue;
+     SectionEntry *Text = &Sections[SectionInfo.TextSID];
+     SectionEntry *EHFrame = &Sections[SectionInfo.EHFrameSID];
+-    SectionEntry *ExceptTab = nullptr;
+-    if (SectionInfo.ExceptTabSID != RTDYLD_INVALID_SECTION_ID)
+-      ExceptTab = &Sections[SectionInfo.ExceptTabSID];
+-
+-    int64_t DeltaForText = computeDelta(Text, EHFrame);
+-    int64_t DeltaForEH = 0;
+-    if (ExceptTab)
+-      DeltaForEH = computeDelta(ExceptTab, EHFrame);
+-
+-    uint8_t *P = EHFrame->getAddress();
+-    uint8_t *End = P + EHFrame->getSize();
+-    while (P != End) {
+-      P = processFDE(P, DeltaForText, DeltaForEH);
++
++    // If the FDE includes absolute symbol relocations (not supported
++    // by RuntimeDyld), we need to manually patch-up the values
++    if (doDwarfFDESymbolsUseAbsDiff()) {
++      SectionEntry *ExceptTab = nullptr;
++      if (SectionInfo.ExceptTabSID != RTDYLD_INVALID_SECTION_ID)
++        ExceptTab = &Sections[SectionInfo.ExceptTabSID];
++
++      int64_t DeltaForText = computeDelta(Text, EHFrame);
++      int64_t DeltaForEH = 0;
++      if (ExceptTab)
++        DeltaForEH = computeDelta(ExceptTab, EHFrame);
++
++      uint8_t *P = EHFrame->getAddress();
++      uint8_t *End = P + EHFrame->getSize();
++      while (P != End) {
++        P = patchFDERelocations(P, DeltaForText, DeltaForEH);
++      }
+     }
+ 
+     MemMgr.registerEHFrames(EHFrame->getAddress(), EHFrame->getLoadAddress(),
+diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
+index 650e7b79fbb8..a7e5c9cb56e8 100644
+--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
++++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
+@@ -43,6 +43,10 @@ protected:
+     SID ExceptTabSID;
+   };
+ 
++  // Returns true if the FDE section includes absolute symbol relocations
++  // on this platform.
++  virtual bool doDwarfFDESymbolsUseAbsDiff() = 0;
++
+   // When a module is loaded we save the SectionID of the EH frame section
+   // in a table until we receive a request to register all unregistered
+   // EH frame sections with the memory manager.
+@@ -147,8 +151,8 @@ private:
+   Impl &impl() { return static_cast<Impl &>(*this); }
+   const Impl &impl() const { return static_cast<const Impl &>(*this); }
+ 
+-  unsigned char *processFDE(uint8_t *P, int64_t DeltaForText,
+-                            int64_t DeltaForEH);
++  unsigned char *patchFDERelocations(uint8_t *P, int64_t DeltaForText,
++                                     int64_t DeltaForEH);
+ 
+ public:
+   RuntimeDyldMachOCRTPBase(RuntimeDyld::MemoryManager &MemMgr,
+diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
+index f2ee1b06d494..90a9a4c44c84 100644
+--- a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
++++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
+@@ -30,6 +30,8 @@ public:
+ 
+   unsigned getStubAlignment() override { return 8; }
+ 
++  bool doDwarfFDESymbolsUseAbsDiff() override { return false; }
++  
+   /// Extract the addend encoded in the instruction / memory location.
+   Expected<int64_t> decodeAddend(const RelocationEntry &RE) const {
+     const SectionEntry &Section = Sections[RE.SectionID];
+diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
+index a76958a9e2c2..7281249d25bf 100644
+--- a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
++++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
+@@ -33,6 +33,8 @@ public:
+ 
+   unsigned getStubAlignment() override { return 4; }
+ 
++  bool doDwarfFDESymbolsUseAbsDiff() override { return false; }
++
+   Expected<JITSymbolFlags> getJITSymbolFlags(const SymbolRef &SR) override {
+     auto Flags = RuntimeDyldImpl::getJITSymbolFlags(SR);
+     if (!Flags)
+diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
+index 523deb29b723..755bc13afeb4 100644
+--- a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
++++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
+@@ -30,6 +30,8 @@ public:
+ 
+   unsigned getStubAlignment() override { return 1; }
+ 
++  bool doDwarfFDESymbolsUseAbsDiff() override { return true; }
++
+   Expected<relocation_iterator>
+   processRelocationRef(unsigned SectionID, relocation_iterator RelI,
+                        const ObjectFile &BaseObjT,
+diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
+index 28febbdb948c..9854da24a2ce 100644
+--- a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
++++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
+@@ -30,6 +30,8 @@ public:
+ 
+   unsigned getStubAlignment() override { return 8; }
+ 
++  bool doDwarfFDESymbolsUseAbsDiff() override { return true; }
++
+   Expected<relocation_iterator>
+   processRelocationRef(unsigned SectionID, relocation_iterator RelI,
+                        const ObjectFile &BaseObjT,
+-- 
+2.30.1 (Apple Git-130)
+
diff --git a/deps/patches/llvm-12-force-eh_frame-aarch64.patch b/deps/patches/llvm-12-force-eh_frame-aarch64.patch
new file mode 100644
index 00000000000000..aff55f35e7bd88
--- /dev/null
+++ b/deps/patches/llvm-12-force-eh_frame-aarch64.patch
@@ -0,0 +1,31 @@
+From 026f3518c4713e388a8ed06fa032e0925d35c6f5 Mon Sep 17 00:00:00 2001
+From: Cody Tapscott <cody+github@tapscott.me>
+Date: Mon, 24 May 2021 16:36:06 -0700
+Subject: [PATCH] Force `.eh_frame` emission on AArch64
+
+We need to force the emission of the EH Frame section (currently done via SupportsCompactUnwindWithoutEHFrame in the MCObjectFileInfo for the target), since libunwind doesn't yet support dynamically registering compact unwind information at run-time.
+---
+ llvm/lib/MC/MCObjectFileInfo.cpp | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp
+index 1a448f040b3b..e12154deca5f 100644
+--- a/llvm/lib/MC/MCObjectFileInfo.cpp
++++ b/llvm/lib/MC/MCObjectFileInfo.cpp
+@@ -57,9 +57,10 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) {
+           MachO::S_ATTR_STRIP_STATIC_SYMS | MachO::S_ATTR_LIVE_SUPPORT,
+       SectionKind::getReadOnly());
+ 
+-  if (T.isOSDarwin() &&
+-      (T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32))
+-    SupportsCompactUnwindWithoutEHFrame = true;
++  // Disabled for now, since we need to emit EH Frames for stack unwinding in the JIT
++  // if (T.isOSDarwin() &&
++  //     (T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32))
++  //   SupportsCompactUnwindWithoutEHFrame = true;
+ 
+   if (T.isWatchABI())
+     OmitDwarfIfHaveCompactUnwind = true;
+-- 
+2.30.1 (Apple Git-130)
+
diff --git a/deps/patches/llvm-8.0-D50167-scev-umin.patch b/deps/patches/llvm-8.0-D50167-scev-umin.patch
deleted file mode 100644
index f11fd546bb72c7..00000000000000
--- a/deps/patches/llvm-8.0-D50167-scev-umin.patch
+++ /dev/null
@@ -1,1870 +0,0 @@
-commit 18e563f695dd561c32393512fbdb8ce8771d7e5f
-Author: Keno Fischer <keno@juliacomputing.com>
-Date:   Thu May 2 08:35:22 2019 -0400
-
-    [SCEV] Add explicit representations of umin/smin
-    
-    Summary:
-    Currently we express umin as `~umax(~x, ~y)`. However, this becomes
-    a problem for operands in non-integral pointer spaces, because `~x`
-    is not something we can compute for `x` non-integral. However, since
-    comparisons are generally still allowed, we are actually able to
-    express `umin(x, y)` directly as long as we don't try to express is
-    as a umax. Support this by adding an explicit umin/smin representation
-    to SCEV. We do this by factoring the existing getUMax/getSMax functions
-    into a new function that does all four. The previous two functions were
-    largely identical.
-    
-    Reviewers: reames, sanjoy, mkazantsev
-    
-    Reviewed By: sanjoy
-    
-    Subscribers: tvvikram, dmgreen, vchuravy, javed.absar, llvm-commits
-    
-    Tags: #llvm
-    
-    Differential Revision: https://reviews.llvm.org/D50167
-
-diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
-index 8f4200b07e5..6b76a16a2b4 100644
---- a/include/llvm/Analysis/ScalarEvolution.h
-+++ b/include/llvm/Analysis/ScalarEvolution.h
-@@ -582,6 +582,8 @@ public:
-   /// \p IndexExprs The expressions for the indices.
-   const SCEV *getGEPExpr(GEPOperator *GEP,
-                          const SmallVectorImpl<const SCEV *> &IndexExprs);
-+  const SCEV *getMinMaxExpr(unsigned Kind,
-+                            SmallVectorImpl<const SCEV *> &Operands);
-   const SCEV *getSMaxExpr(const SCEV *LHS, const SCEV *RHS);
-   const SCEV *getSMaxExpr(SmallVectorImpl<const SCEV *> &Operands);
-   const SCEV *getUMaxExpr(const SCEV *LHS, const SCEV *RHS);
-diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
-index 58d42680d6b..57d658b157d 100644
---- a/include/llvm/Analysis/ScalarEvolutionExpander.h
-+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
-@@ -368,6 +368,10 @@ namespace llvm {
- 
-     Value *visitUMaxExpr(const SCEVUMaxExpr *S);
- 
-+    Value *visitSMinExpr(const SCEVSMinExpr *S);
-+
-+    Value *visitUMinExpr(const SCEVUMinExpr *S);
-+
-     Value *visitUnknown(const SCEVUnknown *S) {
-       return S->getValue();
-     }
-diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h
-index 42e76094eb2..99e39d484c5 100644
---- a/include/llvm/Analysis/ScalarEvolutionExpressions.h
-+++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h
-@@ -40,7 +40,7 @@ class Type;
-     // These should be ordered in terms of increasing complexity to make the
-     // folders simpler.
-     scConstant, scTruncate, scZeroExtend, scSignExtend, scAddExpr, scMulExpr,
--    scUDivExpr, scAddRecExpr, scUMaxExpr, scSMaxExpr,
-+    scUDivExpr, scAddRecExpr, scUMaxExpr, scSMaxExpr, scUMinExpr, scSMinExpr,
-     scUnknown, scCouldNotCompute
-   };
- 
-@@ -183,10 +183,9 @@ class Type;
- 
-     /// Methods for support type inquiry through isa, cast, and dyn_cast:
-     static bool classof(const SCEV *S) {
--      return S->getSCEVType() == scAddExpr ||
--             S->getSCEVType() == scMulExpr ||
--             S->getSCEVType() == scSMaxExpr ||
--             S->getSCEVType() == scUMaxExpr ||
-+      return S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr ||
-+             S->getSCEVType() == scSMaxExpr || S->getSCEVType() == scUMaxExpr ||
-+             S->getSCEVType() == scSMinExpr || S->getSCEVType() == scUMinExpr ||
-              S->getSCEVType() == scAddRecExpr;
-     }
-   };
-@@ -201,10 +200,9 @@ class Type;
-   public:
-     /// Methods for support type inquiry through isa, cast, and dyn_cast:
-     static bool classof(const SCEV *S) {
--      return S->getSCEVType() == scAddExpr ||
--             S->getSCEVType() == scMulExpr ||
--             S->getSCEVType() == scSMaxExpr ||
--             S->getSCEVType() == scUMaxExpr;
-+      return S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr ||
-+             S->getSCEVType() == scSMaxExpr || S->getSCEVType() == scUMaxExpr ||
-+             S->getSCEVType() == scSMinExpr || S->getSCEVType() == scUMinExpr;
-     }
- 
-     /// Set flags for a non-recurrence without clearing previously set flags.
-@@ -358,17 +356,53 @@ class Type;
-     }
-   };
- 
--  /// This class represents a signed maximum selection.
--  class SCEVSMaxExpr : public SCEVCommutativeExpr {
-+  /// This node is the base class min/max selections.
-+  class SCEVMinMaxExpr : public SCEVCommutativeExpr {
-     friend class ScalarEvolution;
- 
--    SCEVSMaxExpr(const FoldingSetNodeIDRef ID,
--                 const SCEV *const *O, size_t N)
--      : SCEVCommutativeExpr(ID, scSMaxExpr, O, N) {
--      // Max never overflows.
-+    static bool isMinMaxType(enum SCEVTypes T) {
-+      return T == scSMaxExpr || T == scUMaxExpr || T == scSMinExpr ||
-+             T == scUMinExpr;
-+    }
-+
-+  protected:
-+    /// Note: Constructing subclasses via this constructor is allowed
-+    SCEVMinMaxExpr(const FoldingSetNodeIDRef ID, enum SCEVTypes T,
-+                   const SCEV *const *O, size_t N)
-+        : SCEVCommutativeExpr(ID, T, O, N) {
-+      assert(isMinMaxType(T));
-+      // Min and max nenver overflow
-       setNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW));
-     }
- 
-+  public:
-+    static bool classof(const SCEV *S) {
-+      return isMinMaxType(static_cast<SCEVTypes>(S->getSCEVType()));
-+    }
-+
-+    static enum SCEVTypes negate(enum SCEVTypes T) {
-+      switch (T) {
-+      case scSMaxExpr:
-+        return scSMinExpr;
-+      case scSMinExpr:
-+        return scSMaxExpr;
-+      case scUMaxExpr:
-+        return scUMaxExpr;
-+      case scUMinExpr:
-+        return scUMinExpr;
-+      default:
-+        llvm_unreachable("Not a min or max SCEV type!");
-+      }
-+    }
-+  };
-+
-+  /// This class represents a signed maximum selection.
-+  class SCEVSMaxExpr : public SCEVMinMaxExpr {
-+    friend class ScalarEvolution;
-+
-+    SCEVSMaxExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N)
-+        : SCEVMinMaxExpr(ID, scSMaxExpr, O, N) {}
-+
-   public:
-     /// Methods for support type inquiry through isa, cast, and dyn_cast:
-     static bool classof(const SCEV *S) {
-@@ -377,15 +411,11 @@ class Type;
-   };
- 
-   /// This class represents an unsigned maximum selection.
--  class SCEVUMaxExpr : public SCEVCommutativeExpr {
-+  class SCEVUMaxExpr : public SCEVMinMaxExpr {
-     friend class ScalarEvolution;
- 
--    SCEVUMaxExpr(const FoldingSetNodeIDRef ID,
--                 const SCEV *const *O, size_t N)
--      : SCEVCommutativeExpr(ID, scUMaxExpr, O, N) {
--      // Max never overflows.
--      setNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW));
--    }
-+    SCEVUMaxExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N)
-+        : SCEVMinMaxExpr(ID, scUMaxExpr, O, N) {}
- 
-   public:
-     /// Methods for support type inquiry through isa, cast, and dyn_cast:
-@@ -394,6 +424,34 @@ class Type;
-     }
-   };
- 
-+  /// This class represents a signed minimum selection.
-+  class SCEVSMinExpr : public SCEVMinMaxExpr {
-+    friend class ScalarEvolution;
-+
-+    SCEVSMinExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N)
-+        : SCEVMinMaxExpr(ID, scSMinExpr, O, N) {}
-+
-+  public:
-+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
-+    static bool classof(const SCEV *S) {
-+      return S->getSCEVType() == scSMinExpr;
-+    }
-+  };
-+
-+  /// This class represents an unsigned minimum selection.
-+  class SCEVUMinExpr : public SCEVMinMaxExpr {
-+    friend class ScalarEvolution;
-+
-+    SCEVUMinExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N)
-+        : SCEVMinMaxExpr(ID, scUMinExpr, O, N) {}
-+
-+  public:
-+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
-+    static bool classof(const SCEV *S) {
-+      return S->getSCEVType() == scUMinExpr;
-+    }
-+  };
-+
-   /// This means that we are dealing with an entirely unknown SCEV
-   /// value, and only represent it as its LLVM Value.  This is the
-   /// "bottom" value for the analysis.
-@@ -466,6 +524,10 @@ class Type;
-         return ((SC*)this)->visitSMaxExpr((const SCEVSMaxExpr*)S);
-       case scUMaxExpr:
-         return ((SC*)this)->visitUMaxExpr((const SCEVUMaxExpr*)S);
-+      case scSMinExpr:
-+        return ((SC *)this)->visitSMinExpr((const SCEVSMinExpr *)S);
-+      case scUMinExpr:
-+        return ((SC *)this)->visitUMinExpr((const SCEVUMinExpr *)S);
-       case scUnknown:
-         return ((SC*)this)->visitUnknown((const SCEVUnknown*)S);
-       case scCouldNotCompute:
-@@ -519,6 +581,8 @@ class Type;
-         case scMulExpr:
-         case scSMaxExpr:
-         case scUMaxExpr:
-+        case scSMinExpr:
-+        case scUMinExpr:
-         case scAddRecExpr:
-           for (const auto *Op : cast<SCEVNAryExpr>(S)->operands())
-             push(Op);
-@@ -681,6 +745,26 @@ class Type;
-       return !Changed ? Expr : SE.getUMaxExpr(Operands);
-     }
- 
-+    const SCEV *visitSMinExpr(const SCEVSMinExpr *Expr) {
-+      SmallVector<const SCEV *, 2> Operands;
-+      bool Changed = false;
-+      for (auto *Op : Expr->operands()) {
-+        Operands.push_back(((SC *)this)->visit(Op));
-+        Changed |= Op != Operands.back();
-+      }
-+      return !Changed ? Expr : SE.getSMinExpr(Operands);
-+    }
-+
-+    const SCEV *visitUMinExpr(const SCEVUMinExpr *Expr) {
-+      SmallVector<const SCEV *, 2> Operands;
-+      bool Changed = false;
-+      for (auto *Op : Expr->operands()) {
-+        Operands.push_back(((SC *)this)->visit(Op));
-+        Changed |= Op != Operands.back();
-+      }
-+      return !Changed ? Expr : SE.getUMinExpr(Operands);
-+    }
-+
-     const SCEV *visitUnknown(const SCEVUnknown *Expr) {
-       return Expr;
-     }
-diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
-index e5134f2eeda..f2553de0af1 100644
---- a/lib/Analysis/ScalarEvolution.cpp
-+++ b/lib/Analysis/ScalarEvolution.cpp
-@@ -273,7 +273,9 @@ void SCEV::print(raw_ostream &OS) const {
-   case scAddExpr:
-   case scMulExpr:
-   case scUMaxExpr:
--  case scSMaxExpr: {
-+  case scSMaxExpr:
-+  case scUMinExpr:
-+  case scSMinExpr: {
-     const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
-     const char *OpStr = nullptr;
-     switch (NAry->getSCEVType()) {
-@@ -281,6 +283,12 @@ void SCEV::print(raw_ostream &OS) const {
-     case scMulExpr: OpStr = " * "; break;
-     case scUMaxExpr: OpStr = " umax "; break;
-     case scSMaxExpr: OpStr = " smax "; break;
-+    case scUMinExpr:
-+      OpStr = " umin ";
-+      break;
-+    case scSMinExpr:
-+      OpStr = " smin ";
-+      break;
-     }
-     OS << "(";
-     for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
-@@ -349,6 +357,8 @@ Type *SCEV::getType() const {
-   case scMulExpr:
-   case scUMaxExpr:
-   case scSMaxExpr:
-+  case scUMinExpr:
-+  case scSMinExpr:
-     return cast<SCEVNAryExpr>(this)->getType();
-   case scAddExpr:
-     return cast<SCEVAddExpr>(this)->getType();
-@@ -713,7 +723,9 @@ static int CompareSCEVComplexity(
-   case scAddExpr:
-   case scMulExpr:
-   case scSMaxExpr:
--  case scUMaxExpr: {
-+  case scUMaxExpr:
-+  case scSMinExpr:
-+  case scUMinExpr: {
-     const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
-     const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
- 
-@@ -913,6 +925,8 @@ public:
-   void visitUDivExpr(const SCEVUDivExpr *Numerator) {}
-   void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {}
-   void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {}
-+  void visitSMinExpr(const SCEVSMinExpr *Numerator) {}
-+  void visitUMinExpr(const SCEVUMinExpr *Numerator) {}
-   void visitUnknown(const SCEVUnknown *Numerator) {}
-   void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {}
- 
-@@ -3493,209 +3507,153 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP,
-   return getAddExpr(BaseExpr, TotalOffset, Wrap);
- }
- 
--const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS,
--                                         const SCEV *RHS) {
--  SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
--  return getSMaxExpr(Ops);
--}
--
--const SCEV *
--ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
--  assert(!Ops.empty() && "Cannot get empty smax!");
-+const SCEV *ScalarEvolution::getMinMaxExpr(unsigned Kind,
-+                                           SmallVectorImpl<const SCEV *> &Ops) {
-+  assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!");
-   if (Ops.size() == 1) return Ops[0];
- #ifndef NDEBUG
-   Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
-   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
-     assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
--           "SCEVSMaxExpr operand types don't match!");
-+           "Operand types don't match!");
- #endif
- 
-+  bool IsSigned = Kind == scSMaxExpr || Kind == scSMinExpr;
-+  bool IsMax = Kind == scSMaxExpr || Kind == scUMaxExpr;
-+
-   // Sort by complexity, this groups all similar expression types together.
-   GroupByComplexity(Ops, &LI, DT);
- 
-+
-+
-+
-+
-+
-   // If there are any constants, fold them together.
-   unsigned Idx = 0;
-   if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
-     ++Idx;
-     assert(Idx < Ops.size());
-+    auto FoldOp = [&](const APInt &LHS, const APInt &RHS) {
-+      if (Kind == scSMaxExpr)
-+        return APIntOps::smax(LHS, RHS);
-+      else if (Kind == scSMinExpr)
-+        return APIntOps::smin(LHS, RHS);
-+      else if (Kind == scUMaxExpr)
-+        return APIntOps::umax(LHS, RHS);
-+      else if (Kind == scUMinExpr)
-+        return APIntOps::umin(LHS, RHS);
-+      llvm_unreachable("Unknown SCEV min/max opcode");
-+    };
-+
-     while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
-       // We found two constants, fold them together!
-       ConstantInt *Fold = ConstantInt::get(
--          getContext(), APIntOps::smax(LHSC->getAPInt(), RHSC->getAPInt()));
-+          getContext(), FoldOp(LHSC->getAPInt(), RHSC->getAPInt()));
-       Ops[0] = getConstant(Fold);
-       Ops.erase(Ops.begin()+1);  // Erase the folded element
-       if (Ops.size() == 1) return Ops[0];
-       LHSC = cast<SCEVConstant>(Ops[0]);
-     }
- 
--    // If we are left with a constant minimum-int, strip it off.
--    if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) {
-+    bool IsMinV = LHSC->getValue()->isMinValue(IsSigned);
-+    bool IsMaxV = LHSC->getValue()->isMaxValue(IsSigned);
-+
-+    if (IsMax ? IsMinV : IsMaxV) {
-+      // If we are left with a constant minimum(/maximum)-int, strip it off.
-       Ops.erase(Ops.begin());
-       --Idx;
--    } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(true)) {
--      // If we have an smax with a constant maximum-int, it will always be
--      // maximum-int.
--      return Ops[0];
-+    } else if (IsMax ? IsMaxV : IsMinV) {
-+      // If we have a max(/min) with a constant maximum(/minimum)-int,
-+      // it will always be the extremum.
-+      return LHSC;
-     }
- 
-     if (Ops.size() == 1) return Ops[0];
-   }
- 
--  // Find the first SMax
--  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr)
-+  // Find the first operation of the same kind
-+  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < Kind)
-     ++Idx;
- 
--  // Check to see if one of the operands is an SMax. If so, expand its operands
--  // onto our operand list, and recurse to simplify.
-+  // Check to see if one of the operands is of the same kind. If so, expand its
-+  // operands onto our operand list, and recurse to simplify.
-   if (Idx < Ops.size()) {
--    bool DeletedSMax = false;
--    while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) {
-+    bool DeletedAny = false;
-+    while (Ops[Idx]->getSCEVType() == Kind) {
-+      const SCEVMinMaxExpr *SMME = cast<SCEVMinMaxExpr>(Ops[Idx]);
-       Ops.erase(Ops.begin()+Idx);
--      Ops.append(SMax->op_begin(), SMax->op_end());
--      DeletedSMax = true;
-+      Ops.append(SMME->op_begin(), SMME->op_end());
-+      DeletedAny = true;
-     }
- 
--    if (DeletedSMax)
--      return getSMaxExpr(Ops);
-+    if (DeletedAny)
-+      return getMinMaxExpr(Kind, Ops);
-   }
- 
-   // Okay, check to see if the same value occurs in the operand list twice.  If
-   // so, delete one.  Since we sorted the list, these values are required to
-   // be adjacent.
--  for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
--    //  X smax Y smax Y  -->  X smax Y
--    //  X smax Y         -->  X, if X is always greater than Y
--    if (Ops[i] == Ops[i+1] ||
--        isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) {
--      Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
--      --i; --e;
--    } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) {
--      Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
--      --i; --e;
-+  llvm::CmpInst::Predicate GEPred =
-+      IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
-+  llvm::CmpInst::Predicate LEPred =
-+      IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
-+  llvm::CmpInst::Predicate FirstPred = IsMax ? GEPred : LEPred;
-+  llvm::CmpInst::Predicate SecondPred = IsMax ? LEPred : GEPred;
-+  for (unsigned i = 0, e = Ops.size() - 1; i != e; ++i) {
-+    if (Ops[i] == Ops[i + 1] ||
-+        isKnownViaNonRecursiveReasoning(FirstPred, Ops[i], Ops[i + 1])) {
-+      //  X op Y op Y  -->  X op Y
-+      //  X op Y       -->  X, if we know X, Y are ordered appropriately
-+      Ops.erase(Ops.begin() + i + 1, Ops.begin() + i + 2);
-+      --i;
-+      --e;
-+    } else if (isKnownViaNonRecursiveReasoning(SecondPred, Ops[i],
-+                                               Ops[i + 1])) {
-+      //  X op Y       -->  Y, if we know X, Y are ordered appropriately
-+      Ops.erase(Ops.begin() + i, Ops.begin() + i + 1);
-+      --i;
-+      --e;
-     }
-+  }
- 
-   if (Ops.size() == 1) return Ops[0];
- 
-   assert(!Ops.empty() && "Reduced smax down to nothing!");
- 
--  // Okay, it looks like we really DO need an smax expr.  Check to see if we
-+  // Okay, it looks like we really DO need an expr.  Check to see if we
-   // already have one, otherwise create a new one.
-   FoldingSetNodeID ID;
--  ID.AddInteger(scSMaxExpr);
-+  ID.AddInteger(Kind);
-   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
-     ID.AddPointer(Ops[i]);
-   void *IP = nullptr;
-   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
-   const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
-   std::uninitialized_copy(Ops.begin(), Ops.end(), O);
--  SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator),
--                                             O, Ops.size());
-+  SCEV *S = new (SCEVAllocator) SCEVMinMaxExpr(
-+      ID.Intern(SCEVAllocator), static_cast<SCEVTypes>(Kind), O, Ops.size());
-   UniqueSCEVs.InsertNode(S, IP);
-   addToLoopUseLists(S);
-   return S;
- }
- 
--const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS,
--                                         const SCEV *RHS) {
-+const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, const SCEV *RHS) {
-   SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
--  return getUMaxExpr(Ops);
-+  return getSMaxExpr(Ops);
- }
- 
--const SCEV *
--ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
--  assert(!Ops.empty() && "Cannot get empty umax!");
--  if (Ops.size() == 1) return Ops[0];
--#ifndef NDEBUG
--  Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
--  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
--    assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
--           "SCEVUMaxExpr operand types don't match!");
--#endif
--
--  // Sort by complexity, this groups all similar expression types together.
--  GroupByComplexity(Ops, &LI, DT);
--
--  // If there are any constants, fold them together.
--  unsigned Idx = 0;
--  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
--    ++Idx;
--    assert(Idx < Ops.size());
--    while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
--      // We found two constants, fold them together!
--      ConstantInt *Fold = ConstantInt::get(
--          getContext(), APIntOps::umax(LHSC->getAPInt(), RHSC->getAPInt()));
--      Ops[0] = getConstant(Fold);
--      Ops.erase(Ops.begin()+1);  // Erase the folded element
--      if (Ops.size() == 1) return Ops[0];
--      LHSC = cast<SCEVConstant>(Ops[0]);
--    }
--
--    // If we are left with a constant minimum-int, strip it off.
--    if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(false)) {
--      Ops.erase(Ops.begin());
--      --Idx;
--    } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(false)) {
--      // If we have an umax with a constant maximum-int, it will always be
--      // maximum-int.
--      return Ops[0];
--    }
--
--    if (Ops.size() == 1) return Ops[0];
--  }
--
--  // Find the first UMax
--  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr)
--    ++Idx;
--
--  // Check to see if one of the operands is a UMax. If so, expand its operands
--  // onto our operand list, and recurse to simplify.
--  if (Idx < Ops.size()) {
--    bool DeletedUMax = false;
--    while (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(Ops[Idx])) {
--      Ops.erase(Ops.begin()+Idx);
--      Ops.append(UMax->op_begin(), UMax->op_end());
--      DeletedUMax = true;
--    }
--
--    if (DeletedUMax)
--      return getUMaxExpr(Ops);
--  }
--
--  // Okay, check to see if the same value occurs in the operand list twice.  If
--  // so, delete one.  Since we sorted the list, these values are required to
--  // be adjacent.
--  for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
--    //  X umax Y umax Y  -->  X umax Y
--    //  X umax Y         -->  X, if X is always greater than Y
--    if (Ops[i] == Ops[i + 1] || isKnownViaNonRecursiveReasoning(
--                                    ICmpInst::ICMP_UGE, Ops[i], Ops[i + 1])) {
--      Ops.erase(Ops.begin() + i + 1, Ops.begin() + i + 2);
--      --i; --e;
--    } else if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, Ops[i],
--                                               Ops[i + 1])) {
--      Ops.erase(Ops.begin() + i, Ops.begin() + i + 1);
--      --i; --e;
--    }
--
--  if (Ops.size() == 1) return Ops[0];
-+const SCEV *ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
-+  return getMinMaxExpr(scSMaxExpr, Ops);
-+}
- 
--  assert(!Ops.empty() && "Reduced umax down to nothing!");
-+const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, const SCEV *RHS) {
-+  SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
-+  return getUMaxExpr(Ops);
-+}
- 
--  // Okay, it looks like we really DO need a umax expr.  Check to see if we
--  // already have one, otherwise create a new one.
--  FoldingSetNodeID ID;
--  ID.AddInteger(scUMaxExpr);
--  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
--    ID.AddPointer(Ops[i]);
--  void *IP = nullptr;
--  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
--  const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
--  std::uninitialized_copy(Ops.begin(), Ops.end(), O);
--  SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator),
--                                             O, Ops.size());
--  UniqueSCEVs.InsertNode(S, IP);
--  addToLoopUseLists(S);
--  return S;
-+const SCEV *ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
-+  return getMinMaxExpr(scUMaxExpr, Ops);
- }
- 
- const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
-@@ -3705,11 +3663,7 @@ const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
- }
- 
- const SCEV *ScalarEvolution::getSMinExpr(SmallVectorImpl<const SCEV *> &Ops) {
--  // ~smax(~x, ~y, ~z) == smin(x, y, z).
--  SmallVector<const SCEV *, 2> NotOps;
--  for (auto *S : Ops)
--    NotOps.push_back(getNotSCEV(S));
--  return getNotSCEV(getSMaxExpr(NotOps));
-+  return getMinMaxExpr(scSMinExpr, Ops);
- }
- 
- const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
-@@ -3719,16 +3673,7 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
- }
- 
- const SCEV *ScalarEvolution::getUMinExpr(SmallVectorImpl<const SCEV *> &Ops) {
--  assert(!Ops.empty() && "At least one operand must be!");
--  // Trivial case.
--  if (Ops.size() == 1)
--    return Ops[0];
--
--  // ~umax(~x, ~y, ~z) == umin(x, y, z).
--  SmallVector<const SCEV *, 2> NotOps;
--  for (auto *S : Ops)
--    NotOps.push_back(getNotSCEV(S));
--  return getNotSCEV(getUMaxExpr(NotOps));
-+  return getMinMaxExpr(scUMinExpr, Ops);
- }
- 
- const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
-@@ -3970,12 +3915,45 @@ const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V,
-       V, getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))), Flags);
- }
- 
-+/// If Expr computes ~A, return A else return nullptr
-+static const SCEV *MatchNotExpr(const SCEV *Expr) {
-+  const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr);
-+  if (!Add || Add->getNumOperands() != 2 ||
-+      !Add->getOperand(0)->isAllOnesValue())
-+    return nullptr;
-+
-+  const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
-+  if (!AddRHS || AddRHS->getNumOperands() != 2 ||
-+      !AddRHS->getOperand(0)->isAllOnesValue())
-+    return nullptr;
-+
-+  return AddRHS->getOperand(1);
-+}
-+
- /// Return a SCEV corresponding to ~V = -1-V
- const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
-   if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
-     return getConstant(
-                 cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
- 
-+  // Fold ~(u|s)(min|max)(~x, ~y) to (u|s)(max|min)(x, y)
-+  if (const SCEVMinMaxExpr *MME = dyn_cast<SCEVMinMaxExpr>(V)) {
-+    auto MatchMinMaxNegation = [&](const SCEVMinMaxExpr *MME) {
-+      SmallVector<const SCEV *, 2> MatchedOperands;
-+      for (const SCEV *Operand : MME->operands()) {
-+        const SCEV *Matched = MatchNotExpr(Operand);
-+        if (!Matched)
-+          return (const SCEV *)nullptr;
-+        MatchedOperands.push_back(Matched);
-+      }
-+      return getMinMaxExpr(
-+          SCEVMinMaxExpr::negate(static_cast<SCEVTypes>(MME->getSCEVType())),
-+          MatchedOperands);
-+    };
-+    if (const SCEV *Replaced = MatchMinMaxNegation(MME))
-+      return Replaced;
-+  }
-+
-   Type *Ty = V->getType();
-   Ty = getEffectiveSCEVType(Ty);
-   const SCEV *AllOnes =
-@@ -5196,6 +5174,8 @@ static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S,
-       switch (S->getSCEVType()) {
-       case scConstant: case scTruncate: case scZeroExtend: case scSignExtend:
-       case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr:
-+      case scUMinExpr:
-+      case scSMinExpr:
-         // These expressions are available if their operand(s) is/are.
-         return true;
- 
-@@ -8075,7 +8055,9 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
-     }
-     case scSMaxExpr:
-     case scUMaxExpr:
--      break; // TODO: smax, umax.
-+    case scSMinExpr:
-+    case scUMinExpr:
-+      break; // TODO: smax, umax, smin, umax.
-   }
-   return nullptr;
- }
-@@ -8201,10 +8183,8 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
-           return getAddExpr(NewOps);
-         if (isa<SCEVMulExpr>(Comm))
-           return getMulExpr(NewOps);
--        if (isa<SCEVSMaxExpr>(Comm))
--          return getSMaxExpr(NewOps);
--        if (isa<SCEVUMaxExpr>(Comm))
--          return getUMaxExpr(NewOps);
-+        if (isa<SCEVMinMaxExpr>(Comm))
-+          return getMinMaxExpr(Comm->getSCEVType(), NewOps);
-         llvm_unreachable("Unknown commutative SCEV type!");
-       }
-     }
-@@ -10045,41 +10025,15 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
-                                      getNotSCEV(FoundLHS));
- }
- 
--/// If Expr computes ~A, return A else return nullptr
--static const SCEV *MatchNotExpr(const SCEV *Expr) {
--  const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr);
--  if (!Add || Add->getNumOperands() != 2 ||
--      !Add->getOperand(0)->isAllOnesValue())
--    return nullptr;
--
--  const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
--  if (!AddRHS || AddRHS->getNumOperands() != 2 ||
--      !AddRHS->getOperand(0)->isAllOnesValue())
--    return nullptr;
--
--  return AddRHS->getOperand(1);
--}
--
--/// Is MaybeMaxExpr an SMax or UMax of Candidate and some other values?
--template<typename MaxExprType>
--static bool IsMaxConsistingOf(const SCEV *MaybeMaxExpr,
--                              const SCEV *Candidate) {
--  const MaxExprType *MaxExpr = dyn_cast<MaxExprType>(MaybeMaxExpr);
--  if (!MaxExpr) return false;
--
--  return find(MaxExpr->operands(), Candidate) != MaxExpr->op_end();
--}
--
--/// Is MaybeMinExpr an SMin or UMin of Candidate and some other values?
--template<typename MaxExprType>
--static bool IsMinConsistingOf(ScalarEvolution &SE,
--                              const SCEV *MaybeMinExpr,
--                              const SCEV *Candidate) {
--  const SCEV *MaybeMaxExpr = MatchNotExpr(MaybeMinExpr);
--  if (!MaybeMaxExpr)
-+/// Is MaybeMinMaxExpr an (U|S)(Min|Max) of Candidate and some other values?
-+template <typename MinMaxExprType>
-+static bool IsMinMaxConsistingOf(const SCEV *MaybeMinMaxExpr,
-+                                 const SCEV *Candidate) {
-+  const MinMaxExprType *MinMaxExpr = dyn_cast<MinMaxExprType>(MaybeMinMaxExpr);
-+  if (!MinMaxExpr)
-     return false;
- 
--  return IsMaxConsistingOf<MaxExprType>(MaybeMaxExpr, SE.getNotSCEV(Candidate));
-+  return find(MinMaxExpr->operands(), Candidate) != MinMaxExpr->op_end();
- }
- 
- static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE,
-@@ -10128,20 +10082,20 @@ static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE,
-     LLVM_FALLTHROUGH;
-   case ICmpInst::ICMP_SLE:
-     return
--      // min(A, ...) <= A
--      IsMinConsistingOf<SCEVSMaxExpr>(SE, LHS, RHS) ||
--      // A <= max(A, ...)
--      IsMaxConsistingOf<SCEVSMaxExpr>(RHS, LHS);
-+        // min(A, ...) <= A
-+        IsMinMaxConsistingOf<SCEVSMinExpr>(LHS, RHS) ||
-+        // A <= max(A, ...)
-+        IsMinMaxConsistingOf<SCEVSMaxExpr>(RHS, LHS);
- 
-   case ICmpInst::ICMP_UGE:
-     std::swap(LHS, RHS);
-     LLVM_FALLTHROUGH;
-   case ICmpInst::ICMP_ULE:
-     return
--      // min(A, ...) <= A
--      IsMinConsistingOf<SCEVUMaxExpr>(SE, LHS, RHS) ||
--      // A <= max(A, ...)
--      IsMaxConsistingOf<SCEVUMaxExpr>(RHS, LHS);
-+        // min(A, ...) <= A
-+        IsMinMaxConsistingOf<SCEVUMinExpr>(LHS, RHS) ||
-+        // A <= max(A, ...)
-+        IsMinMaxConsistingOf<SCEVUMaxExpr>(RHS, LHS);
-   }
- 
-   llvm_unreachable("covered switch fell through?!");
-@@ -11611,7 +11565,9 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
-   case scAddExpr:
-   case scMulExpr:
-   case scUMaxExpr:
--  case scSMaxExpr: {
-+  case scSMaxExpr:
-+  case scUMinExpr:
-+  case scSMinExpr: {
-     bool HasVarying = false;
-     for (auto *Op : cast<SCEVNAryExpr>(S)->operands()) {
-       LoopDisposition D = getLoopDisposition(Op, L);
-@@ -11698,7 +11654,9 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
-   case scAddExpr:
-   case scMulExpr:
-   case scUMaxExpr:
--  case scSMaxExpr: {
-+  case scSMaxExpr:
-+  case scUMinExpr:
-+  case scSMinExpr: {
-     const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
-     bool Proper = true;
-     for (const SCEV *NAryOp : NAry->operands()) {
-diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
-index ca5cf1663b8..b56ec40ab75 100644
---- a/lib/Analysis/ScalarEvolutionExpander.cpp
-+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
-@@ -1634,7 +1634,8 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
-   for (int i = S->getNumOperands()-2; i >= 0; --i) {
-     // In the case of mixed integer and pointer types, do the
-     // rest of the comparisons as integer.
--    if (S->getOperand(i)->getType() != Ty) {
-+    Type *OpTy = S->getOperand(i)->getType();
-+    if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
-       Ty = SE.getEffectiveSCEVType(Ty);
-       LHS = InsertNoopCastOfTo(LHS, Ty);
-     }
-@@ -1658,7 +1659,8 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
-   for (int i = S->getNumOperands()-2; i >= 0; --i) {
-     // In the case of mixed integer and pointer types, do the
-     // rest of the comparisons as integer.
--    if (S->getOperand(i)->getType() != Ty) {
-+    Type *OpTy = S->getOperand(i)->getType();
-+    if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
-       Ty = SE.getEffectiveSCEVType(Ty);
-       LHS = InsertNoopCastOfTo(LHS, Ty);
-     }
-@@ -1676,6 +1678,56 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
-   return LHS;
- }
- 
-+Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) {
-+  Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
-+  Type *Ty = LHS->getType();
-+  for (int i = S->getNumOperands() - 2; i >= 0; --i) {
-+    // In the case of mixed integer and pointer types, do the
-+    // rest of the comparisons as integer.
-+    Type *OpTy = S->getOperand(i)->getType();
-+    if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
-+      Ty = SE.getEffectiveSCEVType(Ty);
-+      LHS = InsertNoopCastOfTo(LHS, Ty);
-+    }
-+    Value *RHS = expandCodeFor(S->getOperand(i), Ty);
-+    Value *ICmp = Builder.CreateICmpSLT(LHS, RHS);
-+    rememberInstruction(ICmp);
-+    Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smin");
-+    rememberInstruction(Sel);
-+    LHS = Sel;
-+  }
-+  // In the case of mixed integer and pointer types, cast the
-+  // final result back to the pointer type.
-+  if (LHS->getType() != S->getType())
-+    LHS = InsertNoopCastOfTo(LHS, S->getType());
-+  return LHS;
-+}
-+
-+Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
-+  Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
-+  Type *Ty = LHS->getType();
-+  for (int i = S->getNumOperands() - 2; i >= 0; --i) {
-+    // In the case of mixed integer and pointer types, do the
-+    // rest of the comparisons as integer.
-+    Type *OpTy = S->getOperand(i)->getType();
-+    if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
-+      Ty = SE.getEffectiveSCEVType(Ty);
-+      LHS = InsertNoopCastOfTo(LHS, Ty);
-+    }
-+    Value *RHS = expandCodeFor(S->getOperand(i), Ty);
-+    Value *ICmp = Builder.CreateICmpULT(LHS, RHS);
-+    rememberInstruction(ICmp);
-+    Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umin");
-+    rememberInstruction(Sel);
-+    LHS = Sel;
-+  }
-+  // In the case of mixed integer and pointer types, cast the
-+  // final result back to the pointer type.
-+  if (LHS->getType() != S->getType())
-+    LHS = InsertNoopCastOfTo(LHS, S->getType());
-+  return LHS;
-+}
-+
- Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
-                                    Instruction *IP) {
-   setInsertPoint(IP);
-@@ -2102,7 +2154,7 @@ bool SCEVExpander::isHighCostExpansionHelper(
- 
-   // HowManyLessThans uses a Max expression whenever the loop is not guarded by
-   // the exit condition.
--  if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
-+  if (isa<SCEVMinMaxExpr>(S))
-     return true;
- 
-   // Recurse past nary expressions, which commonly occur in the
-diff --git a/test/Analysis/LoopAccessAnalysis/memcheck-ni.ll b/test/Analysis/LoopAccessAnalysis/memcheck-ni.ll
-new file mode 100644
-index 00000000000..a08632f38d1
---- /dev/null
-+++ b/test/Analysis/LoopAccessAnalysis/memcheck-ni.ll
-@@ -0,0 +1,50 @@
-+; RUN: opt -loop-versioning -S < %s | FileCheck %s
-+
-+; NB: addrspaces 10-13 are non-integral
-+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
-+
-+%jl_value_t = type opaque
-+%jl_array_t = type { i8 addrspace(13)*, i64, i16, i16, i32 }
-+
-+define void @"japi1_permutedims!_33509"(%jl_value_t addrspace(10)**) {
-+; CHECK: [[CMP:%[^ ]*]] = icmp ult double addrspace(13)* [[A:%[^ ]*]], [[B:%[^ ]*]]
-+; CHECK: [[SELECT:%[^ ]*]] = select i1 %18, double addrspace(13)* [[A]], double addrspace(13)* [[B]]
-+top:
-+  %1 = alloca [3 x i64], align 8 
-+  %2 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %0, align 8
-+  %3 = getelementptr inbounds %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %0, i64 1
-+  %4 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %3, align 8
-+  %5 = getelementptr inbounds [3 x i64], [3 x i64]* %1, i64 0, i64 0
-+  store i64 1, i64* %5, align 8
-+  %6 = getelementptr inbounds [3 x i64], [3 x i64]* %1, i64 0, i64 1
-+  %7 = load i64, i64* inttoptr (i64 24 to i64*), align 8
-+  %8 = addrspacecast %jl_value_t addrspace(10)* %4 to %jl_value_t addrspace(11)*
-+  %9 = bitcast %jl_value_t addrspace(11)* %8 to double addrspace(13)* addrspace(11)*
-+  %10 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %9, align 8
-+  %11 = addrspacecast %jl_value_t addrspace(10)* %2 to %jl_value_t addrspace(11)*
-+  %12 = bitcast %jl_value_t addrspace(11)* %11 to double addrspace(13)* addrspace(11)*
-+  %13 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %12, align 8
-+  %14 = load i64, i64* %6, align 8
-+  br label %L74
-+
-+L74:
-+  %value_phi20 = phi i64 [ 1, %top ], [ %22, %L74 ]
-+  %value_phi21 = phi i64 [ 1, %top ], [ %23, %L74 ]
-+  %value_phi22 = phi i64 [ 1, %top ], [ %25, %L74 ]
-+  %15 = add i64 %value_phi21, -1
-+  %16 = getelementptr inbounds double, double addrspace(13)* %10, i64 %15
-+  %17 = bitcast double addrspace(13)* %16 to i64 addrspace(13)*
-+  %18 = load i64, i64 addrspace(13)* %17, align 8
-+  %19 = add i64 %value_phi20, -1
-+  %20 = getelementptr inbounds double, double addrspace(13)* %13, i64 %19
-+  %21 = bitcast double addrspace(13)* %20 to i64 addrspace(13)*
-+  store i64 %18, i64 addrspace(13)* %21, align 8
-+  %22 = add i64 %value_phi20, 1
-+  %23 = add i64 %14, %value_phi21
-+  %24 = icmp eq i64 %value_phi22, %7
-+  %25 = add i64 %value_phi22, 1
-+  br i1 %24, label %L94, label %L74
-+
-+L94:
-+  ret void 
-+}
-diff --git a/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll b/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll
-index 405a47554e4..4285ef0f117 100644
---- a/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll
-+++ b/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll
-@@ -58,7 +58,7 @@ for.end:                                          ; preds = %for.body
- 
- ; Here it is not obvious what the limits are, since 'step' could be negative.
- 
--; CHECK: Low: (-1 + (-1 * ((-60001 + (-1 * %a)) umax (-60001 + (40000 * %step) + (-1 * %a)))))
-+; CHECK: Low: ((60000 + %a)<nsw> umin (60000 + (-40000 * %step) + %a)) 
- ; CHECK: High: (4 + ((60000 + %a)<nsw> umax (60000 + (-40000 * %step) + %a)))
- 
- define void @g(i64 %step) {
-diff --git a/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll b/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll
-index 3542ad2a41e..d930706d7d2 100644
---- a/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll
-+++ b/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll
-@@ -22,5 +22,5 @@ afterfor:		; preds = %forinc, %entry
- 	ret i32 %j.0.lcssa
- }
- 
--; CHECK: backedge-taken count is (-2147483632 + ((-1 + (-1 * %{{[xy]}})) smax (-1 + (-1 * %{{[xy]}}))))
-+; CHECK: backedge-taken count is (-2147483633 + (-1 * (%{{[xy]}} smin %{{[xy]}})))
- 
-diff --git a/test/Analysis/ScalarEvolution/min-max-exprs.ll b/test/Analysis/ScalarEvolution/min-max-exprs.ll
-index e8c1e33e095..51f72c643cc 100644
---- a/test/Analysis/ScalarEvolution/min-max-exprs.ll
-+++ b/test/Analysis/ScalarEvolution/min-max-exprs.ll
-@@ -33,7 +33,7 @@ bb2:                                              ; preds = %bb1
-   %tmp9 = select i1 %tmp4, i64 %tmp5, i64 %tmp6
- ;                  min(N, i+3)
- ; CHECK:           select i1 %tmp4, i64 %tmp5, i64 %tmp6
--; CHECK-NEXT:  --> (-1 + (-1 * ((-1 + (-1 * (sext i32 {3,+,1}<nuw><%bb1> to i64))<nsw>)<nsw> smax (-1 + (-1 * (sext i32 %N to i64))<nsw>)<nsw>))<nsw>)<nsw>
-+; CHECK-NEXT:  --> ((sext i32 {3,+,1}<nuw><%bb1> to i64) smin (sext i32 %N to i64))
-   %tmp11 = getelementptr inbounds i32, i32* %A, i64 %tmp9
-   %tmp12 = load i32, i32* %tmp11, align 4
-   %tmp13 = shl nsw i32 %tmp12, 1
-diff --git a/test/Analysis/ScalarEvolution/predicated-trip-count.ll b/test/Analysis/ScalarEvolution/predicated-trip-count.ll
-index a0afcf457d2..b07662ed95f 100644
---- a/test/Analysis/ScalarEvolution/predicated-trip-count.ll
-+++ b/test/Analysis/ScalarEvolution/predicated-trip-count.ll
-@@ -80,7 +80,7 @@ return:         ; preds = %bb5
- ; CHECK-NEXT:    -->  (sext i16 {%Start,+,-1}<%bb3> to i32)
- ; CHECK:       Loop %bb3: Unpredictable backedge-taken count.
- ; CHECK-NEXT:  Loop %bb3: Unpredictable max backedge-taken count.
--; CHECK-NEXT:  Loop %bb3: Predicated backedge-taken count is (2 + (sext i16 %Start to i32) + ((-2 + (-1 * (sext i16 %Start to i32))<nsw>) smax (-1 + (-1 * %M))))
-+; CHECK-NEXT:  Loop %bb3: Predicated backedge-taken count is (1 + (sext i16 %Start to i32) + (-1 * ((1 + (sext i16 %Start to i32))<nsw> smin %M)))
- ; CHECK-NEXT:  Predicates:
- ; CHECK-NEXT:    {%Start,+,-1}<%bb3> Added Flags: <nssw>
- 
-diff --git a/test/Analysis/ScalarEvolution/trip-count14.ll b/test/Analysis/ScalarEvolution/trip-count14.ll
-index 5e6cfe85101..15080613881 100644
---- a/test/Analysis/ScalarEvolution/trip-count14.ll
-+++ b/test/Analysis/ScalarEvolution/trip-count14.ll
-@@ -81,7 +81,7 @@ if.end:
-   br i1 %cmp1, label %do.body, label %do.end ; taken either 0 or 2 times
- 
- ; CHECK-LABEL: Determining loop execution counts for: @s32_max2_unpredictable_exit
--; CHECK-NEXT: Loop %do.body: <multiple exits> backedge-taken count is (-1 + (-1 * ((-1 + (-1 * ((2 + %n) smax %n)) + %n) umax (-1 + (-1 * %x) + %n))))
-+; CHECK-NEXT: Loop %do.body: <multiple exits> backedge-taken count is (((-1 * %n) + ((2 + %n) smax %n)) umin ((-1 * %n) + %x))
- ; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2{{$}}
- 
- do.end:
-@@ -169,7 +169,7 @@ if.end:
-   br i1 %cmp1, label %do.body, label %do.end ; taken either 0 or 2 times
- 
- ; CHECK-LABEL: Determining loop execution counts for: @u32_max2_unpredictable_exit
--; CHECK-NEXT: Loop %do.body: <multiple exits> backedge-taken count is (-1 + (-1 * ((-1 + (-1 * ((2 + %n) umax %n)) + %n) umax (-1 + (-1 * %x) + %n))))
-+; CHECK-NEXT: Loop %do.body: <multiple exits> backedge-taken count is (((-1 * %n) + ((2 + %n) umax %n)) umin ((-1 * %n) + %x))
- ; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2{{$}}
- 
- do.end:
-diff --git a/test/Analysis/ScalarEvolution/trip-count3.ll b/test/Analysis/ScalarEvolution/trip-count3.ll
-index df6637a4ced..e10012c0c32 100644
---- a/test/Analysis/ScalarEvolution/trip-count3.ll
-+++ b/test/Analysis/ScalarEvolution/trip-count3.ll
-@@ -4,7 +4,7 @@
- ; dividing by the stride will have a remainder. This could theoretically
- ; be teaching it how to use a more elaborate trip count computation.
- 
--; CHECK: Loop %bb3.i: backedge-taken count is ((64 + (-64 smax (-1 + (-1 * %0))) + %0) /u 64)
-+; CHECK: Loop %bb3.i: backedge-taken count is ((63 + (-1 * (63 smin %0)) + %0) /u 64)
- ; CHECK: Loop %bb3.i: max backedge-taken count is 33554431
- 
- %struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
-diff --git a/test/Transforms/IRCE/conjunctive-checks.ll b/test/Transforms/IRCE/conjunctive-checks.ll
-index 60a0af83174..8711c1b00e8 100644
---- a/test/Transforms/IRCE/conjunctive-checks.ll
-+++ b/test/Transforms/IRCE/conjunctive-checks.ll
-@@ -5,17 +5,15 @@ define void @f_0(i32 *%arr, i32 *%a_len_ptr, i32 %n, i1* %cond_buf) {
- ; CHECK-LABEL: @f_0(
- 
- ; CHECK: loop.preheader:
--; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
--; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len
--; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_range_end]]
--; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_n]], i32 [[not_safe_range_end]]
--; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]]
-+; CHECK: [[len_sub:[^ ]+]] = add i32 %len, -4
-+; CHECK: [[exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp slt i32 %n, [[len_sub]]
-+; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[exit_main_loop_at_hiclamp_cmp]], i32 %n, i32 [[len_sub]]
- ; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0
- ; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0
- ; CHECK: [[enter_main_loop:[^ ]+]] = icmp slt i32 0, [[exit_main_loop_at_loclamp]]
--; CHECK: br i1 [[enter_main_loop]], label %loop.preheader2, label %main.pseudo.exit
-+; CHECK: br i1 [[enter_main_loop]], label %[[loop_preheader2:[^ ,]+]], label %main.pseudo.exit
- 
--; CHECK: loop.preheader2:
-+; CHECK: [[loop_preheader2]]:
- ; CHECK: br label %loop
- 
-  entry:
-@@ -35,9 +33,9 @@ define void @f_0(i32 *%arr, i32 *%a_len_ptr, i32 %n, i1* %cond_buf) {
- ; CHECK: loop:
- ; CHECK:  %cond = load volatile i1, i1* %cond_buf
- ; CHECK:  %abc = and i1 %cond, true
--; CHECK:  br i1 %abc, label %in.bounds, label %out.of.bounds.loopexit3, !prof !1
-+; CHECK:  br i1 %abc, label %in.bounds, label %[[loop_exit:[^ ,]+]], !prof !1
- 
--; CHECK: out.of.bounds.loopexit:
-+; CHECK: [[loop_exit]]:
- ; CHECK:  br label %out.of.bounds
- 
-  in.bounds:
-@@ -58,14 +56,10 @@ define void @f_1(
- ; CHECK-LABEL: @f_1(
- 
- ; CHECK: loop.preheader:
--; CHECK: [[not_len_b:[^ ]+]] = sub i32 -1, %len.b
--; CHECK: [[not_len_a:[^ ]+]] = sub i32 -1, %len.a
--; CHECK: [[smax_not_len_cond:[^ ]+]] = icmp sgt i32 [[not_len_b]], [[not_len_a]]
--; CHECK: [[smax_not_len:[^ ]+]] = select i1 [[smax_not_len_cond]], i32 [[not_len_b]], i32 [[not_len_a]]
--; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
--; CHECK: [[not_upper_limit_cond_loclamp:[^ ]+]] = icmp sgt i32 [[smax_not_len]], [[not_n]]
--; CHECK: [[not_upper_limit_loclamp:[^ ]+]] = select i1 [[not_upper_limit_cond_loclamp]], i32 [[smax_not_len]], i32 [[not_n]]
--; CHECK: [[upper_limit_loclamp:[^ ]+]] = sub i32 -1, [[not_upper_limit_loclamp]]
-+; CHECK: [[smax_len_cond:[^ ]+]] = icmp slt i32 %len.b, %len.a
-+; CHECK: [[smax_len:[^ ]+]] = select i1 [[smax_len_cond]], i32 %len.b, i32 %len.a
-+; CHECK: [[upper_limit_cond_loclamp:[^ ]+]] = icmp slt i32 [[smax_len]], %n 
-+; CHECK: [[upper_limit_loclamp:[^ ]+]] = select i1 [[upper_limit_cond_loclamp]], i32 [[smax_len]], i32 %n
- ; CHECK: [[upper_limit_cmp:[^ ]+]] = icmp sgt i32 [[upper_limit_loclamp]], 0
- ; CHECK: [[upper_limit:[^ ]+]] = select i1 [[upper_limit_cmp]], i32 [[upper_limit_loclamp]], i32 0
- 
-@@ -85,9 +79,9 @@ define void @f_1(
- 
- ; CHECK: loop:
- ; CHECK:   %abc = and i1 true, true
--; CHECK:   br i1 %abc, label %in.bounds, label %out.of.bounds.loopexit4, !prof !1
-+; CHECK:   br i1 %abc, label %in.bounds, label %[[oob_loopexit:[^ ,]+]], !prof !1
- 
--; CHECK: out.of.bounds.loopexit:
-+; CHECK: [[oob_loopexit]]:
- ; CHECK-NEXT:  br label %out.of.bounds
- 
- 
-diff --git a/test/Transforms/IRCE/decrementing-loop.ll b/test/Transforms/IRCE/decrementing-loop.ll
-index 4c82cd3e341..2994a432a71 100644
---- a/test/Transforms/IRCE/decrementing-loop.ll
-+++ b/test/Transforms/IRCE/decrementing-loop.ll
-@@ -29,11 +29,8 @@ define void @decrementing_loop(i32 *%arr, i32 *%a_len_ptr, i32 %n) {
-   ret void
- 
- ; CHECK: loop.preheader:
--; CHECK:   [[not_len:[^ ]+]] = sub i32 -1, %len
--; CHECK:   [[not_n:[^ ]+]] = sub i32 -1, %n
--; CHECK:   [[not_len_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_len]], [[not_n]]
--; CHECK:   [[not_len_hiclamp:[^ ]+]] = select i1 [[not_len_hiclamp_cmp]], i32 [[not_len]], i32 [[not_n]]
--; CHECK:   [[len_hiclamp:[^ ]+]] = sub i32 -1, [[not_len_hiclamp]]
-+; CHECK:   [[len_hiclamp_cmp:[^ ]+]] = icmp slt i32 %len, %n
-+; CHECK:   [[len_hiclamp:[^ ]+]] = select i1 [[len_hiclamp_cmp]], i32 %len, i32 %n
- ; CHECK:   [[not_exit_preloop_at_cmp:[^ ]+]] = icmp sgt i32 [[len_hiclamp]], 0
- ; CHECK:   [[not_exit_preloop_at:[^ ]+]] = select i1 [[not_exit_preloop_at_cmp]], i32 [[len_hiclamp]], i32 0
- ; CHECK:   %exit.preloop.at = add i32 [[not_exit_preloop_at]], -1
-diff --git a/test/Transforms/IRCE/multiple-access-no-preloop.ll b/test/Transforms/IRCE/multiple-access-no-preloop.ll
-index 000d1ab36f2..3bde9bd8668 100644
---- a/test/Transforms/IRCE/multiple-access-no-preloop.ll
-+++ b/test/Transforms/IRCE/multiple-access-no-preloop.ll
-@@ -38,14 +38,10 @@ define void @multiple_access_no_preloop(
- ; CHECK-LABEL: @multiple_access_no_preloop(
- 
- ; CHECK: loop.preheader:
--; CHECK: [[not_len_b:[^ ]+]] = sub i32 -1, %len.b
--; CHECK: [[not_len_a:[^ ]+]] = sub i32 -1, %len.a
--; CHECK: [[smax_not_len_cond:[^ ]+]] = icmp sgt i32 [[not_len_b]], [[not_len_a]]
--; CHECK: [[smax_not_len:[^ ]+]] = select i1 [[smax_not_len_cond]], i32 [[not_len_b]], i32 [[not_len_a]]
--; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
--; CHECK: [[not_upper_limit_cond_loclamp:[^ ]+]] = icmp sgt i32 [[smax_not_len]], [[not_n]]
--; CHECK: [[not_upper_limit_loclamp:[^ ]+]] = select i1 [[not_upper_limit_cond_loclamp]], i32 [[smax_not_len]], i32 [[not_n]]
--; CHECK: [[upper_limit_loclamp:[^ ]+]] = sub i32 -1, [[not_upper_limit_loclamp]]
-+; CHECK: [[smax_len_cond:[^ ]+]] = icmp slt i32 %len.b, %len.a
-+; CHECK: [[smax_len:[^ ]+]] = select i1 [[smax_len_cond]], i32 %len.b, i32 %len.a
-+; CHECK: [[upper_limit_cond_loclamp:[^ ]+]] = icmp slt i32 [[smax_len]], %n
-+; CHECK: [[upper_limit_loclamp:[^ ]+]] = select i1 [[upper_limit_cond_loclamp]], i32 [[smax_len]], i32 %n
- ; CHECK: [[upper_limit_cmp:[^ ]+]] = icmp sgt i32 [[upper_limit_loclamp]], 0
- ; CHECK: [[upper_limit:[^ ]+]] = select i1 [[upper_limit_cmp]], i32 [[upper_limit_loclamp]], i32 0
- 
-diff --git a/test/Transforms/IRCE/ranges_of_different_types.ll b/test/Transforms/IRCE/ranges_of_different_types.ll
-index 5c8161369f2..46bd94ce687 100644
---- a/test/Transforms/IRCE/ranges_of_different_types.ll
-+++ b/test/Transforms/IRCE/ranges_of_different_types.ll
-@@ -23,12 +23,11 @@ define void @test_01(i32* %arr, i32* %a_len_ptr) #0 {
- ; CHECK-NOT:     preloop
- ; CHECK:         entry:
- ; CHECK-NEXT:      %len = load i32, i32* %a_len_ptr, !range !0
--; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 12, %len
--; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102
--; CHECK-NEXT:      [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102
--; CHECK-NEXT:      [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX]]
--; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0
--; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB2]], i32 0
-+; CHECK-NEXT:      [[SUB1:%[^ ]+]] = add i32 %len, -13
-+; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp slt i32 [[SUB1]], 101
-+; CHECK-NEXT:      [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 101
-+; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp sgt i32 [[SMAX]], 0
-+; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP2]], i32 [[SMAX]], i32 0
- ; CHECK-NEXT:      [[GOTO_LOOP:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at
- ; CHECK-NEXT:      br i1 [[GOTO_LOOP]], label %loop.preheader, label %main.pseudo.exit
- ; CHECK:         loop
-@@ -83,13 +82,11 @@ define void @test_02(i32* %arr, i32* %a_len_ptr) #0 {
- ; CHECK-NEXT:      [[LEN_MINUS_SMAX:%[^ ]+]] = add i32 %len, -2147483647
- ; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp sgt i32 [[LEN_MINUS_SMAX]], -13
- ; CHECK-NEXT:      [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[LEN_MINUS_SMAX]], i32 -13
--; CHECK-NEXT:      [[ADD1:%[^ ]+]] = add i32 [[SMAX1]], -1
--; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 [[ADD1]], %len
--; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102
--; CHECK-NEXT:      [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 -102
--; CHECK-NEXT:      [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX2]]
--; CHECK-NEXT:      [[CMP3:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0
--; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP3]], i32 [[SUB2]], i32 0
-+; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 %len, [[SMAX1]]
-+; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp slt i32 [[SUB1]], 101
-+; CHECK-NEXT:      [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 101
-+; CHECK-NEXT:      [[CMP3:%[^ ]+]] = icmp sgt i32 [[SMAX2]], 0
-+; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP3]], i32 [[SMAX2]], i32 0
- ; CHECK-NEXT:      br i1 true, label %loop.preloop.preheader
- ; CHECK:         loop.preloop:
- ; CHECK-NEXT:      %idx.preloop = phi i32 [ %idx.next.preloop, %in.bounds.preloop ], [ 0, %loop.preloop.preheader ]
-@@ -151,14 +148,11 @@ define void @test_03(i32* %arr, i32* %a_len_ptr) #0 {
- ; CHECK-NOT:     preloop
- ; CHECK:         entry:
- ; CHECK-NEXT:      %len = load i32, i32* %a_len_ptr, !range !0
--; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 -2, %len
--; CHECK-NEXT:      [[SUB2:%[^ ]+]] = sub i32 -1, %len
--; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB2]], -14
--; CHECK-NEXT:      [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB2]], i32 -14
--; CHECK-NEXT:      [[SUB3:%[^ ]+]] = sub i32 [[SUB1]], [[SMAX1]]
--; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp ugt i32 [[SUB3]], -102
--; CHECK-NEXT:      [[UMAX1:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB3]], i32 -102
--; CHECK-NEXT:      %exit.mainloop.at = sub i32 -1, [[UMAX1]]
-+; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp slt i32 %len, 13
-+; CHECK-NEXT:      [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 %len, i32 13
-+; CHECK-NEXT:      [[SUB3:%[^ ]+]] = sub i32 %len, [[SMAX1]]
-+; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp ult i32 [[SUB3]], 101
-+; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB3]], i32 101
- ; CHECK-NEXT:      [[CMP3:%[^ ]+]] = icmp ult i32 0, %exit.mainloop.at
- ; CHECK-NEXT:      br i1 [[CMP3]], label %loop.preheader, label %main.pseudo.exit
- ; CHECK:         postloop:
-@@ -208,10 +202,9 @@ define void @test_04(i32* %arr, i32* %a_len_ptr) #0 {
- ; CHECK-LABEL: test_04(
- ; CHECK:         entry:
- ; CHECK-NEXT:      %len = load i32, i32* %a_len_ptr, !range !0
--; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 -14, %len
--; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp ugt i32 [[SUB1]], -102
--; CHECK-NEXT:      [[UMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102
--; CHECK-NEXT:      %exit.mainloop.at = sub i32 -1, [[UMAX1]]
-+; CHECK-NEXT:      [[SUB1:%[^ ]+]] = add i32 %len, 13
-+; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp ult i32 [[SUB1]], 101
-+; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP1]], i32 [[SUB1]], i32 101
- ; CHECK-NEXT:      br i1 true, label %loop.preloop.preheader
- ; CHECK:         in.bounds.preloop:
- ; CHECK-NEXT:      %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop
-@@ -252,12 +245,11 @@ define void @test_05(i32* %arr, i32* %a_len_ptr) #0 {
- ; CHECK-NOT:     preloop
- ; CHECK:         entry:
- ; CHECK-NEXT:      %len = load i32, i32* %a_len_ptr, !range !0
--; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 12, %len
--; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102
--; CHECK-NEXT:      [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102
--; CHECK-NEXT:      [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX]]
--; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0
--; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB2]], i32 0
-+; CHECK-NEXT:      [[SUB1:%[^ ]+]] = add i32 %len, -13
-+; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp slt i32 [[SUB1]], 101
-+; CHECK-NEXT:      [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 101
-+; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp sgt i32 [[SMAX]], 0
-+; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP2]], i32 [[SMAX]], i32 0
- ; CHECK-NEXT:      [[GOTO_LOOP:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at
- ; CHECK-NEXT:      br i1 [[GOTO_LOOP]], label %loop.preheader, label %main.pseudo.exit
- ; CHECK:         loop
-@@ -297,13 +289,11 @@ define void @test_06(i32* %arr, i32* %a_len_ptr) #0 {
- ; CHECK-NEXT:      [[LEN_MINUS_SMAX:%[^ ]+]] = add i32 %len, -2147483647
- ; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp sgt i32 [[LEN_MINUS_SMAX]], -13
- ; CHECK-NEXT:      [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[LEN_MINUS_SMAX]], i32 -13
--; CHECK-NEXT:      [[ADD1:%[^ ]+]] = add i32 [[SMAX1]], -1
--; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 [[ADD1]], %len
--; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102
--; CHECK-NEXT:      [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 -102
--; CHECK-NEXT:      [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX2]]
--; CHECK-NEXT:      [[CMP3:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0
--; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP3]], i32 [[SUB2]], i32 0
-+; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 %len, [[SMAX1]]
-+; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp slt i32 [[SUB1]], 101
-+; CHECK-NEXT:      [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 101
-+; CHECK-NEXT:      [[CMP3:%[^ ]+]] = icmp sgt i32 [[SMAX2]], 0
-+; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP3]], i32 [[SMAX2]], i32 0
- ; CHECK-NEXT:      br i1 true, label %loop.preloop.preheader
- ; CHECK:         in.bounds.preloop:
- ; CHECK-NEXT:      %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop
-@@ -344,14 +334,11 @@ define void @test_07(i32* %arr, i32* %a_len_ptr) #0 {
- ; CHECK-NOT:     preloop
- ; CHECK:         entry:
- ; CHECK-NEXT:      %len = load i32, i32* %a_len_ptr, !range !0
--; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 -2, %len
--; CHECK-NEXT:      [[SUB2:%[^ ]+]] = sub i32 -1, %len
--; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB2]], -14
--; CHECK-NEXT:      [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB2]], i32 -14
--; CHECK-NEXT:      [[SUB3:%[^ ]+]] = sub i32 [[SUB1]], [[SMAX1]]
--; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp ugt i32 [[SUB3]], -102
--; CHECK-NEXT:      [[UMAX1:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB3]], i32 -102
--; CHECK-NEXT:      %exit.mainloop.at = sub i32 -1, [[UMAX1]]
-+; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp slt i32 %len, 13
-+; CHECK-NEXT:      [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 %len, i32 13
-+; CHECK-NEXT:      [[SUB3:%[^ ]+]] = sub i32 %len, [[SMAX1]]
-+; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp ult i32 [[SUB3]], 101
-+; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB3]], i32 101
- ; CHECK-NEXT:      [[CMP3:%[^ ]+]] = icmp ult i32 0, %exit.mainloop.at
- ; CHECK-NEXT:      br i1 [[CMP3]], label %loop.preheader, label %main.pseudo.exit
- ; CHECK:         loop
-@@ -388,10 +375,9 @@ define void @test_08(i32* %arr, i32* %a_len_ptr) #0 {
- ; CHECK-LABEL: test_08(
- ; CHECK:         entry:
- ; CHECK-NEXT:      %len = load i32, i32* %a_len_ptr, !range !0
--; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 -14, %len
--; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp ugt i32 [[SUB1]], -102
--; CHECK-NEXT:      [[UMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102
--; CHECK-NEXT:      %exit.mainloop.at = sub i32 -1, [[UMAX1]]
-+; CHECK-NEXT:      [[SUB1:%[^ ]+]] = add i32 %len, 13
-+; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp ult i32 [[SUB1]], 101
-+; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP1]], i32 [[SUB1]], i32 101
- ; CHECK-NEXT:      br i1 true, label %loop.preloop.preheader
- ; CHECK:         in.bounds.preloop:
- ; CHECK-NEXT:      %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop
-diff --git a/test/Transforms/IRCE/rc-negative-bound.ll b/test/Transforms/IRCE/rc-negative-bound.ll
-index bfc0cd14778..d226bffeaae 100644
---- a/test/Transforms/IRCE/rc-negative-bound.ll
-+++ b/test/Transforms/IRCE/rc-negative-bound.ll
-@@ -114,49 +114,44 @@ define void @test_03(i32 *%arr, i32 %n, i32 %bound) {
- ; CHECK:       loop.preheader:
- ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[BOUND:%.*]], -2147483647
- ; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
--; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0
--; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[BOUND]], [[SMAX]]
--; CHECK-NEXT:    [[TMP3:%.*]] = sub i32 -1, [[BOUND]]
--; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], -1
--; CHECK-NEXT:    [[SMAX1:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 -1
--; CHECK-NEXT:    [[TMP5:%.*]] = sub i32 -1, [[SMAX1]]
--; CHECK-NEXT:    [[TMP6:%.*]] = icmp sgt i32 [[TMP5]], -1
--; CHECK-NEXT:    [[SMAX2:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 -1
--; CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[SMAX2]], 1
--; CHECK-NEXT:    [[TMP8:%.*]] = mul i32 [[TMP2]], [[TMP7]]
--; CHECK-NEXT:    [[TMP9:%.*]] = sub i32 -1, [[TMP8]]
--; CHECK-NEXT:    [[TMP10:%.*]] = sub i32 -1, [[N]]
--; CHECK-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
--; CHECK-NEXT:    [[SMAX3:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 [[TMP10]]
--; CHECK-NEXT:    [[TMP12:%.*]] = sub i32 -1, [[SMAX3]]
--; CHECK-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], 0
--; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 0
--; CHECK-NEXT:    [[TMP14:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]]
--; CHECK-NEXT:    br i1 [[TMP14]], label [[LOOP_PREHEADER5:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
--; CHECK:       loop.preheader5:
-+; CHECK-NEXT:    [[SMIN:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0
-+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[BOUND]], [[SMIN]]
-+; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[BOUND]], 0
-+; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP3]], i32 [[BOUND]], i32 0
-+; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[SMAX]], -1
-+; CHECK-NEXT:    [[SMIN1:%.*]] = select i1 [[TMP4]], i32 [[SMAX]], i32 -1
-+; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[SMIN1]], 1
-+; CHECK-NEXT:    [[TMP6:%.*]] = mul i32 [[TMP2]], [[TMP5]]
-+; CHECK-NEXT:    [[TMP7:%.*]] = icmp slt i32 [[N]], [[TMP6]]
-+; CHECK-NEXT:    [[SMAX2:%.*]] = select i1 [[TMP7]], i32 [[N]], i32 [[TMP6]]
-+; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[SMAX2]], 0
-+; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP8]], i32 [[SMAX2]], i32 0
-+; CHECK-NEXT:    [[TMP9:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]]
-+; CHECK-NEXT:    br i1 [[TMP9]], label [[LOOP_PREHEADER4:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
-+; CHECK:       loop.preheader4:
- ; CHECK-NEXT:    br label [[LOOP:%.*]]
- ; CHECK:       loop:
--; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER5]] ]
-+; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER4]] ]
- ; CHECK-NEXT:    [[IDX_NEXT]] = add i32 [[IDX]], 1
- ; CHECK-NEXT:    [[ABC:%.*]] = icmp slt i32 [[IDX]], [[BOUND]]
--; CHECK-NEXT:    br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT6:%.*]], !prof !0
-+; CHECK-NEXT:    br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT5:%.*]], !prof !0
- ; CHECK:       in.bounds:
- ; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]]
- ; CHECK-NEXT:    store i32 0, i32* [[ADDR]]
- ; CHECK-NEXT:    [[NEXT:%.*]] = icmp slt i32 [[IDX_NEXT]], [[N]]
--; CHECK-NEXT:    [[TMP15:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
--; CHECK-NEXT:    br i1 [[TMP15]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
-+; CHECK-NEXT:    [[TMP10:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
-+; CHECK-NEXT:    br i1 [[TMP10]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
- ; CHECK:       main.exit.selector:
- ; CHECK-NEXT:    [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ]
--; CHECK-NEXT:    [[TMP16:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[N]]
--; CHECK-NEXT:    br i1 [[TMP16]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
-+; CHECK-NEXT:    [[TMP11:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[N]]
-+; CHECK-NEXT:    br i1 [[TMP11]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
- ; CHECK:       main.pseudo.exit:
- ; CHECK-NEXT:    [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
- ; CHECK-NEXT:    [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
- ; CHECK-NEXT:    br label [[POSTLOOP:%.*]]
- ; CHECK:       out.of.bounds.loopexit:
- ; CHECK-NEXT:    br label [[OUT_OF_BOUNDS:%.*]]
--; CHECK:       out.of.bounds.loopexit6:
-+; CHECK:       out.of.bounds.loopexit5:
- ; CHECK-NEXT:    br label [[OUT_OF_BOUNDS]]
- ; CHECK:       out.of.bounds:
- ; CHECK-NEXT:    ret void
-@@ -211,47 +206,41 @@ define void @test_04(i32 *%arr, i32 %n, i32 %bound) {
- ; CHECK-NEXT:    [[FIRST_ITR_CHECK:%.*]] = icmp sgt i32 [[N:%.*]], 0
- ; CHECK-NEXT:    br i1 [[FIRST_ITR_CHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
- ; CHECK:       loop.preheader:
--; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 -1, [[BOUND:%.*]]
--; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], -1
--; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 -1
--; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[BOUND]], [[SMAX]]
--; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], 1
--; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 -1, [[SMAX]]
--; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], -1
--; CHECK-NEXT:    [[SMAX1:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 -1
--; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[SMAX1]], 1
--; CHECK-NEXT:    [[TMP7:%.*]] = mul i32 [[TMP3]], [[TMP6]]
--; CHECK-NEXT:    [[TMP8:%.*]] = sub i32 -1, [[TMP7]]
--; CHECK-NEXT:    [[TMP9:%.*]] = sub i32 -1, [[N]]
--; CHECK-NEXT:    [[TMP10:%.*]] = icmp ugt i32 [[TMP8]], [[TMP9]]
--; CHECK-NEXT:    [[UMAX:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
--; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = sub i32 -1, [[UMAX]]
--; CHECK-NEXT:    [[TMP11:%.*]] = icmp ult i32 0, [[EXIT_MAINLOOP_AT]]
--; CHECK-NEXT:    br i1 [[TMP11]], label [[LOOP_PREHEADER2:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
--; CHECK:       loop.preheader2:
-+; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt i32 [[BOUND:%.*]], 0
-+; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[BOUND]], i32 0
-+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[BOUND]], [[SMAX]]
-+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i32 [[SMAX]], -1
-+; CHECK-NEXT:    [[SMIN:%.*]] = select i1 [[TMP2]], i32 [[SMAX]], i32 -1
-+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[SMIN]], 1
-+; CHECK-NEXT:    [[TMP4:%.*]] = mul i32 [[TMP1]], [[TMP3]]
-+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i32 [[N]], [[TMP4]]
-+; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP5]], i32 [[N]], i32 [[TMP4]]
-+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i32 0, [[EXIT_MAINLOOP_AT]]
-+; CHECK-NEXT:    br i1 [[TMP6]], label [[LOOP_PREHEADER1:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
-+; CHECK:       loop.preheader1:
- ; CHECK-NEXT:    br label [[LOOP:%.*]]
- ; CHECK:       loop:
--; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER2]] ]
-+; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER1]] ]
- ; CHECK-NEXT:    [[IDX_NEXT]] = add i32 [[IDX]], 1
- ; CHECK-NEXT:    [[ABC:%.*]] = icmp slt i32 [[IDX]], [[BOUND]]
--; CHECK-NEXT:    br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT3:%.*]], !prof !0
-+; CHECK-NEXT:    br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT2:%.*]], !prof !0
- ; CHECK:       in.bounds:
- ; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]]
- ; CHECK-NEXT:    store i32 0, i32* [[ADDR]]
- ; CHECK-NEXT:    [[NEXT:%.*]] = icmp ult i32 [[IDX_NEXT]], [[N]]
--; CHECK-NEXT:    [[TMP12:%.*]] = icmp ult i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
--; CHECK-NEXT:    br i1 [[TMP12]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
-+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ult i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
-+; CHECK-NEXT:    br i1 [[TMP7]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
- ; CHECK:       main.exit.selector:
- ; CHECK-NEXT:    [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ]
--; CHECK-NEXT:    [[TMP13:%.*]] = icmp ult i32 [[IDX_NEXT_LCSSA]], [[N]]
--; CHECK-NEXT:    br i1 [[TMP13]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
-+; CHECK-NEXT:    [[TMP8:%.*]] = icmp ult i32 [[IDX_NEXT_LCSSA]], [[N]]
-+; CHECK-NEXT:    br i1 [[TMP8]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
- ; CHECK:       main.pseudo.exit:
- ; CHECK-NEXT:    [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
- ; CHECK-NEXT:    [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
- ; CHECK-NEXT:    br label [[POSTLOOP:%.*]]
- ; CHECK:       out.of.bounds.loopexit:
- ; CHECK-NEXT:    br label [[OUT_OF_BOUNDS:%.*]]
--; CHECK:       out.of.bounds.loopexit3:
-+; CHECK:       out.of.bounds.loopexit2:
- ; CHECK-NEXT:    br label [[OUT_OF_BOUNDS]]
- ; CHECK:       out.of.bounds:
- ; CHECK-NEXT:    ret void
-@@ -413,49 +402,44 @@ define void @test_07(i32 *%arr, i32 %n, i32 %bound) {
- ; CHECK:       loop.preheader:
- ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[BOUND:%.*]], -2147483647
- ; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
--; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0
--; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[BOUND]], [[SMAX]]
--; CHECK-NEXT:    [[TMP3:%.*]] = sub i32 -1, [[BOUND]]
--; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], -1
--; CHECK-NEXT:    [[SMAX1:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 -1
--; CHECK-NEXT:    [[TMP5:%.*]] = sub i32 -1, [[SMAX1]]
--; CHECK-NEXT:    [[TMP6:%.*]] = icmp sgt i32 [[TMP5]], -1
--; CHECK-NEXT:    [[SMAX2:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 -1
--; CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[SMAX2]], 1
--; CHECK-NEXT:    [[TMP8:%.*]] = mul i32 [[TMP2]], [[TMP7]]
--; CHECK-NEXT:    [[TMP9:%.*]] = sub i32 -1, [[TMP8]]
--; CHECK-NEXT:    [[TMP10:%.*]] = sub i32 -1, [[N]]
--; CHECK-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
--; CHECK-NEXT:    [[SMAX3:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 [[TMP10]]
--; CHECK-NEXT:    [[TMP12:%.*]] = sub i32 -1, [[SMAX3]]
--; CHECK-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], 0
--; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 0
--; CHECK-NEXT:    [[TMP14:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]]
--; CHECK-NEXT:    br i1 [[TMP14]], label [[LOOP_PREHEADER5:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
--; CHECK:       loop.preheader5:
-+; CHECK-NEXT:    [[SMIN:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0
-+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[BOUND]], [[SMIN]]
-+; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[BOUND]], 0
-+; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP3]], i32 [[BOUND]], i32 0
-+; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[SMAX]], -1
-+; CHECK-NEXT:    [[SMIN1:%.*]] = select i1 [[TMP4]], i32 [[SMAX]], i32 -1
-+; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[SMIN1]], 1
-+; CHECK-NEXT:    [[TMP6:%.*]] = mul i32 [[TMP2]], [[TMP5]]
-+; CHECK-NEXT:    [[TMP7:%.*]] = icmp slt i32 [[N]], [[TMP6]]
-+; CHECK-NEXT:    [[SMAX2:%.*]] = select i1 [[TMP7]], i32 [[N]], i32 [[TMP6]]
-+; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[SMAX2]], 0
-+; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP8]], i32 [[SMAX2]], i32 0
-+; CHECK-NEXT:    [[TMP9:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]]
-+; CHECK-NEXT:    br i1 [[TMP9]], label [[LOOP_PREHEADER4:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
-+; CHECK:       loop.preheader4:
- ; CHECK-NEXT:    br label [[LOOP:%.*]]
- ; CHECK:       loop:
--; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER5]] ]
-+; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER4]] ]
- ; CHECK-NEXT:    [[IDX_NEXT]] = add i32 [[IDX]], 1
- ; CHECK-NEXT:    [[ABC:%.*]] = icmp ult i32 [[IDX]], [[BOUND]]
--; CHECK-NEXT:    br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT6:%.*]], !prof !0
-+; CHECK-NEXT:    br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT5:%.*]], !prof !0
- ; CHECK:       in.bounds:
- ; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]]
- ; CHECK-NEXT:    store i32 0, i32* [[ADDR]]
- ; CHECK-NEXT:    [[NEXT:%.*]] = icmp slt i32 [[IDX_NEXT]], [[N]]
--; CHECK-NEXT:    [[TMP15:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
--; CHECK-NEXT:    br i1 [[TMP15]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
-+; CHECK-NEXT:    [[TMP10:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
-+; CHECK-NEXT:    br i1 [[TMP10]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
- ; CHECK:       main.exit.selector:
- ; CHECK-NEXT:    [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ]
--; CHECK-NEXT:    [[TMP16:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[N]]
--; CHECK-NEXT:    br i1 [[TMP16]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
-+; CHECK-NEXT:    [[TMP11:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[N]]
-+; CHECK-NEXT:    br i1 [[TMP11]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
- ; CHECK:       main.pseudo.exit:
- ; CHECK-NEXT:    [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
- ; CHECK-NEXT:    [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
- ; CHECK-NEXT:    br label [[POSTLOOP:%.*]]
- ; CHECK:       out.of.bounds.loopexit:
- ; CHECK-NEXT:    br label [[OUT_OF_BOUNDS:%.*]]
--; CHECK:       out.of.bounds.loopexit6:
-+; CHECK:       out.of.bounds.loopexit5:
- ; CHECK-NEXT:    br label [[OUT_OF_BOUNDS]]
- ; CHECK:       out.of.bounds:
- ; CHECK-NEXT:    ret void
-@@ -512,47 +496,41 @@ define void @test_08(i32 *%arr, i32 %n, i32 %bound) {
- ; CHECK-NEXT:    [[FIRST_ITR_CHECK:%.*]] = icmp sgt i32 [[N:%.*]], 0
- ; CHECK-NEXT:    br i1 [[FIRST_ITR_CHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
- ; CHECK:       loop.preheader:
--; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 -1, [[BOUND:%.*]]
--; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], -1
--; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 -1
--; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[BOUND]], [[SMAX]]
--; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], 1
--; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 -1, [[SMAX]]
--; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], -1
--; CHECK-NEXT:    [[SMAX1:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 -1
--; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[SMAX1]], 1
--; CHECK-NEXT:    [[TMP7:%.*]] = mul i32 [[TMP3]], [[TMP6]]
--; CHECK-NEXT:    [[TMP8:%.*]] = sub i32 -1, [[TMP7]]
--; CHECK-NEXT:    [[TMP9:%.*]] = sub i32 -1, [[N]]
--; CHECK-NEXT:    [[TMP10:%.*]] = icmp ugt i32 [[TMP8]], [[TMP9]]
--; CHECK-NEXT:    [[UMAX:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
--; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = sub i32 -1, [[UMAX]]
--; CHECK-NEXT:    [[TMP11:%.*]] = icmp ult i32 0, [[EXIT_MAINLOOP_AT]]
--; CHECK-NEXT:    br i1 [[TMP11]], label [[LOOP_PREHEADER2:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
--; CHECK:       loop.preheader2:
-+; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt i32 [[BOUND:%.*]], 0
-+; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[BOUND]], i32 0
-+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[BOUND]], [[SMAX]]
-+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i32 [[SMAX]], -1
-+; CHECK-NEXT:    [[SMIN:%.*]] = select i1 [[TMP2]], i32 [[SMAX]], i32 -1
-+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[SMIN]], 1
-+; CHECK-NEXT:    [[TMP4:%.*]] = mul i32 [[TMP1]], [[TMP3]]
-+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i32 [[N]], [[TMP4]]
-+; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP5]], i32 [[N]], i32 [[TMP4]]
-+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i32 0, [[EXIT_MAINLOOP_AT]]
-+; CHECK-NEXT:    br i1 [[TMP6]], label [[LOOP_PREHEADER1:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
-+; CHECK:       loop.preheader1:
- ; CHECK-NEXT:    br label [[LOOP:%.*]]
- ; CHECK:       loop:
--; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER2]] ]
-+; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER1]] ]
- ; CHECK-NEXT:    [[IDX_NEXT]] = add i32 [[IDX]], 1
- ; CHECK-NEXT:    [[ABC:%.*]] = icmp ult i32 [[IDX]], [[BOUND]]
--; CHECK-NEXT:    br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT3:%.*]], !prof !0
-+; CHECK-NEXT:    br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT2:%.*]], !prof !0
- ; CHECK:       in.bounds:
- ; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]]
- ; CHECK-NEXT:    store i32 0, i32* [[ADDR]]
- ; CHECK-NEXT:    [[NEXT:%.*]] = icmp ult i32 [[IDX_NEXT]], [[N]]
--; CHECK-NEXT:    [[TMP12:%.*]] = icmp ult i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
--; CHECK-NEXT:    br i1 [[TMP12]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
-+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ult i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
-+; CHECK-NEXT:    br i1 [[TMP7]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
- ; CHECK:       main.exit.selector:
- ; CHECK-NEXT:    [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ]
--; CHECK-NEXT:    [[TMP13:%.*]] = icmp ult i32 [[IDX_NEXT_LCSSA]], [[N]]
--; CHECK-NEXT:    br i1 [[TMP13]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
-+; CHECK-NEXT:    [[TMP8:%.*]] = icmp ult i32 [[IDX_NEXT_LCSSA]], [[N]]
-+; CHECK-NEXT:    br i1 [[TMP8]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
- ; CHECK:       main.pseudo.exit:
- ; CHECK-NEXT:    [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
- ; CHECK-NEXT:    [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
- ; CHECK-NEXT:    br label [[POSTLOOP:%.*]]
- ; CHECK:       out.of.bounds.loopexit:
- ; CHECK-NEXT:    br label [[OUT_OF_BOUNDS:%.*]]
--; CHECK:       out.of.bounds.loopexit3:
-+; CHECK:       out.of.bounds.loopexit2:
- ; CHECK-NEXT:    br label [[OUT_OF_BOUNDS]]
- ; CHECK:       out.of.bounds:
- ; CHECK-NEXT:    ret void
-diff --git a/test/Transforms/IRCE/single-access-no-preloop.ll b/test/Transforms/IRCE/single-access-no-preloop.ll
-index fb643139c6d..7bf36f7c254 100644
---- a/test/Transforms/IRCE/single-access-no-preloop.ll
-+++ b/test/Transforms/IRCE/single-access-no-preloop.ll
-@@ -86,15 +86,13 @@ define void @single_access_no_preloop_with_offset(i32 *%arr, i32 *%a_len_ptr, i3
- ; CHECK-LABEL: @single_access_no_preloop_with_offset(
- 
- ; CHECK: loop.preheader:
--; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
--; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len
--; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_range_end]]
--; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_n]], i32 [[not_safe_range_end]]
--; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]]
-+; CHECK: [[safe_range_end:[^ ]+]] = add i32 %len, -4
-+; CHECK: [[exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp slt i32 %n, [[safe_range_end]]
-+; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[exit_main_loop_at_hiclamp_cmp]], i32 %n, i32 [[safe_range_end]]
- ; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0
- ; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0
- ; CHECK: [[enter_main_loop:[^ ]+]] = icmp slt i32 0, [[exit_main_loop_at_loclamp]]
--; CHECK: br i1 [[enter_main_loop]], label %loop.preheader2, label %main.pseudo.exit
-+; CHECK: br i1 [[enter_main_loop]], label %[[loop_preheader:[^ ,]+]], label %main.pseudo.exit
- 
- ; CHECK: loop:
- ; CHECK: br i1 true, label %in.bounds, label %out.of.bounds
-diff --git a/test/Transforms/IRCE/single-access-with-preloop.ll b/test/Transforms/IRCE/single-access-with-preloop.ll
-index 6f3b0324e39..bd235aa4a73 100644
---- a/test/Transforms/IRCE/single-access-with-preloop.ll
-+++ b/test/Transforms/IRCE/single-access-with-preloop.ll
-@@ -34,11 +34,9 @@ define void @single_access_with_preloop(i32 *%arr, i32 *%a_len_ptr, i32 %n, i32
- ; CHECK: [[check_min_sint_offset:[^ ]+]] = icmp sgt i32 %offset, -2147483647
- ; CHECK: [[safe_offset_preloop:[^ ]+]] = select i1 [[check_min_sint_offset]], i32 %offset, i32 -2147483647
- ; If Offset was a SINT_MIN, we could have an overflow here. That is why we calculated its safe version.
--; CHECK: [[not_safe_start:[^ ]+]] = add i32 [[safe_offset_preloop]], -1
--; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
--; CHECK: [[not_exit_preloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_safe_start]], [[not_n]]
--; CHECK: [[not_exit_preloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_preloop_at_cond_loclamp]], i32 [[not_safe_start]], i32 [[not_n]]
--; CHECK: [[exit_preloop_at_loclamp:[^ ]+]] = sub i32 -1, [[not_exit_preloop_at_loclamp]]
-+; CHECK: [[safe_start:[^ ]+]] = sub i32 0, [[safe_offset_preloop]]
-+; CHECK: [[exit_preloop_at_cond_loclamp:[^ ]+]] = icmp slt i32 %n, [[safe_start]]
-+; CHECK: [[exit_preloop_at_loclamp:[^ ]+]] = select i1 [[exit_preloop_at_cond_loclamp]], i32 %n, i32 [[safe_start]]
- ; CHECK: [[exit_preloop_at_cond:[^ ]+]] = icmp sgt i32 [[exit_preloop_at_loclamp]], 0
- ; CHECK: [[exit_preloop_at:[^ ]+]] = select i1 [[exit_preloop_at_cond]], i32 [[exit_preloop_at_loclamp]], i32 0
- 
-@@ -46,17 +44,15 @@ define void @single_access_with_preloop(i32 *%arr, i32 *%a_len_ptr, i32 %n, i32
- ; CHECK: [[len_minus_sint_max:[^ ]+]] = add i32 %len, -2147483647
- ; CHECK: [[check_len_min_sint_offset:[^ ]+]] = icmp sgt i32 %offset, [[len_minus_sint_max]]
- ; CHECK: [[safe_offset_mainloop:[^ ]+]] = select i1 [[check_len_min_sint_offset]], i32 %offset, i32 [[len_minus_sint_max]]
--; CHECK: [[not_safe_start_2:[^ ]+]] = add i32 [[safe_offset_mainloop]], -1
- ; If Offset was a SINT_MIN, we could have an overflow here. That is why we calculated its safe version.
--; CHECK: [[not_safe_upper_end:[^ ]+]] = sub i32 [[not_safe_start_2]], %len
--; CHECK: [[not_exit_mainloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_safe_upper_end]], [[not_n]]
--; CHECK: [[not_exit_mainloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond_loclamp]], i32 [[not_safe_upper_end]], i32 [[not_n]]
-+; CHECK: [[safe_upper_end:[^ ]+]] = sub i32 %len, [[safe_offset_mainloop]]
-+; CHECK: [[exit_mainloop_at_cond_loclamp:[^ ]+]] = icmp slt i32 %n, [[safe_upper_end]]
-+; CHECK: [[exit_mainloop_at_loclamp:[^ ]+]] = select i1 [[exit_mainloop_at_cond_loclamp]], i32 %n, i32 [[safe_upper_end]]
- ; CHECK: [[check_offset_mainloop_2:[^ ]+]] = icmp sgt i32 %offset, 0
- ; CHECK: [[safe_offset_mainloop_2:[^ ]+]] = select i1 [[check_offset_mainloop_2]], i32 %offset, i32 0
--; CHECK: [[not_safe_lower_end:[^ ]+]] = add i32 [[safe_offset_mainloop_2]], -2147483648
--; CHECK: [[not_exit_mainloop_at_cond_hiclamp:[^ ]+]] = icmp sgt i32 [[not_exit_mainloop_at_loclamp]], [[not_safe_lower_end]]
--; CHECK: [[not_exit_mainloop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond_hiclamp]], i32 [[not_exit_mainloop_at_loclamp]], i32 [[not_safe_lower_end]]
--; CHECK: [[exit_mainloop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_mainloop_at_hiclamp]]
-+; CHECK: [[safe_lower_end:[^ ]+]] = sub i32 2147483647, [[safe_offset_mainloop_2]]
-+; CHECK: [[exit_mainloop_at_cond_hiclamp:[^ ]+]] = icmp slt i32 [[exit_mainloop_at_loclamp]], [[safe_lower_end]]
-+; CHECK: [[exit_mainloop_at_hiclamp:[^ ]+]] = select i1 [[exit_mainloop_at_cond_hiclamp]], i32 [[exit_mainloop_at_loclamp]], i32 [[safe_lower_end]]
- ; CHECK: [[exit_mainloop_at_cmp:[^ ]+]] = icmp sgt i32 [[exit_mainloop_at_hiclamp]], 0
- ; CHECK: [[exit_mainloop_at:[^ ]+]] = select i1 [[exit_mainloop_at_cmp]], i32 [[exit_mainloop_at_hiclamp]], i32 0
- 
-@@ -67,7 +63,7 @@ define void @single_access_with_preloop(i32 *%arr, i32 *%a_len_ptr, i32 %n, i32
- ; CHECK: %abc.high = icmp slt i32 %array.idx, %len
- ; CHECK: %abc.low = icmp sge i32 %array.idx, 0
- ; CHECK: %abc = and i1 true, true
--; CHECK: br i1 %abc, label %in.bounds, label %out.of.bounds.loopexit11
-+; CHECK: br i1 %abc, label %in.bounds, label %[[loopexit:[^ ,]+]]
- 
- ; CHECK: in.bounds:
- ; CHECK: [[continue_mainloop_cond:[^ ]+]] = icmp slt i32 %idx.next, [[exit_mainloop_at]]
-diff --git a/test/Transforms/IRCE/unsigned_comparisons_ugt.ll b/test/Transforms/IRCE/unsigned_comparisons_ugt.ll
-index 8f00c733569..3451d65c7bb 100644
---- a/test/Transforms/IRCE/unsigned_comparisons_ugt.ll
-+++ b/test/Transforms/IRCE/unsigned_comparisons_ugt.ll
-@@ -58,8 +58,8 @@ define void @test_02(i32* %arr, i32* %a_len_ptr) #0 {
- ; CHECK:        entry:
- ; CHECK-NEXT:     %len = load i32, i32* %a_len_ptr, !range !0
- ; CHECK-NEXT:     [[COND1:%[^ ]+]] = icmp ugt i32 %len, 1
--; CHECK-NEXT:     %umax = select i1 [[COND1]], i32 %len, i32 1
--; CHECK-NEXT:     %exit.preloop.at = add i32 %umax, -1
-+; CHECK-NEXT:     [[UMIN:%[^ ]+]] = select i1 [[COND1]], i32 %len, i32 1
-+; CHECK-NEXT:     %exit.preloop.at = add i32 [[UMIN]], -1
- ; CHECK-NEXT:     [[COND2:%[^ ]+]] = icmp ugt i32 100, %exit.preloop.at
- ; CHECK-NEXT:     br i1 [[COND2]], label %loop.preloop.preheader, label %preloop.pseudo.exit
- ; CHECK:        mainloop:
-@@ -149,8 +149,8 @@ define void @test_04(i32* %arr, i32* %a_len_ptr) #0 {
- ; CHECK:        entry:
- ; CHECK-NEXT:     %len = load i32, i32* %a_len_ptr, !range !0
- ; CHECK-NEXT:     [[COND1:%[^ ]+]] = icmp ugt i32 %len, 1
--; CHECK-NEXT:     %umax = select i1 [[COND1]], i32 %len, i32 1
--; CHECK-NEXT:     %exit.preloop.at = add i32 %umax, -1
-+; CHECK-NEXT:     [[UMIN:%[^ ]+]] = select i1 [[COND1]], i32 %len, i32 1
-+; CHECK-NEXT:     %exit.preloop.at = add i32 [[UMIN]], -1
- ; CHECK-NEXT:     [[COND2:%[^ ]+]] = icmp ugt i32 -2147483648, %exit.preloop.at
- ; CHECK-NEXT:     br i1 [[COND2]], label %loop.preloop.preheader, label %preloop.pseudo.exit
- ; CHECK:        mainloop:
-diff --git a/test/Transforms/IRCE/unsigned_comparisons_ult.ll b/test/Transforms/IRCE/unsigned_comparisons_ult.ll
-index dc59c11df1b..aca3c3d192e 100644
---- a/test/Transforms/IRCE/unsigned_comparisons_ult.ll
-+++ b/test/Transforms/IRCE/unsigned_comparisons_ult.ll
-@@ -61,8 +61,8 @@ define void @test_02(i32* %arr, i32* %a_len_ptr) #0 {
- ; CHECK:        entry:
- ; CHECK-NEXT:     %len = load i32, i32* %a_len_ptr, !range !0
- ; CHECK-NEXT:     [[COND1:%[^ ]+]] = icmp ugt i32 %len, 1
--; CHECK-NEXT:     %umax = select i1 [[COND1]], i32 %len, i32 1
--; CHECK-NEXT:     %exit.preloop.at = add i32 %umax, -1
-+; CHECK-NEXT:     [[UMIN:%[^ ]+]] = select i1 [[COND1]], i32 %len, i32 1
-+; CHECK-NEXT:     %exit.preloop.at = add i32 [[UMIN]], -1
- ; CHECK-NEXT:     [[COND2:%[^ ]+]] = icmp ugt i32 100, %exit.preloop.at
- ; CHECK-NEXT:     br i1 [[COND2]], label %loop.preloop.preheader, label %preloop.pseudo.exit
- ; CHECK:        mainloop:
-@@ -194,8 +194,8 @@ define void @test_05(i32* %arr, i32* %a_len_ptr) #0 {
- ; CHECK:        entry:
- ; CHECK-NEXT:     %len = load i32, i32* %a_len_ptr, !range !0
- ; CHECK-NEXT:     [[COND1:%[^ ]+]] = icmp ugt i32 %len, 1
--; CHECK-NEXT:     %umax = select i1 [[COND1]], i32 %len, i32 1
--; CHECK-NEXT:     %exit.preloop.at = add i32 %umax, -1
-+; CHECK-NEXT:     [[UMIN:%[^ ]+]] = select i1 [[COND1]], i32 %len, i32 1
-+; CHECK-NEXT:     %exit.preloop.at = add i32 [[UMIN]], -1
- ; CHECK-NEXT:     [[COND2:%[^ ]+]] = icmp ugt i32 -2147483648, %exit.preloop.at
- ; CHECK-NEXT:     br i1 [[COND2]], label %loop.preloop.preheader, label %preloop.pseudo.exit
- ; CHECK:        mainloop:
-diff --git a/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll b/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
-index ea3f6077231..d5232e1874c 100644
---- a/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
-+++ b/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
-@@ -14,8 +14,6 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
- ; current LSR cost model.
- ; CHECK-NOT: = ptrtoint i8* undef to i64
- ; CHECK: .lr.ph
--; CHECK: [[TMP:%[^ ]+]] = add i64 %tmp{{[0-9]+}}, -1
--; CHECK: sub i64 [[TMP]], %tmp{{[0-9]+}}
- ; CHECK: ret void
- define void @VerifyDiagnosticConsumerTest() unnamed_addr nounwind uwtable align 2 {
- bb:
-diff --git a/test/Transforms/LoopVectorize/X86/pr35432.ll b/test/Transforms/LoopVectorize/X86/pr35432.ll
-index 1f2a2061586..6aaa13c183a 100644
---- a/test/Transforms/LoopVectorize/X86/pr35432.ll
-+++ b/test/Transforms/LoopVectorize/X86/pr35432.ll
-@@ -27,7 +27,6 @@ define i32 @main() local_unnamed_addr #0 {
- ; CHECK-NEXT:    [[CMP8:%.*]] = icmp eq i32 [[CONV17]], 0
- ; CHECK-NEXT:    br i1 [[CMP8]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END12:%.*]]
- ; CHECK:       for.body.lr.ph:
--; CHECK-NEXT:    [[TMP3:%.*]] = sub i32 -1, [[TMP2]]
- ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
- ; CHECK:       for.body:
- ; CHECK-NEXT:    [[STOREMERGE_IN9:%.*]] = phi i32 [ [[TMP2]], [[FOR_BODY_LR_PH]] ], [ [[ADD:%.*]], [[FOR_INC9:%.*]] ]
-@@ -37,77 +36,74 @@ define i32 @main() local_unnamed_addr #0 {
- ; CHECK:       for.body8.lr.ph:
- ; CHECK-NEXT:    [[CONV3:%.*]] = trunc i32 [[STOREMERGE_IN9]] to i8
- ; CHECK-NEXT:    [[DOTPROMOTED:%.*]] = load i32, i32* getelementptr inbounds ([192 x [192 x i32]], [192 x [192 x i32]]* @a, i64 0, i64 0, i64 0), align 16
--; CHECK-NEXT:    [[TMP4:%.*]] = add i8 [[CONV3]], -1
--; CHECK-NEXT:    [[TMP5:%.*]] = zext i8 [[TMP4]] to i32
--; CHECK-NEXT:    [[TMP6:%.*]] = sub i32 -1, [[TMP5]]
--; CHECK-NEXT:    [[TMP7:%.*]] = icmp ugt i32 [[TMP3]], [[TMP6]]
--; CHECK-NEXT:    [[UMAX:%.*]] = select i1 [[TMP7]], i32 [[TMP3]], i32 [[TMP6]]
--; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[UMAX]], 2
--; CHECK-NEXT:    [[TMP9:%.*]] = add i32 [[TMP8]], [[TMP5]]
--; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP9]], 8
-+; CHECK-NEXT:    [[TMP3:%.*]] = add i8 [[CONV3]], -1
-+; CHECK-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP3]] to i32
-+; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[TMP4]], 1
-+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i32 [[TMP2]], [[TMP4]]
-+; CHECK-NEXT:    [[UMAX:%.*]] = select i1 [[TMP6]], i32 [[TMP2]], i32 [[TMP4]]
-+; CHECK-NEXT:    [[TMP7:%.*]] = sub i32 [[TMP5]], [[UMAX]]
-+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP7]], 8
- ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
- ; CHECK:       vector.scevcheck:
--; CHECK-NEXT:    [[TMP10:%.*]] = add i8 [[CONV3]], -1
--; CHECK-NEXT:    [[TMP11:%.*]] = zext i8 [[TMP10]] to i32
--; CHECK-NEXT:    [[TMP12:%.*]] = sub i32 -1, [[TMP11]]
--; CHECK-NEXT:    [[TMP13:%.*]] = icmp ugt i32 [[TMP3]], [[TMP12]]
--; CHECK-NEXT:    [[UMAX1:%.*]] = select i1 [[TMP13]], i32 [[TMP3]], i32 [[TMP12]]
--; CHECK-NEXT:    [[TMP14:%.*]] = add i32 [[UMAX1]], 1
--; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[TMP14]], [[TMP11]]
--; CHECK-NEXT:    [[TMP16:%.*]] = trunc i32 [[TMP15]] to i8
--; CHECK-NEXT:    [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 [[TMP16]])
-+; CHECK-NEXT:    [[TMP8:%.*]] = add i8 [[CONV3]], -1
-+; CHECK-NEXT:    [[TMP9:%.*]] = zext i8 [[TMP8]] to i32
-+; CHECK-NEXT:    [[TMP10:%.*]] = icmp ult i32 [[TMP2]], [[TMP9]]
-+; CHECK-NEXT:    [[UMAX1:%.*]] = select i1 [[TMP10]], i32 [[TMP2]], i32 [[TMP9]]
-+; CHECK-NEXT:    [[TMP11:%.*]] = sub i32 [[TMP9]], [[UMAX1]]
-+; CHECK-NEXT:    [[TMP12:%.*]] = trunc i32 [[TMP11]] to i8
-+; CHECK-NEXT:    [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 [[TMP12]])
- ; CHECK-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
- ; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
--; CHECK-NEXT:    [[TMP17:%.*]] = add i8 [[TMP10]], [[MUL_RESULT]]
--; CHECK-NEXT:    [[TMP18:%.*]] = sub i8 [[TMP10]], [[MUL_RESULT]]
--; CHECK-NEXT:    [[TMP19:%.*]] = icmp ugt i8 [[TMP18]], [[TMP10]]
--; CHECK-NEXT:    [[TMP20:%.*]] = icmp ult i8 [[TMP17]], [[TMP10]]
--; CHECK-NEXT:    [[TMP21:%.*]] = select i1 true, i1 [[TMP19]], i1 [[TMP20]]
--; CHECK-NEXT:    [[TMP22:%.*]] = icmp ugt i32 [[TMP15]], 255
--; CHECK-NEXT:    [[TMP23:%.*]] = or i1 [[TMP21]], [[TMP22]]
--; CHECK-NEXT:    [[TMP24:%.*]] = or i1 [[TMP23]], [[MUL_OVERFLOW]]
--; CHECK-NEXT:    [[TMP25:%.*]] = or i1 false, [[TMP24]]
--; CHECK-NEXT:    br i1 [[TMP25]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
-+; CHECK-NEXT:    [[TMP13:%.*]] = add i8 [[TMP8]], [[MUL_RESULT]]
-+; CHECK-NEXT:    [[TMP14:%.*]] = sub i8 [[TMP8]], [[MUL_RESULT]]
-+; CHECK-NEXT:    [[TMP15:%.*]] = icmp ugt i8 [[TMP14]], [[TMP8]]
-+; CHECK-NEXT:    [[TMP16:%.*]] = icmp ult i8 [[TMP13]], [[TMP8]]
-+; CHECK-NEXT:    [[TMP17:%.*]] = select i1 true, i1 [[TMP15]], i1 [[TMP16]]
-+; CHECK-NEXT:    [[TMP18:%.*]] = icmp ugt i32 [[TMP11]], 255
-+; CHECK-NEXT:    [[TMP19:%.*]] = or i1 [[TMP17]], [[TMP18]]
-+; CHECK-NEXT:    [[TMP20:%.*]] = or i1 [[TMP19]], [[MUL_OVERFLOW]]
-+; CHECK-NEXT:    [[TMP21:%.*]] = or i1 false, [[TMP20]]
-+; CHECK-NEXT:    br i1 [[TMP21]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
- ; CHECK:       vector.ph:
--; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP9]], 8
--; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP9]], [[N_MOD_VF]]
-+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP7]], 8
-+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP7]], [[N_MOD_VF]]
- ; CHECK-NEXT:    [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8
- ; CHECK-NEXT:    [[IND_END:%.*]] = sub i8 [[CONV3]], [[CAST_CRD]]
--; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[DOTPROMOTED]], i32 0
-+; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[DOTPROMOTED]], i32 0
- ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
- ; CHECK:       vector.body:
- ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
--; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP26]], [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[VECTOR_BODY]] ]
--; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[VECTOR_BODY]] ]
--; CHECK-NEXT:    [[TMP27:%.*]] = trunc i32 [[INDEX]] to i8
--; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i8 [[CONV3]], [[TMP27]]
-+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP22]], [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ]
-+; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ]
-+; CHECK-NEXT:    [[TMP23:%.*]] = trunc i32 [[INDEX]] to i8
-+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i8 [[CONV3]], [[TMP23]]
- ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> undef, i8 [[OFFSET_IDX]], i32 0
- ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> undef, <4 x i32> zeroinitializer
- ; CHECK-NEXT:    [[INDUCTION:%.*]] = add <4 x i8> [[BROADCAST_SPLAT]], <i8 0, i8 -1, i8 -2, i8 -3>
- ; CHECK-NEXT:    [[INDUCTION3:%.*]] = add <4 x i8> [[BROADCAST_SPLAT]], <i8 -4, i8 -5, i8 -6, i8 -7>
--; CHECK-NEXT:    [[TMP28:%.*]] = add i8 [[OFFSET_IDX]], 0
--; CHECK-NEXT:    [[TMP29:%.*]] = add i8 [[OFFSET_IDX]], -4
--; CHECK-NEXT:    [[TMP30]] = add <4 x i32> [[VEC_PHI]], <i32 1, i32 1, i32 1, i32 1>
--; CHECK-NEXT:    [[TMP31]] = add <4 x i32> [[VEC_PHI2]], <i32 1, i32 1, i32 1, i32 1>
--; CHECK-NEXT:    [[TMP32:%.*]] = add i8 [[TMP28]], -1
--; CHECK-NEXT:    [[TMP33:%.*]] = add i8 [[TMP29]], -1
--; CHECK-NEXT:    [[TMP34:%.*]] = zext i8 [[TMP32]] to i32
--; CHECK-NEXT:    [[TMP35:%.*]] = zext i8 [[TMP33]] to i32
-+; CHECK-NEXT:    [[TMP24:%.*]] = add i8 [[OFFSET_IDX]], 0
-+; CHECK-NEXT:    [[TMP25:%.*]] = add i8 [[OFFSET_IDX]], -4
-+; CHECK-NEXT:    [[TMP26]] = add <4 x i32> [[VEC_PHI]], <i32 1, i32 1, i32 1, i32 1>
-+; CHECK-NEXT:    [[TMP27]] = add <4 x i32> [[VEC_PHI2]], <i32 1, i32 1, i32 1, i32 1>
-+; CHECK-NEXT:    [[TMP28:%.*]] = add i8 [[TMP24]], -1
-+; CHECK-NEXT:    [[TMP29:%.*]] = add i8 [[TMP25]], -1
-+; CHECK-NEXT:    [[TMP30:%.*]] = zext i8 [[TMP28]] to i32
-+; CHECK-NEXT:    [[TMP31:%.*]] = zext i8 [[TMP29]] to i32
- ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
--; CHECK-NEXT:    [[TMP36:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
--; CHECK-NEXT:    br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
-+; CHECK-NEXT:    [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-+; CHECK-NEXT:    br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
- ; CHECK:       middle.block:
--; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP31]], [[TMP30]]
-+; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP27]], [[TMP26]]
- ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
- ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF]]
- ; CHECK-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <4 x i32> [[BIN_RDX4]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
- ; CHECK-NEXT:    [[BIN_RDX6:%.*]] = add <4 x i32> [[BIN_RDX4]], [[RDX_SHUF5]]
--; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <4 x i32> [[BIN_RDX6]], i32 0
--; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP9]], [[N_VEC]]
-+; CHECK-NEXT:    [[TMP33:%.*]] = extractelement <4 x i32> [[BIN_RDX6]], i32 0
-+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP7]], [[N_VEC]]
- ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
- ; CHECK:       scalar.ph:
- ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[CONV3]], [[FOR_BODY8_LR_PH]] ], [ [[CONV3]], [[VECTOR_SCEVCHECK]] ]
--; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ], [ [[TMP37]], [[MIDDLE_BLOCK]] ]
-+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ]
- ; CHECK-NEXT:    br label [[FOR_BODY8:%.*]]
- ; CHECK:       for.body8:
- ; CHECK-NEXT:    [[INC5:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY8]] ]
-@@ -118,7 +114,7 @@ define i32 @main() local_unnamed_addr #0 {
- ; CHECK-NEXT:    [[CMP6:%.*]] = icmp ult i32 [[TMP2]], [[CONV5]]
- ; CHECK-NEXT:    br i1 [[CMP6]], label [[FOR_BODY8]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE]], !llvm.loop !2
- ; CHECK:       for.cond4.for.inc9_crit_edge:
--; CHECK-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY8]] ], [ [[TMP37]], [[MIDDLE_BLOCK]] ]
-+; CHECK-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY8]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ]
- ; CHECK-NEXT:    store i32 [[INC_LCSSA]], i32* getelementptr inbounds ([192 x [192 x i32]], [192 x [192 x i32]]* @a, i64 0, i64 0, i64 0), align 16
- ; CHECK-NEXT:    br label [[FOR_INC9]]
- ; CHECK:       for.inc9:
diff --git a/deps/patches/llvm-8.0-D55758-tablegen-cond.patch b/deps/patches/llvm-8.0-D55758-tablegen-cond.patch
deleted file mode 100644
index ae9e610883d7c6..00000000000000
--- a/deps/patches/llvm-8.0-D55758-tablegen-cond.patch
+++ /dev/null
@@ -1,794 +0,0 @@
-From 95135c5a18ee14ca091d3513cc7801521d4eb204 Mon Sep 17 00:00:00 2001
-From: Javed Absar <javed.absar@arm.com>
-Date: Fri, 25 Jan 2019 10:25:25 +0000
-Subject: [PATCH] [TblGen] Extend !if semantics through new feature !cond
-
-This patch extends TableGen language with !cond operator.
-Instead of embedding !if inside !if which can get cumbersome,
-one can now use !cond.
-Below is an example to convert an integer 'x' into a string:
-
-    !cond(!lt(x,0) : "Negative",
-          !eq(x,0) : "Zero",
-          !eq(x,1) : "One,
-          1        : "MoreThanOne")
-
-Reviewed By: hfinkel, simon_tatham, greened
-Differential Revision: https://reviews.llvm.org/D55758
-
-llvm-svn: 352185
----
- docs/TableGen/LangIntro.rst          |  14 +++
- docs/TableGen/LangRef.rst            |  10 +-
- include/llvm/TableGen/Record.h       |  78 ++++++++++++++++
- lib/TableGen/Record.cpp              | 131 +++++++++++++++++++++++++++
- lib/TableGen/TGLexer.cpp             |   1 +
- lib/TableGen/TGLexer.h               |   2 +-
- lib/TableGen/TGParser.cpp            |  90 ++++++++++++++++++
- lib/TableGen/TGParser.h              |   1 +
- test/TableGen/cond-bitlist.td        |  27 ++++++
- test/TableGen/cond-default.td        |  11 +++
- test/TableGen/cond-empty-list-arg.td |   8 ++
- test/TableGen/cond-inheritance.td    |  22 +++++
- test/TableGen/cond-let.td            |  36 ++++++++
- test/TableGen/cond-list.td           |  38 ++++++++
- test/TableGen/cond-subclass.td       |  27 ++++++
- test/TableGen/cond-type.td           |  11 +++
- test/TableGen/cond-usage.td          |  29 ++++++
- test/TableGen/condsbit.td            |  15 +++
- 18 files changed, 549 insertions(+), 2 deletions(-)
- create mode 100644 llvm/test/TableGen/cond-bitlist.td
- create mode 100644 llvm/test/TableGen/cond-default.td
- create mode 100644 llvm/test/TableGen/cond-empty-list-arg.td
- create mode 100644 llvm/test/TableGen/cond-inheritance.td
- create mode 100644 llvm/test/TableGen/cond-let.td
- create mode 100644 llvm/test/TableGen/cond-list.td
- create mode 100644 llvm/test/TableGen/cond-subclass.td
- create mode 100644 llvm/test/TableGen/cond-type.td
- create mode 100644 llvm/test/TableGen/cond-usage.td
- create mode 100644 llvm/test/TableGen/condsbit.td
-
-diff --git a/docs/TableGen/LangIntro.rst b/docs/TableGen/LangIntro.rst
-index ea46550ffc0..390f941f0ca 100644
---- a/docs/TableGen/LangIntro.rst
-+++ b/docs/TableGen/LangIntro.rst
-@@ -258,6 +258,20 @@ supported include:
- ``!if(a,b,c)``
-   'b' if the result of 'int' or 'bit' operator 'a' is nonzero, 'c' otherwise.
- 
-+``!cond(condition_1 : val1, condition_2 : val2, ..., condition_n : valn)``
-+    Instead of embedding !if inside !if which can get cumbersome,
-+    one can use !cond. !cond returns 'val1' if the result of 'int' or 'bit'
-+    operator 'condition1' is nonzero. Otherwise, it checks 'condition2'.
-+    If 'condition2' is nonzero, returns 'val2', and so on.
-+    If all conditions are zero, it reports an error.
-+
-+    Below is an example to convert an integer 'x' into a string:
-+
-+    !cond(!lt(x,0) : "Negative",
-+          !eq(x,0) : "Zero",
-+          !eq(x,1) : "One,
-+          1        : "MoreThanOne")
-+
- ``!eq(a,b)``
-     'bit 1' if string a is equal to string b, 0 otherwise.  This only operates
-     on string, int and bit objects.  Use !cast<string> to compare other types of
-diff --git a/docs/TableGen/LangRef.rst b/docs/TableGen/LangRef.rst
-index 2efee12ec9d..a3dbf363151 100644
---- a/docs/TableGen/LangRef.rst
-+++ b/docs/TableGen/LangRef.rst
-@@ -102,6 +102,12 @@ wide variety of meanings:
-                :!isa    !dag     !le      !lt        !ge
-                :!gt     !ne
- 
-+TableGen also has !cond operator that needs a slightly different
-+syntax compared to other "bang operators":
-+
-+.. productionlist::
-+   CondOperator: !cond
-+
- 
- Syntax
- ======
-@@ -140,7 +146,7 @@ considered to define the class if any of the following is true:
- #. The :token:`Body` in the :token:`ObjectBody` is present and is not empty.
- #. The :token:`BaseClassList` in the :token:`ObjectBody` is present.
- 
--You can declare an empty class by giving and empty :token:`TemplateArgList`
-+You can declare an empty class by giving an empty :token:`TemplateArgList`
- and an empty :token:`ObjectBody`. This can serve as a restricted form of
- forward declaration: note that records deriving from the forward-declared
- class will inherit no fields from it since the record expansion is done
-@@ -315,6 +321,8 @@ The initial :token:`DagArg` is called the "operator" of the dag.
- 
- .. productionlist::
-    SimpleValue: `BangOperator` ["<" `Type` ">"] "(" `ValueListNE` ")"
-+              :| `CondOperator` "(" `CondVal` ("," `CondVal`)* ")"
-+   CondVal: `Value` ":" `Value`
- 
- Bodies
- ------
-diff --git a/include/llvm/TableGen/Record.h b/include/llvm/TableGen/Record.h
-index e022bc82b4e..3ca67ec72bd 100644
---- a/include/llvm/TableGen/Record.h
-+++ b/include/llvm/TableGen/Record.h
-@@ -316,6 +316,7 @@ protected:
-     IK_TernOpInit,
-     IK_UnOpInit,
-     IK_LastOpInit,
-+    IK_CondOpInit,
-     IK_FoldOpInit,
-     IK_IsAOpInit,
-     IK_StringInit,
-@@ -912,6 +913,83 @@ public:
-   std::string getAsString() const override;
- };
- 
-+/// !cond(condition_1: value1, ... , condition_n: value)
-+/// Selects the first value for which condition is true.
-+/// Otherwise reports an error.
-+class CondOpInit final : public TypedInit, public FoldingSetNode,
-+                      public TrailingObjects<CondOpInit, Init *> {
-+  unsigned NumConds;
-+  RecTy *ValType;
-+
-+  CondOpInit(unsigned NC, RecTy *Type)
-+    : TypedInit(IK_CondOpInit, Type),
-+      NumConds(NC), ValType(Type) {}
-+
-+  size_t numTrailingObjects(OverloadToken<Init *>) const {
-+    return 2*NumConds;
-+  }
-+
-+public:
-+  CondOpInit(const CondOpInit &) = delete;
-+  CondOpInit &operator=(const CondOpInit &) = delete;
-+
-+  static bool classof(const Init *I) {
-+    return I->getKind() == IK_CondOpInit;
-+  }
-+
-+  static CondOpInit *get(ArrayRef<Init*> C, ArrayRef<Init*> V,
-+                        RecTy *Type);
-+
-+  void Profile(FoldingSetNodeID &ID) const;
-+
-+  RecTy *getValType() const { return ValType; }
-+
-+  unsigned getNumConds() const { return NumConds; }
-+
-+  Init *getCond(unsigned Num) const {
-+    assert(Num < NumConds && "Condition number out of range!");
-+    return getTrailingObjects<Init *>()[Num];
-+  }
-+
-+  Init *getVal(unsigned Num) const {
-+    assert(Num < NumConds && "Val number out of range!");
-+    return getTrailingObjects<Init *>()[Num+NumConds];
-+  }
-+
-+  ArrayRef<Init *> getConds() const {
-+    return makeArrayRef(getTrailingObjects<Init *>(), NumConds);
-+  }
-+
-+  ArrayRef<Init *> getVals() const {
-+    return makeArrayRef(getTrailingObjects<Init *>()+NumConds, NumConds);
-+  }
-+
-+  Init *Fold(Record *CurRec) const;
-+
-+  Init *resolveReferences(Resolver &R) const override;
-+
-+  bool isConcrete() const override;
-+  bool isComplete() const override;
-+  std::string getAsString() const override;
-+
-+  using const_case_iterator = SmallVectorImpl<Init*>::const_iterator;
-+  using const_val_iterator = SmallVectorImpl<Init*>::const_iterator;
-+
-+  inline const_case_iterator  arg_begin() const { return getConds().begin(); }
-+  inline const_case_iterator  arg_end  () const { return getConds().end(); }
-+
-+  inline size_t              case_size () const { return NumConds; }
-+  inline bool                case_empty() const { return NumConds == 0; }
-+
-+  inline const_val_iterator name_begin() const { return getVals().begin();}
-+  inline const_val_iterator name_end  () const { return getVals().end(); }
-+
-+  inline size_t              val_size () const { return NumConds; }
-+  inline bool                val_empty() const { return NumConds == 0; }
-+
-+  Init *getBit(unsigned Bit) const override;
-+};
-+
- /// !foldl (a, b, expr, start, lst) - Fold over a list.
- class FoldOpInit : public TypedInit, public FoldingSetNode {
- private:
-diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp
-index cf1685a2e8c..26ffe761b66 100644
---- a/lib/TableGen/Record.cpp
-+++ b/lib/TableGen/Record.cpp
-@@ -1694,6 +1694,137 @@ Init *FieldInit::Fold(Record *CurRec) const {
-   return const_cast<FieldInit *>(this);
- }
- 
-+static void ProfileCondOpInit(FoldingSetNodeID &ID,
-+                             ArrayRef<Init *> CondRange,
-+                             ArrayRef<Init *> ValRange,
-+                             const RecTy *ValType) {
-+  assert(CondRange.size() == ValRange.size() &&
-+         "Number of conditions and values must match!");
-+  ID.AddPointer(ValType);
-+  ArrayRef<Init *>::iterator Case = CondRange.begin();
-+  ArrayRef<Init *>::iterator Val = ValRange.begin();
-+
-+  while (Case != CondRange.end()) {
-+    ID.AddPointer(*Case++);
-+    ID.AddPointer(*Val++);
-+  }
-+}
-+
-+void CondOpInit::Profile(FoldingSetNodeID &ID) const {
-+  ProfileCondOpInit(ID,
-+      makeArrayRef(getTrailingObjects<Init *>(), NumConds),
-+      makeArrayRef(getTrailingObjects<Init *>() + NumConds, NumConds),
-+      ValType);
-+}
-+
-+CondOpInit *
-+CondOpInit::get(ArrayRef<Init *> CondRange,
-+                ArrayRef<Init *> ValRange, RecTy *Ty) {
-+  assert(CondRange.size() == ValRange.size() &&
-+         "Number of conditions and values must match!");
-+
-+  static FoldingSet<CondOpInit> ThePool;
-+  FoldingSetNodeID ID;
-+  ProfileCondOpInit(ID, CondRange, ValRange, Ty);
-+
-+  void *IP = nullptr;
-+  if (CondOpInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
-+    return I;
-+
-+  void *Mem = Allocator.Allocate(totalSizeToAlloc<Init *>(2*CondRange.size()),
-+                                 alignof(BitsInit));
-+  CondOpInit *I = new(Mem) CondOpInit(CondRange.size(), Ty);
-+
-+  std::uninitialized_copy(CondRange.begin(), CondRange.end(),
-+                          I->getTrailingObjects<Init *>());
-+  std::uninitialized_copy(ValRange.begin(), ValRange.end(),
-+                          I->getTrailingObjects<Init *>()+CondRange.size());
-+  ThePool.InsertNode(I, IP);
-+  return I;
-+}
-+
-+Init *CondOpInit::resolveReferences(Resolver &R) const {
-+  SmallVector<Init*, 4> NewConds;
-+  bool Changed = false;
-+  for (const Init *Case : getConds()) {
-+    Init *NewCase = Case->resolveReferences(R);
-+    NewConds.push_back(NewCase);
-+    Changed |= NewCase != Case;
-+  }
-+
-+  SmallVector<Init*, 4> NewVals;
-+  for (const Init *Val : getVals()) {
-+    Init *NewVal = Val->resolveReferences(R);
-+    NewVals.push_back(NewVal);
-+    Changed |= NewVal != Val;
-+  }
-+
-+  if (Changed)
-+    return (CondOpInit::get(NewConds, NewVals,
-+            getValType()))->Fold(R.getCurrentRecord());
-+
-+  return const_cast<CondOpInit *>(this);
-+}
-+
-+Init *CondOpInit::Fold(Record *CurRec) const {
-+  for ( unsigned i = 0; i < NumConds; ++i) {
-+    Init *Cond = getCond(i);
-+    Init *Val = getVal(i);
-+
-+    if (IntInit *CondI = dyn_cast_or_null<IntInit>(
-+            Cond->convertInitializerTo(IntRecTy::get()))) {
-+      if (CondI->getValue())
-+        return Val->convertInitializerTo(getValType());
-+    } else
-+     return const_cast<CondOpInit *>(this);
-+  }
-+
-+  PrintFatalError(CurRec->getLoc(),
-+                  CurRec->getName() +
-+                  " does not have any true condition in:" +
-+                  this->getAsString());
-+  return nullptr;
-+}
-+
-+bool CondOpInit::isConcrete() const {
-+  for (const Init *Case : getConds())
-+    if (!Case->isConcrete())
-+      return false;
-+
-+  for (const Init *Val : getVals())
-+    if (!Val->isConcrete())
-+      return false;
-+
-+  return true;
-+}
-+
-+bool CondOpInit::isComplete() const {
-+  for (const Init *Case : getConds())
-+    if (!Case->isComplete())
-+      return false;
-+
-+  for (const Init *Val : getVals())
-+    if (!Val->isConcrete())
-+      return false;
-+
-+  return true;
-+}
-+
-+std::string CondOpInit::getAsString() const {
-+  std::string Result = "!cond(";
-+  for (unsigned i = 0; i < getNumConds(); i++) {
-+    Result += getCond(i)->getAsString() + ": ";
-+    Result += getVal(i)->getAsString();
-+    if (i != getNumConds()-1)
-+      Result += ", ";
-+  }
-+  return Result + ")";
-+}
-+
-+Init *CondOpInit::getBit(unsigned Bit) const {
-+  return VarBitInit::get(const_cast<CondOpInit *>(this), Bit);
-+}
-+
- static void ProfileDagInit(FoldingSetNodeID &ID, Init *V, StringInit *VN,
-                            ArrayRef<Init *> ArgRange,
-                            ArrayRef<StringInit *> NameRange) {
-diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp
-index 16aeee56107..f733cc3c134 100644
---- a/lib/TableGen/TGLexer.cpp
-+++ b/lib/TableGen/TGLexer.cpp
-@@ -545,6 +545,7 @@ tgtok::TokKind TGLexer::LexExclaim() {
-     .Case("ge", tgtok::XGe)
-     .Case("gt", tgtok::XGt)
-     .Case("if", tgtok::XIf)
-+    .Case("cond", tgtok::XCond)
-     .Case("isa", tgtok::XIsA)
-     .Case("head", tgtok::XHead)
-     .Case("tail", tgtok::XTail)
-diff --git a/lib/TableGen/TGLexer.h b/lib/TableGen/TGLexer.h
-index e9980b36b97..9bdb01cf3dd 100644
---- a/lib/TableGen/TGLexer.h
-+++ b/lib/TableGen/TGLexer.h
-@@ -51,7 +51,7 @@ namespace tgtok {
- 
-     // !keywords.
-     XConcat, XADD, XAND, XOR, XSRA, XSRL, XSHL, XListConcat, XStrConcat, XCast,
--    XSubst, XForEach, XFoldl, XHead, XTail, XSize, XEmpty, XIf, XEq, XIsA, XDag,
-+    XSubst, XForEach, XFoldl, XHead, XTail, XSize, XEmpty, XIf, XCond, XEq, XIsA, XDag,
-     XNe, XLe, XLt, XGe, XGt,
- 
-     // Integer value.
-diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp
-index 1d1f3603c83..200190acd59 100644
---- a/lib/TableGen/TGParser.cpp
-+++ b/lib/TableGen/TGParser.cpp
-@@ -1445,6 +1445,9 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
-     return (TernOpInit::get(Code, LHS, MHS, RHS, Type))->Fold(CurRec);
-   }
- 
-+  case tgtok::XCond:
-+    return ParseOperationCond(CurRec, ItemType);
-+
-   case tgtok::XFoldl: {
-     // Value ::= !foldl '(' Id ',' Id ',' Value ',' Value ',' Value ')'
-     Lex.Lex(); // eat the operation
-@@ -1603,6 +1606,91 @@ RecTy *TGParser::ParseOperatorType() {
-   return Type;
- }
- 
-+Init *TGParser::ParseOperationCond(Record *CurRec, RecTy *ItemType) {
-+  Lex.Lex();  // eat the operation 'cond'
-+
-+  if (Lex.getCode() != tgtok::l_paren) {
-+     TokError("expected '(' after !cond operator");
-+     return nullptr;
-+  }
-+  Lex.Lex();  // eat the '('
-+
-+  // Parse through '[Case: Val,]+'
-+  SmallVector<Init *, 4> Case;
-+  SmallVector<Init *, 4> Val;
-+  while (true) {
-+    if (Lex.getCode() == tgtok::r_paren) {
-+      Lex.Lex(); // eat the ')'
-+      break;
-+    }
-+
-+    Init *V = ParseValue(CurRec);
-+    if (!V)
-+      return nullptr;
-+    Case.push_back(V);
-+
-+    if (Lex.getCode() != tgtok::colon) {
-+      TokError("expected ':'  following a condition in !cond operator");
-+      return nullptr;
-+    }
-+    Lex.Lex(); // eat the ':'
-+
-+    V = ParseValue(CurRec, ItemType);
-+    if (!V)
-+      return nullptr;
-+    Val.push_back(V);
-+
-+    if (Lex.getCode() == tgtok::r_paren) {
-+      Lex.Lex(); // eat the ')'
-+      break;
-+    }
-+
-+    if (Lex.getCode() != tgtok::comma) {
-+      TokError("expected ',' or ')' following a value in !cond operator");
-+      return nullptr;
-+    }
-+    Lex.Lex();  // eat the ','
-+  }
-+
-+  if (Case.size() < 1) {
-+    TokError("there should be at least 1 'condition : value' in the !cond operator");
-+    return nullptr;
-+  }
-+
-+  // resolve type
-+  RecTy *Type = nullptr;
-+  for (Init *V : Val) {
-+    RecTy *VTy = nullptr;
-+    if (TypedInit *Vt = dyn_cast<TypedInit>(V))
-+      VTy = Vt->getType();
-+    if (BitsInit *Vbits = dyn_cast<BitsInit>(V))
-+      VTy = BitsRecTy::get(Vbits->getNumBits());
-+    if (isa<BitInit>(V))
-+      VTy = BitRecTy::get();
-+
-+    if (Type == nullptr) {
-+      if (!isa<UnsetInit>(V))
-+        Type = VTy;
-+    } else {
-+      if (!isa<UnsetInit>(V)) {
-+        RecTy *RType = resolveTypes(Type, VTy);
-+        if (!RType) {
-+          TokError(Twine("inconsistent types '") + Type->getAsString() +
-+                         "' and '" + VTy->getAsString() + "' for !cond");
-+          return nullptr;
-+        }
-+        Type = RType;
-+      }
-+    }
-+  }
-+
-+  if (!Type) {
-+    TokError("could not determine type for !cond from its arguments");
-+    return nullptr;
-+  }
-+  return CondOpInit::get(Case, Val, Type)->Fold(CurRec);
-+}
-+
- /// ParseSimpleValue - Parse a tblgen value.  This returns null on error.
- ///
- ///   SimpleValue ::= IDValue
-@@ -1621,6 +1709,7 @@ RecTy *TGParser::ParseOperatorType() {
- ///   SimpleValue ::= SRLTOK '(' Value ',' Value ')'
- ///   SimpleValue ::= LISTCONCATTOK '(' Value ',' Value ')'
- ///   SimpleValue ::= STRCONCATTOK '(' Value ',' Value ')'
-+///   SimpleValue ::= COND '(' [Value ':' Value,]+ ')'
- ///
- Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
-                                  IDParseMode Mode) {
-@@ -1933,6 +2022,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
-   case tgtok::XListConcat:
-   case tgtok::XStrConcat:   // Value ::= !binop '(' Value ',' Value ')'
-   case tgtok::XIf:
-+  case tgtok::XCond:
-   case tgtok::XFoldl:
-   case tgtok::XForEach:
-   case tgtok::XSubst: {  // Value ::= !ternop '(' Value ',' Value ',' Value ')'
-diff --git a/lib/TableGen/TGParser.h b/lib/TableGen/TGParser.h
-index e3849043513..215b9dad770 100644
---- a/lib/TableGen/TGParser.h
-+++ b/lib/TableGen/TGParser.h
-@@ -194,6 +194,7 @@ private:  // Parser methods.
-   bool ParseRangePiece(SmallVectorImpl<unsigned> &Ranges);
-   RecTy *ParseType();
-   Init *ParseOperation(Record *CurRec, RecTy *ItemType);
-+  Init *ParseOperationCond(Record *CurRec, RecTy *ItemType);
-   RecTy *ParseOperatorType();
-   Init *ParseObjectName(MultiClass *CurMultiClass);
-   Record *ParseClassID();
-diff --git a/test/TableGen/cond-bitlist.td b/test/TableGen/cond-bitlist.td
-new file mode 100644
-index 00000000000..bce615838df
---- /dev/null
-+++ b/test/TableGen/cond-bitlist.td
-@@ -0,0 +1,27 @@
-+// RUN: llvm-tblgen %s | FileCheck %s
-+// XFAIL: vg_leak
-+
-+class S<int s> {
-+  bits<2> val = !cond(!eq(s, 8):  {0, 0},
-+                      !eq(s, 16): 0b01,
-+                      !eq(s, 32): 2,
-+                      !eq(s, 64): {1, 1},
-+                              1 : ?);
-+}
-+
-+def D8  : S<8>;
-+def D16 : S<16>;
-+def D32 : S<32>;
-+def D64 : S<64>;
-+def D128: S<128>;
-+// CHECK: def D128
-+// CHECK-NEXT: bits<2> val = { ?, ? };
-+// CHECK: def D16
-+// CHECK-NEXT: bits<2> val = { 0, 1 };
-+// CHECK: def D32
-+// CHECK-NEXT: bits<2> val = { 1, 0 };
-+// CHECK: def D64
-+// CHECK-NEXT: bits<2> val = { 1, 1 };
-+// CHECK: def D8
-+// CHECK-NEXT: bits<2> val = { 0, 0 };
-+
-diff --git a/test/TableGen/cond-default.td b/test/TableGen/cond-default.td
-new file mode 100644
-index 00000000000..816bf10676f
---- /dev/null
-+++ b/test/TableGen/cond-default.td
-@@ -0,0 +1,11 @@
-+// Check that not specifying a valid condition results in error
-+
-+// RUN: not llvm-tblgen %s 2>&1 | FileCheck %s
-+// XFAIL: vg_leak
-+
-+class C<int x> {
-+  string s  = !cond(!lt(x,0) : "negative", !gt(x,0) : "positive");
-+}
-+
-+def Zero : C<0>;
-+//CHECK: error: Zero does not have any true condition in:!cond(0: "negative", 0: "positive")
-diff --git a/test/TableGen/cond-empty-list-arg.td b/test/TableGen/cond-empty-list-arg.td
-new file mode 100644
-index 00000000000..5f4ccade169
---- /dev/null
-+++ b/test/TableGen/cond-empty-list-arg.td
-@@ -0,0 +1,8 @@
-+// RUN: llvm-tblgen %s
-+// XFAIL: vg_leak
-+
-+class C<bit cond> {
-+  bit true = 1;
-+  list<int> X = !cond(cond: [1, 2, 3], true : []);
-+  list<int> Y = !cond(cond: [], true : [4, 5, 6]);
-+}
-diff --git a/test/TableGen/cond-inheritance.td b/test/TableGen/cond-inheritance.td
-new file mode 100644
-index 00000000000..4b4abdf72f3
---- /dev/null
-+++ b/test/TableGen/cond-inheritance.td
-@@ -0,0 +1,22 @@
-+// Make sure !cond gets propagated across multiple layers of inheritance.
-+// RUN: llvm-tblgen %s | FileCheck %s
-+// XFAIL: vg_leak
-+
-+class getInt<int c> {
-+  int ret = !cond(c: 0, 1 : 1);
-+}
-+
-+class I1<int c> {
-+  int i = getInt<c>.ret;
-+}
-+
-+class I2<int c> : I1<c>;
-+
-+def DI1: I1<1>;
-+// CHECK: def DI1 {     // I1
-+// CHECK-NEXT: int i = 0;
-+
-+// CHECK: def DI2 {     // I1 I2
-+// CHECK-NEXT: int i = 0;
-+def DI2: I2<1>;
-+
-diff --git a/test/TableGen/cond-let.td b/test/TableGen/cond-let.td
-new file mode 100644
-index 00000000000..044878f2ab8
---- /dev/null
-+++ b/test/TableGen/cond-let.td
-@@ -0,0 +1,36 @@
-+// Check support for `!cond' operator as part of a `let' statement.
-+// RUN: llvm-tblgen %s | FileCheck %s
-+// XFAIL: vg_leak
-+
-+
-+class C<bits<3> x, bits<4> y, bit z> {
-+  bits<16> n;
-+
-+  let n{11}  = !cond(y{3}: 1,
-+                     y{2}: x{0},
-+                     y{1}: x{1},
-+                     y{0}: x{2},
-+                     {1} :?);
-+  let n{10-9}= !cond(x{2}: y{3-2},
-+                     x{1}: y{2-1},
-+                     x{1}: y{1-0},
-+                     {1} : ?);
-+  let n{8-6} = !cond(x{2}: 0b010,  1 : 0b110);
-+  let n{5-4} = !cond(x{1}: y{3-2}, 1 :  {0, 1});
-+  let n{3-0} = !cond(x{0}: y{3-0}, 1 : {z, y{2}, y{1}, y{0}});
-+}
-+
-+
-+def C1 : C<{1, 0, 1}, {0, 1, 0, 1}, 0>;
-+def C2 : C<{0, 1, 0}, {1, 0, 1, 0}, 1>;
-+def C3 : C<{0, 0, 0}, {1, 0, 1, 0}, 0>;
-+def C4 : C<{0, 0, 0}, {0, 0, 0, 0}, 0>;
-+
-+// CHECK: def C1
-+// CHECK-NEXT: bits<16> n = { ?, ?, ?, ?, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1 };
-+// CHECK: def C2
-+// CHECK-NEXT: bits<16> n = { ?, ?, ?, ?, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0 };
-+// CHECK: def C3
-+// CHECK-NEXT: bits<16> n = { ?, ?, ?, ?, 1, ?, ?, 1, 1, 0, 0, 1, 0, 0, 1, 0 };
-+// CHECK: def C4
-+// CHECK-NEXT: bits<16> n = { ?, ?, ?, ?, ?, ?, ?, 1, 1, 0, 0, 1, 0, 0, 0, 0 };
-diff --git a/test/TableGen/cond-list.td b/test/TableGen/cond-list.td
-new file mode 100644
-index 00000000000..aa013cea4e1
---- /dev/null
-+++ b/test/TableGen/cond-list.td
-@@ -0,0 +1,38 @@
-+// RUN: llvm-tblgen %s | FileCheck %s
-+// XFAIL: vg_leak
-+
-+
-+class A<list<list<int>> vals> {
-+  list<int> first = vals[0];
-+  list<int> rest  = !cond(!empty(!tail(vals)): vals[0],
-+                          1                 : vals[1]);
-+}
-+
-+def A_OneEl : A<[[1,2,3]]>;
-+// CHECK:      def A_OneEl {  // A
-+// CHECK-NEXT: list<int> first = [1, 2, 3];
-+// CHECK-NEXT: list<int> rest = [1, 2, 3];
-+// CHECK-NEXT: }
-+
-+def A_TwoEl : A<[[1,2,3], [4,5,6]]>;
-+// CHECK:      def A_TwoEl { // A
-+// CHECK-NEXT: list<int> first = [1, 2, 3];
-+// CHECK-NEXT: list<int> rest = [4, 5, 6];
-+// CHECK-NEXT: }
-+
-+
-+class B<list<int> v> {
-+  list<int> vals = v;
-+}
-+class BB<list<list<int>> vals> : B<!cond(!empty(!tail(vals)): vals[0],  1 : vals[1])>;
-+class BBB<list<list<int>> vals> : BB<vals>;
-+
-+def B_OneEl : BBB<[[1,2,3]]>;
-+// CHECK:      def B_OneEl { //  B BB BBB
-+// CHECK-NEXT: list<int> vals = [1, 2, 3];
-+// CHECK-NEXT: }
-+
-+def B_TwoEl : BBB<[[1,2,3],[4,5,6]]>;
-+// CHECK:      def B_TwoEl { // B BB BBB
-+// CHECK-NEXT: list<int> vals = [4, 5, 6];
-+// CHECK-NEXT: }
-diff --git a/test/TableGen/cond-subclass.td b/test/TableGen/cond-subclass.td
-new file mode 100644
-index 00000000000..9f6f6e2cb8c
---- /dev/null
-+++ b/test/TableGen/cond-subclass.td
-@@ -0,0 +1,27 @@
-+// Check that !cond with operands of different subtypes can
-+// initialize a supertype variable.
-+// RUN: llvm-tblgen %s | FileCheck %s
-+// XFAIL: vg_leak
-+
-+class E<int dummy> {}
-+class E1<int dummy> : E<dummy> {}
-+class E2<int dummy> : E<dummy> {}
-+
-+class EX<int cc, E1 b, E2 c> {
-+  E x = !cond(cc: b, 1 : c);
-+}
-+
-+def E1d : E1<0>;
-+def E2d : E2<0>;
-+
-+def EXd1 : EX<1, E1d, E2d>;
-+def EXd2 : EX<0, E1d, E2d>;
-+
-+// CHECK: def EXd1 {
-+// CHECK:   E x = E1d;
-+// CHECK: }
-+//
-+// CHECK: def EXd2 {
-+// CHECK:   E x = E2d;
-+// CHECK: }
-+
-diff --git a/test/TableGen/cond-type.td b/test/TableGen/cond-type.td
-new file mode 100644
-index 00000000000..fd2a3cc52b7
---- /dev/null
-+++ b/test/TableGen/cond-type.td
-@@ -0,0 +1,11 @@
-+// RUN: not llvm-tblgen %s 2>&1 | FileCheck %s
-+// XFAIL: vg_leak
-+
-+class A<int dummy> {}
-+class B<int dummy> : A<dummy> {}
-+class C<int dummy> : A<dummy> {}
-+
-+// CHECK: Value 'x' of type 'C' is incompatible with initializer '{{.*}}' of type 'A'
-+class X<int cc, B b, C c> {
-+  C x = !cond(cc: b, 1 : c);
-+}
-diff --git a/test/TableGen/cond-usage.td b/test/TableGen/cond-usage.td
-new file mode 100644
-index 00000000000..055fd6d7c69
---- /dev/null
-+++ b/test/TableGen/cond-usage.td
-@@ -0,0 +1,29 @@
-+// RUN: llvm-tblgen %s | FileCheck %s
-+// XFAIL: vg_leak
-+
-+// Check that !cond picks the first true value
-+// CHECK:       class A
-+// CHECK-NEXT:  string S = !cond(!eq(A:x, 10): "ten", !eq(A:x, 11): "eleven", !eq(A:x, 10): "TEN", !gt(A:x, 9): "MoreThanNine", 1: "unknown"); 
-+// CHECK: B1
-+// CHECK-NEXT: string S = "unknown"
-+// CHECK: B10
-+// CHECK-NEXT: string S = "ten";
-+// CHECK: def B11
-+// CHECK-NEXT: string S = "eleven";
-+// CHECK: def B12
-+// CHECK-NEXT:  string S = "MoreThanNine";
-+// CHECK: def B9
-+// CHECK-NEXT: string S = "unknown"
-+
-+class A<int x> {
-+  string S = !cond(!eq(x,10) : "ten",
-+                   !eq(x,11) : "eleven",
-+                   !eq(x,10) : "TEN",
-+                   !gt(x,9) : "MoreThanNine",
-+                   !eq(1,1) : "unknown");
-+}
-+def B1  : A<1>;
-+def B9  : A<9>;
-+def B10 : A<10>;
-+def B11 : A<11>;
-+def B12 : A<12>;
-diff --git a/test/TableGen/condsbit.td b/test/TableGen/condsbit.td
-new file mode 100644
-index 00000000000..e08ac97f68b
---- /dev/null
-+++ b/test/TableGen/condsbit.td
-@@ -0,0 +1,15 @@
-+// check that !cond works well with bit conditional values
-+// RUN: llvm-tblgen %s | FileCheck %s
-+// XFAIL: vg_leak
-+// CHECK: a = 6
-+// CHECK: a = 5
-+
-+class A<bit b = 1> {
-+  bit true = 1;
-+  int a = !cond(b: 5, true : 6);
-+  bit c = !cond(b: 0, true : 1);
-+  bits<1> d = !cond(b: 0, true : 1);
-+}
-+
-+def X : A<0>;
-+def Y : A;
--- 
-2.17.1
-
diff --git a/deps/patches/llvm-8.0-D59389-refactor-wmma.patch b/deps/patches/llvm-8.0-D59389-refactor-wmma.patch
deleted file mode 100644
index 31af5246715a16..00000000000000
--- a/deps/patches/llvm-8.0-D59389-refactor-wmma.patch
+++ /dev/null
@@ -1,899 +0,0 @@
-From e9737bf498597707d084398b9485676dc7421644 Mon Sep 17 00:00:00 2001
-From: Artem Belevich <tra@google.com>
-Date: Thu, 25 Apr 2019 22:27:35 +0000
-Subject: [PATCH] [NVPTX] Refactor generation of MMA intrinsics and
- instructions. NFC.
-
-Generalized constructions of 'fragments' of MMA operations to provide
-common primitives for construction of the ops. This will make it easier
-to add new variants of the instructions that operate on integer types.
-
-Use nested foreach loops which makes it possible to better control
-naming of the intrinsics.
-
-This patch does not affect LLVM's output, so there are no test changes.
-
-Differential Revision: https://reviews.llvm.org/D59389
-
-llvm-svn: 359245
----
- include/llvm/IR/IntrinsicsNVVM.td   | 258 ++++++--------
- lib/Target/NVPTX/NVPTXIntrinsics.td | 512 ++++++++++------------------
- 2 files changed, 295 insertions(+), 475 deletions(-)
-
-diff --git a/include/llvm/IR/IntrinsicsNVVM.td b/include/llvm/IR/IntrinsicsNVVM.td
-index 7f694f68969..e30a27613a6 100644
---- a/include/llvm/IR/IntrinsicsNVVM.td
-+++ b/include/llvm/IR/IntrinsicsNVVM.td
-@@ -38,6 +38,69 @@ def llvm_anyi64ptr_ty     : LLVMAnyPointerType<llvm_i64_ty>;     // (space)i64*
- // MISC
- //
- 
-+// Helper class for construction of n-element list<LLVMtype> [t,t,...,t]
-+class RepLLVMType<int N, LLVMType T> {
-+  list<LLVMType> ret = !if(N, !listconcat(RepLLVMType<!add(N,-1), T>.ret, [T]), []);
-+}
-+
-+// Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
-+// Geom: m<M>n<N>k<K>. E.g. m8n32k16
-+// Frag: [abcd]
-+// PtxEltType: PTX type for the element.
-+class WMMA_REGS<string Geom, string Frag, string PtxEltType> {
-+  string geom = Geom;
-+  string frag = Frag;
-+  string ptx_elt_type = PtxEltType;
-+  string ft = frag#":"#ptx_elt_type;
-+  list<LLVMType> regs = !cond(
-+    // fp16 -> fp16/fp32 @  m16n16k16/m8n32k16/m32n8k16
-+    // All currently supported geometries use the same fragment format,
-+    // so we only need to consider {fragment, type}.
-+    !eq(ft,"a:f16") : RepLLVMType<8, llvm_v2f16_ty>.ret,
-+    !eq(ft,"b:f16") : RepLLVMType<8, llvm_v2f16_ty>.ret,
-+    !eq(ft,"c:f16") : RepLLVMType<4, llvm_v2f16_ty>.ret,
-+    !eq(ft,"d:f16") : RepLLVMType<4, llvm_v2f16_ty>.ret,
-+    !eq(ft,"c:f32") : RepLLVMType<8, llvm_float_ty>.ret,
-+    !eq(ft,"d:f32") : RepLLVMType<8, llvm_float_ty>.ret);
-+}
-+
-+class WMMA_NAME_LDST<string Op, WMMA_REGS Frag, string Layout, int WithStride> {
-+  string intr = "llvm.nvvm.wmma."
-+                # Frag.geom
-+                # "." # Op
-+                # "." # Frag.frag
-+                # "." # Layout
-+                # !if(WithStride, ".stride", "")
-+                # "." # Frag.ptx_elt_type
-+                ;
-+  // TODO(tra): record name should ideally use the same field order as the intrinsic.
-+  // E.g. string record = !subst("llvm", "int",
-+  //                      !subst(".", "_", llvm));
-+  string record = "int_nvvm_wmma_"
-+                # Frag.geom
-+                # "_" # Op
-+                # "_" # Frag.frag
-+                # "_" # Frag.ptx_elt_type
-+                # "_" # Layout
-+                # !if(WithStride, "_stride", "");
-+}
-+
-+class WMMA_NAME_MMA<string ALayout, string BLayout,
-+                    WMMA_REGS C, WMMA_REGS D,
-+                    int Satfinite> {
-+  string llvm = "llvm.nvvm.wmma."
-+                # C.geom
-+                # ".mma"
-+                # "." # ALayout
-+                # "." # BLayout
-+                # "." # D.ptx_elt_type  // Intrinsic encodes 'd' first.
-+                # "." # C.ptx_elt_type
-+                # !if(Satfinite, ".satfinite", "");
-+
-+  string record = !subst(".", "_",
-+                  !subst("llvm.", "int_", llvm));
-+}
-+
- let TargetPrefix = "nvvm" in {
-   def int_nvvm_prmt : GCCBuiltin<"__nvvm_prmt">,
-       Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-@@ -3882,166 +3945,69 @@ def int_nvvm_match_all_sync_i64p :
- //
- // WMMA instructions
- //
--
- // WMMA.LOAD
--class NVVM_WMMA_LD_GALSTS<string Geometry, string Abc, string Layout,
--                          string Type, LLVMType regty, int WithStride>
--  : Intrinsic<!if(!eq(Abc#Type,"cf16"),
--                  [regty, regty, regty, regty],
--                  [regty, regty, regty, regty,
--                   regty, regty, regty, regty]),
-+class NVVM_WMMA_LD<WMMA_REGS Frag, string Layout, int WithStride>
-+  : Intrinsic<Frag.regs,
-               !if(WithStride, [llvm_anyptr_ty, llvm_i32_ty], [llvm_anyptr_ty]),
-               [IntrReadMem, IntrArgMemOnly, ReadOnly<0>, NoCapture<0>],
--              "llvm.nvvm.wmma."
--                # Geometry
--                # ".load"
--                # "." # Abc
--                # "." # Layout
--                # !if(WithStride, ".stride", "")
--                # "." # Type>;
--
--multiclass NVVM_WMMA_LD_GALT<string Geometry, string Abc, string Layout,
--                             string Type, LLVMType regty> {
--  def _stride: NVVM_WMMA_LD_GALSTS<Geometry, Abc, Layout, Type, regty, 1>;
--  def NAME   : NVVM_WMMA_LD_GALSTS<Geometry, Abc, Layout, Type, regty, 0>;
--}
--
--multiclass NVVM_WMMA_LD_GAT<string Geometry, string Abc,
--                           string Type, LLVMType regty> {
--  defm _row: NVVM_WMMA_LD_GALT<Geometry, Abc, "row", Type, regty>;
--  defm _col: NVVM_WMMA_LD_GALT<Geometry, Abc, "col", Type, regty>;
--}
--
--multiclass NVVM_WMMA_LD_G<string Geometry> {
--  defm _a_f16: NVVM_WMMA_LD_GAT<Geometry, "a", "f16", llvm_v2f16_ty>;
--  defm _b_f16: NVVM_WMMA_LD_GAT<Geometry, "b", "f16", llvm_v2f16_ty>;
--  defm _c_f16: NVVM_WMMA_LD_GAT<Geometry, "c", "f16", llvm_v2f16_ty>;
--  defm _c_f32: NVVM_WMMA_LD_GAT<Geometry, "c", "f32", llvm_float_ty>;
--}
--
--multiclass NVVM_WMMA_LD {
--  defm _m32n8k16_load: NVVM_WMMA_LD_G<"m32n8k16">;
--  defm _m16n16k16_load: NVVM_WMMA_LD_G<"m16n16k16">;
--  defm _m8n32k16_load: NVVM_WMMA_LD_G<"m8n32k16">;
--}
--
--defm int_nvvm_wmma: NVVM_WMMA_LD;
-+              WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.intr>;
- 
- // WMMA.STORE.D
--class NVVM_WMMA_STD_GLSTS<string Geometry, string Layout,
--                          string Type, LLVMType regty, int WithStride,
--                          // This is only used to create a typed empty array we
--                          // need to pass to !if below.
--                          list<LLVMType>Empty=[]>
-+class NVVM_WMMA_ST<WMMA_REGS Frag, string Layout, int WithStride>
-   : Intrinsic<[],
-               !listconcat(
-                 [llvm_anyptr_ty],
--                !if(!eq(Type,"f16"),
--                    [regty, regty, regty, regty],
--                    [regty, regty, regty, regty,
--                     regty, regty, regty, regty]),
--                !if(WithStride, [llvm_i32_ty], Empty)),
-+                Frag.regs,
-+                !if(WithStride, [llvm_i32_ty], [])),
-               [IntrWriteMem, IntrArgMemOnly, WriteOnly<0>, NoCapture<0>],
--              "llvm.nvvm.wmma."
--                   # Geometry
--                   # ".store.d"
--                   # "." # Layout
--                   # !if(WithStride, ".stride", "")
--                   # "." # Type>;
--
--multiclass NVVM_WMMA_STD_GLT<string Geometry, string Layout,
--                             string Type, LLVMType regty> {
--  def _stride: NVVM_WMMA_STD_GLSTS<Geometry, Layout, Type, regty, 1>;
--  def NAME:    NVVM_WMMA_STD_GLSTS<Geometry, Layout, Type, regty, 0>;
--}
--
--multiclass NVVM_WMMA_STD_GT<string Geometry, string Type, LLVMType regty> {
--  defm _row: NVVM_WMMA_STD_GLT<Geometry, "row", Type, regty>;
--  defm _col: NVVM_WMMA_STD_GLT<Geometry, "col", Type, regty>;
--}
--multiclass NVVM_WMMA_STD_G<string Geometry> {
--  defm _d_f16: NVVM_WMMA_STD_GT<Geometry, "f16", llvm_v2f16_ty>;
--  defm _d_f32: NVVM_WMMA_STD_GT<Geometry, "f32", llvm_float_ty>;
--}
--
--multiclass NVVM_WMMA_STD {
--  defm _m32n8k16_store:  NVVM_WMMA_STD_G<"m32n8k16">;
--  defm _m16n16k16_store: NVVM_WMMA_STD_G<"m16n16k16">;
--  defm _m8n32k16_store:  NVVM_WMMA_STD_G<"m8n32k16">;
-+              WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.intr>;
-+
-+// Create all load/store variants 
-+foreach geom = ["m16n16k16", "m32n8k16", "m8n32k16" ] in {
-+  foreach layout = ["row", "col"] in {
-+    foreach stride = [0, 1] in {
-+      foreach frag = [WMMA_REGS<geom, "a", "f16">,
-+                      WMMA_REGS<geom, "b", "f16">,
-+                      WMMA_REGS<geom, "c", "f16">,
-+                      WMMA_REGS<geom, "c", "f32">] in {
-+          def WMMA_NAME_LDST<"load", frag, layout, stride>.record
-+             : NVVM_WMMA_LD<frag, layout, stride>;
-+      }
-+      foreach frag = [WMMA_REGS<geom, "d", "f16">,
-+                      WMMA_REGS<geom, "d", "f32">] in {
-+          def WMMA_NAME_LDST<"store", frag, layout, stride>.record
-+             : NVVM_WMMA_ST<frag, layout, stride>;
-+      }
-+    }
-+  }
- }
- 
--defm int_nvvm_wmma: NVVM_WMMA_STD;
--
- // WMMA.MMA
--class NVVM_WMMA_MMA_GABDCS<string Geometry,
--                           string ALayout, string BLayout,
--                           string DType, LLVMType d_regty,
--                           string CType, LLVMType c_regty,
--                           string Satfinite = "">
--  : Intrinsic<!if(!eq(DType,"f16"),
--                      [d_regty, d_regty, d_regty, d_regty],
--                      [d_regty, d_regty, d_regty, d_regty,
--                       d_regty, d_regty, d_regty, d_regty]),
-+class NVVM_WMMA_MMA<string ALayout, string BLayout,
-+                    WMMA_REGS C, WMMA_REGS D, int Satfinite>
-+  : Intrinsic<D.regs,
-               !listconcat(
--                [// A
--                llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty,
--                llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty,
--                // B
--                llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty,
--                llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty],
--                !if(!eq(CType,"f16"),
--                      [c_regty, c_regty, c_regty, c_regty],
--                      [c_regty, c_regty, c_regty, c_regty,
--                       c_regty, c_regty, c_regty, c_regty])),
-+                WMMA_REGS<C.geom, "a", "f16">.regs,
-+                WMMA_REGS<C.geom, "b", "f16">.regs,
-+                C.regs),
-               [IntrNoMem],
--              "llvm.nvvm.wmma."
--                # Geometry
--                # ".mma"
--                # "." # ALayout
--                # "." # BLayout
--                # "." # DType
--                # "." # CType
--                # Satfinite> {
--}
--
--multiclass NVVM_WMMA_MMA_GABDC<string Geometry, string ALayout, string BLayout,
--                               string DType, LLVMType d_regty,
--                               string CType, LLVMType c_regty> {
--  def NAME : NVVM_WMMA_MMA_GABDCS<Geometry, ALayout, BLayout,
--                                  DType, d_regty, CType, c_regty>;
--  def _satfinite: NVVM_WMMA_MMA_GABDCS<Geometry, ALayout, BLayout,
--                                       DType, d_regty, CType, c_regty,".satfinite">;
--}
--
--multiclass NVVM_WMMA_MMA_GABD<string Geometry, string ALayout, string BLayout,
--                              string DType, LLVMType d_regty> {
--  defm _f16: NVVM_WMMA_MMA_GABDC<Geometry, ALayout, BLayout, DType, d_regty,
--                                "f16", llvm_v2f16_ty>;
--  defm _f32: NVVM_WMMA_MMA_GABDC<Geometry, ALayout, BLayout, DType, d_regty,
--                                "f32", llvm_float_ty>;
--}
--
--multiclass NVVM_WMMA_MMA_GAB<string Geometry, string ALayout, string BLayout> {
--  defm _f16: NVVM_WMMA_MMA_GABD<Geometry, ALayout, BLayout, "f16", llvm_v2f16_ty>;
--  defm _f32: NVVM_WMMA_MMA_GABD<Geometry, ALayout, BLayout, "f32", llvm_float_ty>;
--}
--
--multiclass NVVM_WMMA_MMA_GA<string Geometry, string ALayout> {
--  defm _col: NVVM_WMMA_MMA_GAB<Geometry, ALayout, "col">;
--  defm _row: NVVM_WMMA_MMA_GAB<Geometry, ALayout, "row">;
--}
--
--multiclass NVVM_WMMA_MMA_G<string Geometry> {
--  defm _col: NVVM_WMMA_MMA_GA<Geometry, "col">;
--  defm _row: NVVM_WMMA_MMA_GA<Geometry, "row">;
--}
--
--multiclass NVVM_WMMA_MMA {
--  defm _m32n8k16_mma : NVVM_WMMA_MMA_G<"m32n8k16">;
--  defm _m16n16k16_mma : NVVM_WMMA_MMA_G<"m16n16k16">;
--  defm _m8n32k16_mma : NVVM_WMMA_MMA_G<"m8n32k16">;
-+              WMMA_NAME_MMA<ALayout, BLayout, C, D, Satfinite>.llvm>;
-+
-+foreach geom = ["m16n16k16", "m32n8k16", "m8n32k16" ] in {
-+  foreach layout_a = ["row", "col"] in {
-+    foreach layout_b = ["row", "col"] in {
-+      foreach frag_c = [WMMA_REGS<geom, "c", "f16">,
-+                        WMMA_REGS<geom, "c", "f32">] in {
-+        foreach frag_d = [WMMA_REGS<geom, "d", "f16">,
-+                          WMMA_REGS<geom, "d", "f32">] in {
-+          foreach satf = [0, 1] in {
-+            def WMMA_NAME_MMA<layout_a, layout_b, frag_c, frag_d, satf>.record
-+             : NVVM_WMMA_MMA<layout_a, layout_b, frag_c, frag_d, satf>;
-+          }
-+        }
-+      }
-+    }
-+  }
- }
- 
--defm int_nvvm_wmma : NVVM_WMMA_MMA;
--
- } // let TargetPrefix = "nvvm"
-diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td
-index 47dcdcf6e0b..b9a67ba5ed3 100644
---- a/lib/Target/NVPTX/NVPTXIntrinsics.td
-+++ b/lib/Target/NVPTX/NVPTXIntrinsics.td
-@@ -27,7 +27,17 @@ def immDouble1 : PatLeaf<(fpimm), [{
-     return (d==1.0);
- }]>;
- 
--
-+def AS_match {
-+  code generic = [{
-+   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
-+  }];
-+  code shared = [{
-+   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
-+  }];
-+  code global = [{
-+   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
-+  }];
-+}
- 
- //-----------------------------------
- // Synchronization and shuffle functions
-@@ -1007,17 +1017,11 @@ def INT_FNS_iii : INT_FNS_MBO<(ins    i32imm:$mask,    i32imm:$base,    i32imm:$
- //-----------------------------------
- 
- class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
-- : PatFrag<ops, frag, [{
--   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
--}]>;
-+ : PatFrag<ops, frag, AS_match.global>;
- class ATOMIC_SHARED_CHK <dag ops, dag frag>
-- : PatFrag<ops, frag, [{
--   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
--}]>;
-+ : PatFrag<ops, frag, AS_match.shared>;
- class ATOMIC_GENERIC_CHK <dag ops, dag frag>
-- : PatFrag<ops, frag, [{
--   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
--}]>;
-+ : PatFrag<ops, frag, AS_match.generic>;
- 
- multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
-   string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
-@@ -7381,36 +7385,60 @@ def INT_PTX_SREG_WARPSIZE :
-     NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
-               [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
- 
--//
--// wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
--//
--
- class EmptyNVPTXInst : NVPTXInst<(outs), (ins), "?", []>;
-+// Generates list of n sequential register names.
-+class RegSeq<int n, string prefix> {
-+  list<string> ret = !if(n, !listconcat(RegSeq<!add(n,-1), prefix>.ret,
-+                                        [prefix # !add(n, -1)]),
-+                            []);
-+}
- 
--class WMMA_LOAD_GALSTOS<string Geometry, string Abc, string Layout,
--                        string Space, string Type, NVPTXRegClass regclass,
--                        DAGOperand SrcOp, bit WithStride>
--  : EmptyNVPTXInst,
--    Requires<[!if(!eq(Geometry, "m16n16k16"),
--                  hasPTX60,
--                  hasPTX61),
--              hasSM70]> {
--  // Pattern (created by WMMA_LOAD_INTR_HELPER below) that matches the intrinsic
--  // for this function.
--  PatFrag IntrMatcher = !cast<PatFrag>("INT_WMMA_"
--                                       # Geometry # "_load_"
--                                       # !subst("c", "c_" # Type, Abc)
--                                       # "_" # Layout
--                                       # !subst(".", "_", Space)
--                                       # !if(WithStride,"_stride", "")
--                                       # "_Intr");
--  dag OutsR03 = (outs regclass:$r0, regclass:$r1, regclass:$r2, regclass:$r3);
--  dag OutsR47 = (outs regclass:$r4, regclass:$r5, regclass:$r6, regclass:$r7);
--  dag Outs = !if(!eq(Abc#Type,"cf16"), OutsR03, !con(OutsR03, OutsR47));
--
--  dag StrideArg = !if(WithStride, (ins Int32Regs:$ldm), (ins));
--  dag Ins = !con((ins SrcOp:$src), StrideArg);
-+// Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
-+// In addition to target-independent fields provided by WMMA_REGS, it adds
-+// the fields commonly used to implement specific PTX instruction -- register
-+// types and names, constraints, parts of assembly, etc.
-+class WMMA_REGINFO<string Geom, string Frag, string PtxEltType>
-+      : WMMA_REGS<Geom, Frag, PtxEltType> {
-+  // NVPTX register types used to carry fragment data.
-+  NVPTXRegClass regclass = !cond(
-+    !eq(PtxEltType, "f16") : Float16x2Regs,
-+    !eq(PtxEltType, "f32") : Float32Regs);
-+
-+  // Instruction input/output arguments for the fragment.
-+  list<NVPTXRegClass> ptx_regs = !foreach(tmp, regs, regclass);
-+
-+  // List of register names for the fragment -- ["ra0", "ra1",...]
-+  list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;
-+  // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
-+  string regstring = "{{$" # !head(reg_names)
-+                           # !foldl("", !tail(reg_names), a, b,
-+                                    !strconcat(a, ", $", b))
-+                     # "}}";
-+
-+  // Predicates for particular fragment variant. Technically those are
-+  // per-instruction predicates, but currently all fragments that can be used in
-+  // a given instruction are subject to the same constraints, so an instruction
-+  // can use predicates from any of its fragments. If/when this is no
-+  // longer the case, we can concat all per-fragment predicates to enforce that
-+  // all fragments of the instruction are viable.
-+  list<Predicate> Predicates = !cond(
-+    // fp16 -> fp16/fp32 @ m16n16k16
-+    !and(!eq(Geom, "m16n16k16"),
-+         !or(!eq(PtxEltType, "f16"),
-+             !eq(PtxEltType, "f32"))) : [hasSM70, hasPTX60],
-+
-+    // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
-+    !and(!or(!eq(Geom, "m8n32k16"),
-+             !eq(Geom, "m32n8k16")),
-+         !or(!eq(PtxEltType, "f16"),
-+             !eq(PtxEltType, "f32"))) : [hasSM70, hasPTX61]);
-+
-+  // template DAGs for instruction inputs/output.
-+  dag Outs = !dag(outs, ptx_regs, reg_names);
-+  dag Ins = !dag(ins, ptx_regs, reg_names);
-+}
- 
-+class BuildPattern<dag Outs, PatFrag IntrMatcher, dag Ins> {
-   // Build a dag pattern that matches the intrinsic call.
-   // We want a dag that looks like this:
-   // (set <output args>, (intrinsic <input arguments>)) where input and
-@@ -7431,277 +7459,127 @@ class WMMA_LOAD_GALSTOS<string Geometry, string Abc, string Layout,
-                               !subst(ins, IntrMatcher, tmp)))));
-   // Finally, consatenate both parts together. !con() requires both dags to have
-   // the same operator, so we wrap PatArgs in a (set ...) dag.
--  let Pattern = [!con(PatOuts, (set PatArgs))];
--  let OutOperandList = Outs;
--  let InOperandList = Ins;
--  let AsmString = "wmma.load."
--                  # Abc
--                  # ".sync"
--                  # "." # Layout
--                  # "." # Geometry
--                  # Space
--                  # "." # Type # " \t"
--                  # !if(!eq(Abc#Type, "cf16"),
--                        "{{$r0, $r1, $r2, $r3}}",
--                        "{{$r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7}}")
--                  # ", [$src]"
--                  # !if(WithStride, ", $ldm", "")
--                  # ";";
-+  dag ret = !con(PatOuts, (set PatArgs));
- }
- 
--class WMMA_LOAD_INTR_HELPER<string Geometry, string Abc, string Layout,
--                            string Space, string Type, bit WithStride>
-+//
-+// wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
-+//
-+
-+class WMMA_LOAD_INTR_HELPER<WMMA_REGINFO Frag, string Layout, string Space,
-+                            bit WithStride>
-                            : PatFrag <(ops),(ops)> {
-   // Intrinsic that matches this instruction.
--  Intrinsic Intr = !cast<Intrinsic>("int_nvvm_wmma"
--                                    # "_" # Geometry # "_load_"
--                                    # Abc # "_" # Type # "_" # Layout
--                                    # !if(WithStride,"_stride", ""));
--  code match_generic = [{
--   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
--  }];
--  code match_shared = [{
--   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
--  }];
--  code match_global = [{
--   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
--  }];
--
-+  Intrinsic Intr = !cast<Intrinsic>(WMMA_NAME_LDST<"load", Frag, Layout,
-+                                                   WithStride>.record);
-   let Operands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
-   let Fragments = [!foreach(tmp, Operands, !subst(ops, Intr, tmp))];
--  let PredicateCode = !if(!eq(Space, ".shared"), match_shared,
--                      !if(!eq(Space, ".global"), match_global, match_generic));
--}
--
--multiclass WMMA_LOAD_GALSTS<string Geometry, string Abc, string Layout,
--                            string Space, string Type, NVPTXRegClass regclass,
--                            bit WithStride> {
--  def _avar:  WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
--                                imem, WithStride>;
--  def _areg: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
--                                Int32Regs, WithStride>;
--  def _areg64: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
--                                Int64Regs, WithStride>;
--  def _ari: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
--                                MEMri, WithStride>;
--  def _ari64: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
--                                MEMri64, WithStride>;
-+  let PredicateCode = !cond(!eq(Space, ".shared"): AS_match.shared,
-+                            !eq(Space, ".global"): AS_match.global,
-+                            1: AS_match.generic);
- }
- 
--multiclass WMMA_LOAD_GALSTSh<string Geometry, string Abc, string Layout,
--                             string Space, string Type, NVPTXRegClass regclass,
--                             bit WithStride> {
--  // Define a PatFrag that matches appropriate intrinsic that loads from the
--  // given address space.
--  def _Intr:  WMMA_LOAD_INTR_HELPER<Geometry, Abc, Layout, Space, Type,
--                                    WithStride>;
--  defm NAME:  WMMA_LOAD_GALSTS<Geometry, Abc, Layout, Space, Type, regclass,
--                               WithStride>;
--}
--
--multiclass WMMA_LOAD_GALST<string Geometry, string Abc, string Layout,
--                           string Space, string Type, NVPTXRegClass regclass> {
--  defm _stride: WMMA_LOAD_GALSTSh<Geometry, Abc, Layout, Space, Type, regclass, 1>;
--  defm NAME:    WMMA_LOAD_GALSTSh<Geometry, Abc, Layout, Space, Type, regclass, 0>;
--}
--
--multiclass WMMA_LOAD_GALT<string Geometry, string Abc, string Layout,
--                          string Type, NVPTXRegClass regclass> {
--  defm _global: WMMA_LOAD_GALST<Geometry, Abc, Layout, ".global",
--                                Type, regclass>;
--  defm _shared: WMMA_LOAD_GALST<Geometry, Abc, Layout, ".shared",
--                                Type, regclass>;
--  defm NAME:    WMMA_LOAD_GALST<Geometry, Abc, Layout,        "",
--                                Type, regclass>;
--}
--
--multiclass WMMA_LOAD_GAT<string Geometry, string Abc,
--                         string Type, NVPTXRegClass regclass> {
--  defm _row: WMMA_LOAD_GALT<Geometry, Abc, "row", Type, regclass>;
--  defm _col: WMMA_LOAD_GALT<Geometry, Abc, "col", Type, regclass>;
--}
-+class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
-+                DAGOperand SrcOp>
-+  : EmptyNVPTXInst,
-+    Requires<Frag.Predicates> {
-+  // Pattern that matches the intrinsic for this instruction variant.
-+  PatFrag IntrMatcher = WMMA_LOAD_INTR_HELPER<Frag, Layout, Space, WithStride>;
-+  dag Ins = !con((ins SrcOp:$src), !if(WithStride, (ins Int32Regs:$ldm), (ins)));
- 
--multiclass WMMA_LOAD_G<string Geometry> {
--  defm _load_a: WMMA_LOAD_GAT<Geometry, "a", "f16", Float16x2Regs>;
--  defm _load_b: WMMA_LOAD_GAT<Geometry, "b", "f16", Float16x2Regs>;
--  defm _load_c_f16: WMMA_LOAD_GAT<Geometry, "c", "f16", Float16x2Regs>;
--  defm _load_c_f32: WMMA_LOAD_GAT<Geometry, "c", "f32", Float32Regs>;
-+  let Pattern = [BuildPattern<Frag.Outs, IntrMatcher, Ins>.ret];
-+  let OutOperandList = Frag.Outs;
-+  let InOperandList = Ins;
-+  let AsmString = "wmma.load."
-+                  # Frag.frag
-+                  # ".sync"
-+                  # "." # Layout
-+                  # "." # Frag.geom
-+                  # Space
-+                  # "." # Frag.ptx_elt_type # " \t"
-+                  # Frag.regstring
-+                  # ", [$src]"
-+                  # !if(WithStride, ", $ldm", "")
-+                  # ";";
- }
- 
--defm INT_WMMA_m32n8k16: WMMA_LOAD_G<"m32n8k16">;
--defm INT_WMMA_m16n16k16: WMMA_LOAD_G<"m16n16k16">;
--defm INT_WMMA_m8n32k16: WMMA_LOAD_G<"m8n32k16">;
--
- //
- // wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
- //
--class WMMA_STORE_D_GLSTSO<string Geometry, string Layout, string Space,
--                          string Type, NVPTXRegClass regclass,
--                          bit WithStride, DAGOperand DstOp>
-+class WMMA_STORE_INTR_HELPER<WMMA_REGINFO Frag, string Layout, string Space,
-+                             bit WithStride>
-+                            : PatFrag <(ops),(ops)> {
-+  // Intrinsic that matches this instruction.
-+  Intrinsic Intr = !cast<Intrinsic>(WMMA_NAME_LDST<"store", Frag, Layout,
-+                                                   WithStride>.record);
-+  let Operands = !con((ops node:$dst),
-+                      !dag(ops, !foreach(tmp, Frag.regs, node), Frag.reg_names),
-+                      !if(WithStride, (ops node:$ldm), (ops)));
-+  let Fragments = [!foreach(tmp, Operands, !subst(ops, Intr, tmp))];
-+  let PredicateCode = !cond(!eq(Space, ".shared"): AS_match.shared,
-+                            !eq(Space, ".global"): AS_match.global,
-+                            1: AS_match.generic);
-+}
-+
-+class WMMA_STORE<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
-+                 DAGOperand DstOp>
-   : EmptyNVPTXInst,
--    Requires<[!if(!eq(Geometry, "m16n16k16"),
--                  hasPTX60,
--                  hasPTX61),
--              hasSM70]> {
--  PatFrag IntrMatcher = !cast<PatFrag>("INT_WMMA"
--                                       # "_" # Geometry # "_store_d"
--                                       # "_" # Type
--                                       # "_" # Layout
--                                       # !subst(".", "_", Space)
--                                       # !if(WithStride,"_stride", "")
--                                       # "_Intr");
--  dag InsR03 = (ins DstOp:$src, regclass:$r0, regclass:$r1,
--                                regclass:$r2, regclass:$r3);
--  dag InsR47 = (ins regclass:$r4, regclass:$r5,
--                    regclass:$r6, regclass:$r7);
--  dag InsR = !if(!eq(Type,"f16"), InsR03, !con(InsR03, InsR47));
--  dag StrideArg = !if(WithStride, (ins Int32Regs:$ldm), (ins));
--  dag Ins = !con(InsR, StrideArg);
--
--  // Construct the pattern to match corresponding intrinsic call. See the
--  // details in the comments in WMMA_LOAD_ALSTOS.
--  dag PatArgs = !foreach(tmp, Ins,
--                              !subst(imem, ADDRvar,
--                              !subst(MEMri64, ADDRri64,
--                              !subst(MEMri, ADDRri,
--                              !subst(ins, IntrMatcher, tmp)))));
--  let Pattern = [PatArgs];
-+    Requires<Frag.Predicates> {
-+  PatFrag IntrMatcher = WMMA_STORE_INTR_HELPER<Frag, Layout, Space, WithStride>;
-+  dag Ins = !con((ins DstOp:$src),
-+                 Frag.Ins,
-+                 !if(WithStride, (ins Int32Regs:$ldm), (ins)));
-+  let Pattern = [BuildPattern<(set), IntrMatcher, Ins>.ret];
-   let OutOperandList = (outs);
-   let InOperandList = Ins;
-   let AsmString = "wmma.store.d.sync."
-                   # Layout
--                  # "." # Geometry
-+                  # "." # Frag.geom
-                   # Space
--                  # "." # Type
-+                  # "." # Frag.ptx_elt_type
-                   # " \t[$src],"
--                  # !if(!eq(Type,"f16"),
--                        "{{$r0, $r1, $r2, $r3}}",
--                        "{{$r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7}}")
-+                  # Frag.regstring
-                   # !if(WithStride, ", $ldm", "")
-                   # ";";
--
- }
- 
--class WMMA_STORE_INTR_HELPER<string Geometry, string Layout, string Space,
--                             string Type, bit WithStride>
--                            : PatFrag <(ops),(ops)> {
--  // Intrinsic that matches this instruction.
--  Intrinsic Intr = !cast<Intrinsic>("int_nvvm_wmma_"
--                                    # Geometry
--                                    # "_store_d"
--                                    # "_" # Type
--                                    # "_" # Layout
--                                    # !if(WithStride, "_stride", ""));
--  code match_generic = [{
--   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
--  }];
--  code match_shared = [{
--   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
--  }];
--  code match_global = [{
--   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
--  }];
--
--  dag Args = !if(!eq(Type,"f16"),
--                 (ops node:$dst, node:$r0, node:$r1, node:$r2, node:$r3),
--                 (ops node:$dst, node:$r0, node:$r1, node:$r2, node:$r3,
--                                 node:$r4, node:$r5, node:$r6, node:$r7));
--  dag StrideArg = !if(WithStride, (ops node:$ldm), (ops));
--  let Operands = !con(Args, StrideArg);
--  let Fragments = [!foreach(tmp, Operands, !subst(ops, Intr, tmp))];
--  let PredicateCode = !if(!eq(Space, ".shared"), match_shared,
--                      !if(!eq(Space, ".global"), match_global, match_generic));
--}
--
--multiclass WMMA_STORE_D_GLSTS<string Geometry, string Layout, string Space,
--                              string Type, NVPTXRegClass regclass,
--                              bit WithStride> {
--  def _avar:   WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
--                                   WithStride, imem>;
--  def _areg:   WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
--                                   WithStride, Int32Regs>;
--  def _areg64: WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
--                                   WithStride, Int64Regs>;
--  def _ari:    WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
--                                   WithStride, MEMri>;
--  def _ari64:  WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
--                                   WithStride, MEMri64>;
--}
--
--multiclass WMMA_STORE_D_GLSTSh<string Geometry, string Layout, string Space,
--                               string Type, NVPTXRegClass regclass,
--                               bit WithStride> {
--  // Define a PatFrag that matches appropriate intrinsic that loads from the
--  // given address space.
--  def _Intr:    WMMA_STORE_INTR_HELPER<Geometry, Layout, Space, Type,
--                                       WithStride>;
--  defm NAME:    WMMA_STORE_D_GLSTS<Geometry, Layout, Space, Type, regclass,
--                                   WithStride>;
--}
--
--multiclass WMMA_STORE_D_GLST<string Geometry, string Layout, string Space,
--                             string Type, NVPTXRegClass regclass > {
--  defm _stride: WMMA_STORE_D_GLSTSh<Geometry, Layout, Space, Type, regclass, 1>;
--  defm NAME:    WMMA_STORE_D_GLSTSh<Geometry, Layout, Space, Type, regclass, 0>;
--}
--
--multiclass WMMA_STORE_D_GLT<string Geometry, string Layout,
--                           string Type, NVPTXRegClass regclass> {
--  defm _global: WMMA_STORE_D_GLST<Geometry, Layout, ".global", Type, regclass>;
--  defm _shared: WMMA_STORE_D_GLST<Geometry, Layout, ".shared", Type, regclass>;
--  defm NAME:    WMMA_STORE_D_GLST<Geometry, Layout,        "", Type, regclass>;
--}
--
--multiclass WMMA_STORE_D_GT<string Geometry, string Type,
--                           NVPTXRegClass regclass> {
--  defm _row:    WMMA_STORE_D_GLT<Geometry, "row", Type, regclass>;
--  defm _col:    WMMA_STORE_D_GLT<Geometry, "col", Type, regclass>;
--}
--
--multiclass WMMA_STORE_D_G<string Geometry> {
--  defm _store_d_f16: WMMA_STORE_D_GT<Geometry, "f16", Float16x2Regs>;
--  defm _store_d_f32: WMMA_STORE_D_GT<Geometry, "f32", Float32Regs>;
--}
--
--defm INT_WMMA_m32n8k16: WMMA_STORE_D_G<"m32n8k16">;
--defm INT_WMMA_m16n16k16: WMMA_STORE_D_G<"m16n16k16">;
--defm INT_WMMA_m8n32k16: WMMA_STORE_D_G<"m8n32k16">;
-+// Create all load/store variants
-+foreach geom = ["m16n16k16", "m32n8k16", "m8n32k16" ] in {
-+  foreach layout = ["row", "col"] in {
-+    foreach stride = [0, 1] in {
-+      foreach space = [".global", ".shared", ""] in {
-+        foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
-+          foreach frag = [WMMA_REGINFO<geom, "a", "f16">,
-+                          WMMA_REGINFO<geom, "b", "f16">,
-+                          WMMA_REGINFO<geom, "c", "f16">,
-+                          WMMA_REGINFO<geom, "c", "f32">] in {
-+              def : WMMA_LOAD<frag, layout, space, stride, addr>;
-+          }
-+          foreach frag = [WMMA_REGINFO<geom, "d", "f16">,
-+                          WMMA_REGINFO<geom, "d", "f32">] in {
-+              def : WMMA_STORE<frag, layout, space, stride, addr>;
-+          }
-+        } // addr
-+      } // space
-+    } // stride
-+  } // layout
-+} // geom
- 
- // WMMA.MMA
--class WMMA_MMA_GABDCS<string Geometry, string ALayout, string BLayout,
--                     string DType, NVPTXRegClass d_reg,
--                     string CType, NVPTXRegClass c_reg,
--                     NVPTXRegClass ab_reg,
--                     string Satfinite = "">
-+class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
-+               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
-+               string ALayout, string BLayout, int Satfinite>
-   : EmptyNVPTXInst,
--    Requires<[!if(!eq(Geometry, "m16n16k16"),
--                  hasPTX60,
--                  hasPTX61),
--              hasSM70]> {
--  Intrinsic Intr = !cast<Intrinsic>("int_nvvm_wmma_"
--                                    # Geometry
--                                    # "_mma"
--                                    # "_" # ALayout
--                                    # "_" # BLayout
--                                    # "_" # DType
--                                    # "_" # CType
--                                    # !subst(".", "_", Satfinite));
--  dag Outs = !if(!eq(DType,"f16"),
--                 (outs d_reg:$d0, d_reg:$d1, d_reg:$d2, d_reg:$d3),
--                 (outs d_reg:$d0, d_reg:$d1, d_reg:$d2, d_reg:$d3,
--                       d_reg:$d4, d_reg:$d5, d_reg:$d6, d_reg:$d7));
--  dag InsExtraCArgs = !if(!eq(CType,"f16"),
--                          (ins),
--                          (ins c_reg:$c4,  c_reg:$c5,  c_reg:$c6,  c_reg:$c7));
--  dag Ins = !con((ins ab_reg:$a0, ab_reg:$a1, ab_reg:$a2, ab_reg:$a3,
--                      ab_reg:$a4, ab_reg:$a5, ab_reg:$a6, ab_reg:$a7,
--                      ab_reg:$b0, ab_reg:$b1, ab_reg:$b2, ab_reg:$b3,
--                      ab_reg:$b4, ab_reg:$b5, ab_reg:$b6, ab_reg:$b7,
--                      c_reg:$c0,  c_reg:$c1,  c_reg:$c2,  c_reg:$c3),
--                  InsExtraCArgs);
--
--  // Construct the pattern to match corresponding intrinsic call. See the
--  // details in the comments in WMMA_LOAD_ALSTOS.
-+    Requires<FragC.Predicates> {
-+  //Intrinsic Intr = int_nvvm_suld_1d_v4i32_zero;
-+  Intrinsic Intr = !cast<Intrinsic>(WMMA_NAME_MMA<ALayout, BLayout, FragC, FragD, Satfinite>.record);
-+  dag Outs = FragD.Outs;
-+  dag Ins = !con(FragA.Ins,
-+                 FragB.Ins,
-+                 FragC.Ins);
-+
-+  // Construct the pattern to match corresponding intrinsic call.
-+  // mma does not load/store anything, so we don't need complex operand matching here.
-   dag PatOuts = !foreach(tmp, Outs, !subst(outs, set, tmp));
-   dag PatArgs = !foreach(tmp, Ins, !subst(ins, Intr, tmp));
-   let Pattern = [!con(PatOuts, (set PatArgs))];
-@@ -7710,54 +7588,30 @@ class WMMA_MMA_GABDCS<string Geometry, string ALayout, string BLayout,
-   let AsmString = "wmma.mma.sync."
-                   # ALayout
-                   # "." # BLayout
--                  # "." # Geometry
--                  # "." # DType
--                  # "." # CType
--                  # Satfinite # "\n\t\t"
--                  # !if(!eq(DType,"f16"),
--                        "{{$d0, $d1, $d2, $d3}}, \n\t\t",
--                        "{{$d0, $d1, $d2, $d3, $d4, $d5, $d6, $d7}},\n\t\t")
--                  # "{{$a0, $a1, $a2, $a3, $a4, $a5, $a6, $a7}},\n\t\t"
--                  # "{{$b0, $b1, $b2, $b3, $b4, $b5, $b6, $b7}},\n\t\t"
--                  # !if(!eq(CType,"f16"),
--                        "{{$c0, $c1, $c2, $c3}};",
--                        "{{$c0, $c1, $c2, $c3, $c4, $c5, $c6, $c7}};");
--}
--
--multiclass WMMA_MMA_GABDC<string Geometry, string ALayout, string BLayout,
--                         string DType, NVPTXRegClass d_reg,
--                         string CType, NVPTXRegClass c_reg> {
--  def _satfinite: WMMA_MMA_GABDCS<Geometry, ALayout, BLayout,
--                                 DType, d_reg, CType, c_reg,
--                                 Float16x2Regs, ".satfinite">;
--  def NAME:       WMMA_MMA_GABDCS<Geometry, ALayout, BLayout,
--                                 DType, d_reg, CType, c_reg,
--                                 Float16x2Regs>;
--}
--
--multiclass WMMA_MMA_GABD<string Geometry, string ALayout, string BLayout,
--                        string DType, NVPTXRegClass d_reg> {
--  defm _f16: WMMA_MMA_GABDC<Geometry, ALayout, BLayout, DType, d_reg,
--                            "f16", Float16x2Regs>;
--  defm _f32: WMMA_MMA_GABDC<Geometry, ALayout, BLayout, DType, d_reg,
--                            "f32", Float32Regs>;
--}
--
--multiclass WMMA_MMA_GAB<string Geometry, string ALayout, string BLayout> {
--  defm _f16: WMMA_MMA_GABD<Geometry, ALayout, BLayout, "f16", Float16x2Regs>;
--  defm _f32: WMMA_MMA_GABD<Geometry, ALayout, BLayout, "f32", Float32Regs>;
--}
--
--multiclass WMMA_MMA_GA<string Geometry, string ALayout> {
--  defm _col: WMMA_MMA_GAB<Geometry, ALayout, "col">;
--  defm _row: WMMA_MMA_GAB<Geometry, ALayout, "row">;
--}
--
--multiclass WMMA_MMA_G<string Geometry> {
--  defm _col: WMMA_MMA_GA<Geometry, "col">;
--  defm _row: WMMA_MMA_GA<Geometry, "row">;
-+                  # "." # FragA.geom
-+                  # "." # FragD.ptx_elt_type
-+                  # "." # FragC.ptx_elt_type
-+                  # !if(Satfinite, ".satfinite", "") # "\n\t\t"
-+                  # FragD.regstring # ",\n\t\t"
-+                  # FragA.regstring # ",\n\t\t"
-+                  # FragB.regstring # ",\n\t\t"
-+                  # FragC.regstring # ";";
- }
- 
--defm INT_WMMA_MMA_m32n8k16 : WMMA_MMA_G<"m32n8k16">;
--defm INT_WMMA_MMA_m16n16k16 : WMMA_MMA_G<"m16n16k16">;
--defm INT_WMMA_MMA_m8n32k16 : WMMA_MMA_G<"m8n32k16">;
-+foreach geom = ["m16n16k16", "m32n8k16", "m8n32k16" ] in {
-+  foreach layout_a = ["row", "col"] in {
-+    foreach layout_b = ["row", "col"] in {
-+      foreach frag_c = [WMMA_REGINFO<geom, "c", "f16">,
-+                        WMMA_REGINFO<geom, "c", "f32">] in {
-+        foreach frag_d = [WMMA_REGINFO<geom, "d", "f16">,
-+                          WMMA_REGINFO<geom, "d", "f32">] in {
-+          foreach satf = [0, 1] in {
-+            def : WMMA_MMA<WMMA_REGINFO<geom, "a", "f16">,
-+                           WMMA_REGINFO<geom, "b", "f16">,
-+                           frag_c, frag_d, layout_a, layout_b, satf>;
-+          } // satf
-+        } // frag_d
-+      } // frag_c
-+    } // layout_b
-+  } // layout_a
-+} // geom
--- 
-2.17.1
-
diff --git a/deps/patches/llvm-8.0-D59393-mma-ptx63-fix.patch b/deps/patches/llvm-8.0-D59393-mma-ptx63-fix.patch
deleted file mode 100644
index 10e6cd7b6f98db..00000000000000
--- a/deps/patches/llvm-8.0-D59393-mma-ptx63-fix.patch
+++ /dev/null
@@ -1,510 +0,0 @@
-From be924be7f9e699775fe7690d4b421bebfed73aa9 Mon Sep 17 00:00:00 2001
-From: Artem Belevich <tra@google.com>
-Date: Thu, 25 Apr 2019 22:27:46 +0000
-Subject: [PATCH] [NVPTX] generate correct MMA instruction mnemonics with
- PTX63+.
-
-PTX 6.3 requires using ".aligned" in the MMA instruction names.
-In order to generate correct name, now we pass current
-PTX version to each instruction as an extra constant operand
-and InstPrinter adjusts its output accordingly.
-
-Differential Revision: https://reviews.llvm.org/D59393
-
-llvm-svn: 359246
----
- .../NVPTX/InstPrinter/NVPTXInstPrinter.cpp    |  14 +
- .../NVPTX/InstPrinter/NVPTXInstPrinter.h      |   2 +
- lib/Target/NVPTX/NVPTXInstrInfo.td            |   4 +
- lib/Target/NVPTX/NVPTXIntrinsics.td           | 279 ++++++++++--------
- test/CodeGen/NVPTX/wmma.py                    |  17 +-
- 5 files changed, 184 insertions(+), 132 deletions(-)
-
-diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
-index b774fe169d7..6fb577d5499 100644
---- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
-+++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
-@@ -270,6 +270,20 @@ void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
-     llvm_unreachable("Empty Modifier");
- }
- 
-+void NVPTXInstPrinter::printMmaCode(const MCInst *MI, int OpNum, raw_ostream &O,
-+                                    const char *Modifier) {
-+  const MCOperand &MO = MI->getOperand(OpNum);
-+  int Imm = (int)MO.getImm();
-+  if (Modifier == nullptr || strcmp(Modifier, "version") == 0) {
-+    O << Imm; // Just print out PTX version
-+  } else if (strcmp(Modifier, "aligned") == 0) {
-+    // PTX63 requires '.aligned' in the name of the instruction.
-+    if (Imm >= 63)
-+      O << ".aligned";
-+  } else
-+    llvm_unreachable("Unknown Modifier");
-+}
-+
- void NVPTXInstPrinter::printMemOperand(const MCInst *MI, int OpNum,
-                                        raw_ostream &O, const char *Modifier) {
-   printOperand(MI, OpNum, O);
-diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
-index f0f223aa057..588439137f9 100644
---- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
-+++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
-@@ -41,6 +41,8 @@ public:
-                     const char *Modifier = nullptr);
-   void printLdStCode(const MCInst *MI, int OpNum,
-                      raw_ostream &O, const char *Modifier = nullptr);
-+  void printMmaCode(const MCInst *MI, int OpNum, raw_ostream &O,
-+                    const char *Modifier = nullptr);
-   void printMemOperand(const MCInst *MI, int OpNum,
-                        raw_ostream &O, const char *Modifier = nullptr);
-   void printProtoIdent(const MCInst *MI, int OpNum,
-diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
-index 02a40b9f526..603d3212395 100644
---- a/lib/Target/NVPTX/NVPTXInstrInfo.td
-+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
-@@ -1549,6 +1549,10 @@ def LdStCode : Operand<i32> {
-   let PrintMethod = "printLdStCode";
- }
- 
-+def MmaCode : Operand<i32> {
-+  let PrintMethod = "printMmaCode";
-+}
-+
- def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
- def Wrapper    : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
- 
-diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td
-index b9a67ba5ed3..5cd534914f7 100644
---- a/lib/Target/NVPTX/NVPTXIntrinsics.td
-+++ b/lib/Target/NVPTX/NVPTXIntrinsics.td
-@@ -39,6 +39,24 @@ def AS_match {
-   }];
- }
- 
-+// A node that will be replaced with the current PTX version.
-+class PTX {
-+  SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
-+    return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
-+  }]>;
-+  // (i32 0) will be XForm'ed to the currently used PTX version.
-+  dag version = (PTXVerXform (i32 0));
-+}
-+def ptx : PTX;
-+
-+// Generates list of n sequential register names.
-+// E.g. RegNames<3,"r">.ret -> ["r0", "r1", "r2" ]
-+class RegSeq<int n, string prefix> {
-+  list<string> ret = !if(n, !listconcat(RegSeq<!add(n,-1), prefix>.ret,
-+                                        [prefix # !add(n, -1)]),
-+                            []);
-+}
-+
- //-----------------------------------
- // Synchronization and shuffle functions
- //-----------------------------------
-@@ -7385,14 +7403,6 @@ def INT_PTX_SREG_WARPSIZE :
-     NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
-               [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
- 
--class EmptyNVPTXInst : NVPTXInst<(outs), (ins), "?", []>;
--// Generates list of n sequential register names.
--class RegSeq<int n, string prefix> {
--  list<string> ret = !if(n, !listconcat(RegSeq<!add(n,-1), prefix>.ret,
--                                        [prefix # !add(n, -1)]),
--                            []);
--}
--
- // Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
- // In addition to target-independent fields provided by WMMA_REGS, it adds
- // the fields commonly used to implement specific PTX instruction -- register
-@@ -7409,6 +7419,7 @@ class WMMA_REGINFO<string Geom, string Frag, string PtxEltType>
- 
-   // List of register names for the fragment -- ["ra0", "ra1",...]
-   list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;
-+
-   // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
-   string regstring = "{{$" # !head(reg_names)
-                            # !foldl("", !tail(reg_names), a, b,
-@@ -7438,61 +7449,65 @@ class WMMA_REGINFO<string Geom, string Frag, string PtxEltType>
-   dag Ins = !dag(ins, ptx_regs, reg_names);
- }
- 
--class BuildPattern<dag Outs, PatFrag IntrMatcher, dag Ins> {
-+// Convert dag of arguments into a dag to match given intrinsic.
-+class BuildPatternI<Intrinsic Intr, dag Ins> {
-+  // Build a dag pattern that matches the intrinsic call.
-+  dag ret = !foreach(tmp, Ins,
-+                          !subst(imem, ADDRvar,
-+                          !subst(MEMri64, ADDRri64,
-+                          !subst(MEMri, ADDRri,
-+                          !subst(ins, Intr, tmp)))));
-+}
-+
-+// Same as above, but uses PatFrag instead of an Intrinsic.
-+class BuildPatternPF<PatFrag Intr, dag Ins> {
-   // Build a dag pattern that matches the intrinsic call.
--  // We want a dag that looks like this:
--  // (set <output args>, (intrinsic <input arguments>)) where input and
--  // output arguments are named patterns that would match corresponding
--  // input/output arguments of the instruction.
--  //
--  // First we construct (set <output arguments>) from instruction's outs dag by
--  // replacing dag operator 'outs' with 'set'.
--  dag PatOuts = !foreach(tmp, Outs, !subst(outs, set, tmp));
--  // Similarly, construct (intrinsic <input arguments>) sub-dag from
--  // instruction's input arguments, only now we also need to replace operands
--  // with patterns that would match them and the operator 'ins' with the
--  // intrinsic.
--  dag PatArgs = !foreach(tmp, Ins,
--                              !subst(imem, ADDRvar,
--                              !subst(MEMri64, ADDRri64,
--                              !subst(MEMri, ADDRri,
--                              !subst(ins, IntrMatcher, tmp)))));
--  // Finally, consatenate both parts together. !con() requires both dags to have
--  // the same operator, so we wrap PatArgs in a (set ...) dag.
--  dag ret = !con(PatOuts, (set PatArgs));
-+  dag ret = !foreach(tmp, Ins,
-+                          !subst(imem, ADDRvar,
-+                          !subst(MEMri64, ADDRri64,
-+                          !subst(MEMri, ADDRri,
-+                          !subst(ins, Intr, tmp)))));
-+}
-+
-+// Common WMMA-related fields used for building patterns for all MMA instructions.
-+class WMMA_INSTR<string _Intr, list<dag> _Args>
-+  : NVPTXInst<(outs), (ins), "?", []> {
-+  Intrinsic Intr = !cast<Intrinsic>(_Intr);
-+  // Concatenate all arguments into a single dag.
-+  dag Args = !foldl((ins), _Args, a, b, !con(a,b));
-+  // Pre-build the pattern to match (intrinsic arg0, arg1, ...).
-+  dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
- }
- 
- //
- // wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
- //
- 
--class WMMA_LOAD_INTR_HELPER<WMMA_REGINFO Frag, string Layout, string Space,
--                            bit WithStride>
--                           : PatFrag <(ops),(ops)> {
--  // Intrinsic that matches this instruction.
--  Intrinsic Intr = !cast<Intrinsic>(WMMA_NAME_LDST<"load", Frag, Layout,
--                                                   WithStride>.record);
--  let Operands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
--  let Fragments = [!foreach(tmp, Operands, !subst(ops, Intr, tmp))];
--  let PredicateCode = !cond(!eq(Space, ".shared"): AS_match.shared,
--                            !eq(Space, ".global"): AS_match.global,
--                            1: AS_match.generic);
--}
--
- class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
-                 DAGOperand SrcOp>
--  : EmptyNVPTXInst,
-+  : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
-+                              [!con((ins SrcOp:$src),
-+                                    !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
-     Requires<Frag.Predicates> {
--  // Pattern that matches the intrinsic for this instruction variant.
--  PatFrag IntrMatcher = WMMA_LOAD_INTR_HELPER<Frag, Layout, Space, WithStride>;
--  dag Ins = !con((ins SrcOp:$src), !if(WithStride, (ins Int32Regs:$ldm), (ins)));
-+  // Load/store intrinsics are overloaded on pointer's address space.
-+  // To match the right intrinsic, we need to build AS-constrained PatFrag.
-+  // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
-+  dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
-+  // Build PatFrag that only matches particular address space.
-+  PatFrag IntrFrag = PatFrag<PFOperands,
-+                             !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
-+                             !cond(!eq(Space, ".shared"): AS_match.shared,
-+                                   !eq(Space, ".global"): AS_match.global,
-+                                   1: AS_match.generic)>;
-+  // Build AS-constrained pattern.
-+  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
- 
--  let Pattern = [BuildPattern<Frag.Outs, IntrMatcher, Ins>.ret];
-   let OutOperandList = Frag.Outs;
--  let InOperandList = Ins;
-+  let InOperandList = !con(Args, (ins MmaCode:$ptx));
-   let AsmString = "wmma.load."
-                   # Frag.frag
-                   # ".sync"
-+                  # "${ptx:aligned}"
-                   # "." # Layout
-                   # "." # Frag.geom
-                   # Space
-@@ -7506,87 +7521,79 @@ class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
- //
- // wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
- //
--class WMMA_STORE_INTR_HELPER<WMMA_REGINFO Frag, string Layout, string Space,
--                             bit WithStride>
--                            : PatFrag <(ops),(ops)> {
--  // Intrinsic that matches this instruction.
--  Intrinsic Intr = !cast<Intrinsic>(WMMA_NAME_LDST<"store", Frag, Layout,
--                                                   WithStride>.record);
--  let Operands = !con((ops node:$dst),
--                      !dag(ops, !foreach(tmp, Frag.regs, node), Frag.reg_names),
--                      !if(WithStride, (ops node:$ldm), (ops)));
--  let Fragments = [!foreach(tmp, Operands, !subst(ops, Intr, tmp))];
--  let PredicateCode = !cond(!eq(Space, ".shared"): AS_match.shared,
--                            !eq(Space, ".global"): AS_match.global,
--                            1: AS_match.generic);
--}
--
--class WMMA_STORE<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
--                 DAGOperand DstOp>
--  : EmptyNVPTXInst,
-+class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
-+                   bit WithStride, DAGOperand DstOp>
-+  : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
-+               [!con((ins DstOp:$dst),
-+                     Frag.Ins,
-+                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
-     Requires<Frag.Predicates> {
--  PatFrag IntrMatcher = WMMA_STORE_INTR_HELPER<Frag, Layout, Space, WithStride>;
--  dag Ins = !con((ins DstOp:$src),
--                 Frag.Ins,
--                 !if(WithStride, (ins Int32Regs:$ldm), (ins)));
--  let Pattern = [BuildPattern<(set), IntrMatcher, Ins>.ret];
-+
-+  // Load/store intrinsics are overloaded on pointer's address space.
-+  // To match the right intrinsic, we need to build AS-constrained PatFrag.
-+  // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
-+  dag PFOperands = !con((ops node:$dst),
-+                        !dag(ops, !foreach(tmp, Frag.regs, node), Frag.reg_names),
-+                        !if(WithStride, (ops node:$ldm), (ops)));
-+  // Build PatFrag that only matches particular address space.
-+  PatFrag IntrFrag = PatFrag<PFOperands,
-+                             !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
-+                             !cond(!eq(Space, ".shared"): AS_match.shared,
-+                                   !eq(Space, ".global"): AS_match.global,
-+                                   1: AS_match.generic)>;
-+  // Build AS-constrained pattern.
-+  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
-+
-+  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
-   let OutOperandList = (outs);
--  let InOperandList = Ins;
--  let AsmString = "wmma.store.d.sync."
--                  # Layout
-+  let AsmString = "wmma.store.d.sync"
-+                  # "${ptx:aligned}"
-+                  # "." # Layout
-                   # "." # Frag.geom
-                   # Space
-                   # "." # Frag.ptx_elt_type
--                  # " \t[$src],"
-+                  # " \t[$dst],"
-                   # Frag.regstring
-                   # !if(WithStride, ", $ldm", "")
-                   # ";";
- }
- 
- // Create all load/store variants
--foreach geom = ["m16n16k16", "m32n8k16", "m8n32k16" ] in {
--  foreach layout = ["row", "col"] in {
--    foreach stride = [0, 1] in {
--      foreach space = [".global", ".shared", ""] in {
--        foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
--          foreach frag = [WMMA_REGINFO<geom, "a", "f16">,
--                          WMMA_REGINFO<geom, "b", "f16">,
--                          WMMA_REGINFO<geom, "c", "f16">,
--                          WMMA_REGINFO<geom, "c", "f32">] in {
--              def : WMMA_LOAD<frag, layout, space, stride, addr>;
--          }
--          foreach frag = [WMMA_REGINFO<geom, "d", "f16">,
--                          WMMA_REGINFO<geom, "d", "f32">] in {
--              def : WMMA_STORE<frag, layout, space, stride, addr>;
--          }
--        } // addr
--      } // space
--    } // stride
--  } // layout
--} // geom
-+defset list<WMMA_INSTR> MMA_LDSTs  = {
-+  foreach geom = ["m16n16k16", "m32n8k16", "m8n32k16" ] in {
-+    foreach layout = ["row", "col"] in {
-+      foreach stride = [0, 1] in {
-+        foreach space = [".global", ".shared", ""] in {
-+          foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
-+            foreach frag = [WMMA_REGINFO<geom, "a", "f16">,
-+                            WMMA_REGINFO<geom, "b", "f16">,
-+                            WMMA_REGINFO<geom, "c", "f16">,
-+                            WMMA_REGINFO<geom, "c", "f32">] in {
-+                def : WMMA_LOAD<frag, layout, space, stride, addr>;
-+            }
-+            foreach frag = [WMMA_REGINFO<geom, "d", "f16">,
-+                            WMMA_REGINFO<geom, "d", "f32">] in {
-+                def : WMMA_STORE_D<frag, layout, space, stride, addr>;
-+            }
-+          } // addr
-+        } // space
-+      } // stride
-+    } // layout
-+  } // geom
-+} // defset
- 
- // WMMA.MMA
- class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
-                WMMA_REGINFO FragC, WMMA_REGINFO FragD,
-                string ALayout, string BLayout, int Satfinite>
--  : EmptyNVPTXInst,
-+  : WMMA_INSTR<WMMA_NAME_MMA<ALayout, BLayout, FragC, FragD, Satfinite>.record,
-+                             [FragA.Ins, FragB.Ins, FragC.Ins]>,
-     Requires<FragC.Predicates> {
--  //Intrinsic Intr = int_nvvm_suld_1d_v4i32_zero;
--  Intrinsic Intr = !cast<Intrinsic>(WMMA_NAME_MMA<ALayout, BLayout, FragC, FragD, Satfinite>.record);
--  dag Outs = FragD.Outs;
--  dag Ins = !con(FragA.Ins,
--                 FragB.Ins,
--                 FragC.Ins);
--
--  // Construct the pattern to match corresponding intrinsic call.
--  // mma does not load/store anything, so we don't need complex operand matching here.
--  dag PatOuts = !foreach(tmp, Outs, !subst(outs, set, tmp));
--  dag PatArgs = !foreach(tmp, Ins, !subst(ins, Intr, tmp));
--  let Pattern = [!con(PatOuts, (set PatArgs))];
--  let OutOperandList = Outs;
--  let InOperandList  = Ins;
--  let AsmString = "wmma.mma.sync."
--                  # ALayout
-+  let OutOperandList = FragD.Outs;
-+  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
-+  let AsmString = "wmma.mma.sync"
-+                  # "${ptx:aligned}"
-+                  # "." # ALayout
-                   # "." # BLayout
-                   # "." # FragA.geom
-                   # "." # FragD.ptx_elt_type
-@@ -7598,20 +7605,34 @@ class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
-                   # FragC.regstring # ";";
- }
- 
--foreach geom = ["m16n16k16", "m32n8k16", "m8n32k16" ] in {
--  foreach layout_a = ["row", "col"] in {
--    foreach layout_b = ["row", "col"] in {
--      foreach frag_c = [WMMA_REGINFO<geom, "c", "f16">,
--                        WMMA_REGINFO<geom, "c", "f32">] in {
--        foreach frag_d = [WMMA_REGINFO<geom, "d", "f16">,
--                          WMMA_REGINFO<geom, "d", "f32">] in {
--          foreach satf = [0, 1] in {
--            def : WMMA_MMA<WMMA_REGINFO<geom, "a", "f16">,
--                           WMMA_REGINFO<geom, "b", "f16">,
--                           frag_c, frag_d, layout_a, layout_b, satf>;
--          } // satf
--        } // frag_d
--      } // frag_c
--    } // layout_b
--  } // layout_a
--} // geom
-+defset list<WMMA_INSTR> MMAs  = {
-+  foreach geom = ["m16n16k16", "m32n8k16", "m8n32k16" ] in {
-+    foreach layout_a = ["row", "col"] in {
-+      foreach layout_b = ["row", "col"] in {
-+        foreach frag_c = [WMMA_REGINFO<geom, "c", "f16">,
-+                          WMMA_REGINFO<geom, "c", "f32">] in {
-+          foreach frag_d = [WMMA_REGINFO<geom, "d", "f16">,
-+                            WMMA_REGINFO<geom, "d", "f32">] in {
-+            foreach satf = [0, 1] in {
-+              def : WMMA_MMA<WMMA_REGINFO<geom, "a", "f16">,
-+                             WMMA_REGINFO<geom, "b", "f16">,
-+                             frag_c, frag_d, layout_a, layout_b, satf>;
-+            } // satf
-+          } // frag_d
-+        } // frag_c
-+      } // layout_b
-+    } // layout_a
-+  } // geom
-+} // defset
-+
-+// Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
-+// dag, so the ptx.version must be appended *after* foreach replaces 'ins' with
-+// the instruction record.
-+class WMMA_PAT<WMMA_INSTR wi>
-+      : Pat<wi.IntrinsicPattern,
-+            !con(!foreach(tmp, wi.Args, !subst(ins, wi, tmp)),
-+                 (wi ptx.version))>;
-+
-+// Build intrinsic->instruction patterns for all MMA instructions.
-+foreach mma = !listconcat(MMAs, MMA_LDSTs) in
-+  def : WMMA_PAT<mma>;
-diff --git a/test/CodeGen/NVPTX/wmma.py b/test/CodeGen/NVPTX/wmma.py
-index 14bbfd7df09..72d189ca050 100644
---- a/test/CodeGen/NVPTX/wmma.py
-+++ b/test/CodeGen/NVPTX/wmma.py
-@@ -3,9 +3,12 @@
- 
- # RUN: python %s > %t.ll
- # RUN: llc < %t.ll -march=nvptx64 -mcpu=sm_70 -mattr=+ptx61 | FileCheck %t.ll
-+# RUN: python %s --ptx=63 > %t-ptx63.ll
-+# RUN: llc < %t-ptx63.ll -march=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | FileCheck %t-ptx63.ll
- 
- from __future__ import print_function
- 
-+import argparse
- from itertools import product
- from string import Template
- 
-@@ -64,7 +67,7 @@ define ${ret_ty} @test_${function}_o(i8 ${as}* %src ${extra_args}) {
- }
- """
-   intrinsic_template = "llvm.nvvm.wmma.${geom}.load.${abc}.${layout}${stride}.${itype}.${pspace}"
--  instruction_template = "wmma.load.${abc}.sync.${layout}.${geom}${space}.${itype}"
-+  instruction_template = "wmma.load.${abc}.sync${aligned}.${layout}.${geom}${space}.${itype}"
- 
-   for geom, abc, layout, space, stride, itype in product(
-       known_geoms,
-@@ -76,6 +79,7 @@ define ${ret_ty} @test_${function}_o(i8 ${as}* %src ${extra_args}) {
- 
-     params = {
-         "abc" : abc,
-+        "aligned" : ".aligned" if ptx_version >= 63 else "",
-         "layout" : layout,
-         "space" : space,
-         "stride" : stride,
-@@ -135,7 +139,7 @@ define void @test_${function}_o(i8 ${as}* %src, ${args}${extra_args}) {
- }
- """
-   intrinsic_template = "llvm.nvvm.wmma.${geom}.store.${abc}.${layout}${stride}.${itype}.${pspace}"
--  instruction_template = "wmma.store.${abc}.sync.${layout}.${geom}${space}.${itype}"
-+  instruction_template = "wmma.store.${abc}.sync${aligned}.${layout}.${geom}${space}.${itype}"
- 
-   for geom, abc, layout, space, stride, itype in product(
-       known_geoms,
-@@ -147,6 +151,7 @@ define void @test_${function}_o(i8 ${as}* %src, ${args}${extra_args}) {
- 
-     params = {
-         "abc" : abc,
-+        "aligned" : ".aligned" if ptx_version >= 63 else "",
-         "layout" : layout,
-         "space" : space,
-         "stride" : stride,
-@@ -191,7 +196,7 @@ define ${ret_ty} @test_${function}(
- }
- """
-   intrinsic_template = "llvm.nvvm.wmma.${geom}.mma.${alayout}.${blayout}.${dtype}.${ctype}${satf}"
--  instruction_template = "wmma.mma.sync.${alayout}.${blayout}.${geom}.${dtype}.${ctype}${satf}"
-+  instruction_template = "wmma.mma.sync${aligned}.${alayout}.${blayout}.${geom}.${dtype}.${ctype}${satf}"
- 
-   for geom, alayout, blayout, ctype, dtype, satf in product(
-       known_geoms,
-@@ -202,6 +207,7 @@ define ${ret_ty} @test_${function}(
-       [".satfinite", ""]):
- 
-     params = {
-+        "aligned" : ".aligned" if ptx_version >= 63 else "",
-         "alayout" : alayout,
-         "blayout" : blayout,
-         "ctype" : ctype,
-@@ -230,4 +236,9 @@ def main():
-   gen_wmma_store_tests()
-   gen_wmma_mma_tests()
- 
-+parser = argparse.ArgumentParser()
-+parser.add_argument('--ptx', type=int, default=60)
-+args = parser.parse_args()
-+ptx_version = args.ptx
-+
- main()
--- 
-2.17.1
-
diff --git a/deps/patches/llvm-8.0-D63688-wasm-isLocal.patch b/deps/patches/llvm-8.0-D63688-wasm-isLocal.patch
deleted file mode 100644
index 820363d3054246..00000000000000
--- a/deps/patches/llvm-8.0-D63688-wasm-isLocal.patch
+++ /dev/null
@@ -1,39 +0,0 @@
-From 83d5085a7fcbb4596d964dbe037c5ebf4de02b69 Mon Sep 17 00:00:00 2001
-From: Keno Fischer <keno@alumni.harvard.edu>
-Date: Sun, 23 Jun 2019 00:29:59 +0000
-Subject: [PATCH] [Support] Fix build under Emscripten
-
-Summary:
-Emscripten's libc doesn't define MNT_LOCAL, thus causing a build
-failure in the fallback path. However, to the best of my knowledge,
-it also doesn't support remote file system mounts, so we may simply
-return `true` here (as we do for e.g. Fuchsia). With this fix, the
-core LLVM libraries build correctly under emscripten (though some
-of the tools and utils do not).
-
-Reviewers: kripken
-Differential Revision: https://reviews.llvm.org/D63688
-
-llvm-svn: 364143
-(cherry picked from commit 5f4ae7c45718618c4c571495e7d910d5722f70ad)
----
- llvm/lib/Support/Unix/Path.inc | 3 +++
- 1 file changed, 3 insertions(+)
-
-diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc
-index d7cc0d627d0..eb38a71fffb 100644
---- a/lib/Support/Unix/Path.inc
-+++ b/lib/Support/Unix/Path.inc
-@@ -398,6 +398,9 @@ static bool is_local_impl(struct STATVFS &Vfs) {
- #elif defined(__Fuchsia__)
-   // Fuchsia doesn't yet support remote filesystem mounts.
-   return true;
-+#elif defined(__EMSCRIPTEN__)
-+  // Emscripten doesn't currently support remote filesystem mounts.
-+  return true;
- #elif defined(__HAIKU__)
-   // Haiku doesn't expose this information.
-   return false;
--- 
-2.24.0
-
diff --git a/deps/patches/llvm-8.0-D65174-limit-merge-stores.patch b/deps/patches/llvm-8.0-D65174-limit-merge-stores.patch
deleted file mode 100644
index 646c44f454641e..00000000000000
--- a/deps/patches/llvm-8.0-D65174-limit-merge-stores.patch
+++ /dev/null
@@ -1,119 +0,0 @@
-From 19992a8c7f2df2000ea7fd4a284ec7b407400fb0 Mon Sep 17 00:00:00 2001
-From: Wei Mi <wmi@google.com>
-Date: Sun, 29 Mar 2020 17:14:12 -0400
-Subject: [PATCH] [DAGCombine] Limit the number of times for the same store and
- root nodes to bail out in store merging dependence check.
-
-We run into a case where dependence check in store merging bail out many times
-for the same store and root nodes in a huge basicblock. That increases compile
-time by almost 100x. The patch add a map to track how many times the bailing
-out happen for the same store and root, and if it is over a limit, stop
-considering the store with the same root as a merging candidate.
-
-Differential Revision: https://reviews.llvm.org/D65174
----
- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 45 +++++++++++++++++--
- 1 file changed, 42 insertions(+), 3 deletions(-)
-
-diff --git llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
-index 6af01423ca1..9c7e37d6945 100644
---- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
-+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
-@@ -112,6 +112,11 @@ static cl::opt<bool>
-   MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
-                     cl::desc("DAG combiner may split indexing from loads"));
- 
-+static cl::opt<unsigned> StoreMergeDependenceLimit(
-+    "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
-+    cl::desc("Limit the number of times for the same StoreNode and RootNode "
-+             "to bail out in store merging dependence check"));
-+
- namespace {
- 
-   class DAGCombiner {
-@@ -145,6 +150,14 @@ namespace {
-     /// which have not yet been combined to the worklist.
-     SmallPtrSet<SDNode *, 32> CombinedNodes;
- 
-+    /// Map from candidate StoreNode to the pair of RootNode and count.
-+    /// The count is used to track how many times we have seen the StoreNode
-+    /// with the same RootNode bail out in dependence check. If we have seen
-+    /// the bail out for the same pair many times over a limit, we won't
-+    /// consider the StoreNode with the same RootNode as store merging
-+    /// candidate again.
-+    DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
-+
-     // AA - Used for DAG load/store alias analysis.
-     AliasAnalysis *AA;
- 
-@@ -190,6 +203,7 @@ namespace {
-     /// Remove all instances of N from the worklist.
-     void removeFromWorklist(SDNode *N) {
-       CombinedNodes.erase(N);
-+      StoreRootCountMap.erase(N);
- 
-       auto It = WorklistMap.find(N);
-       if (It == WorklistMap.end())
-@@ -14423,6 +14437,18 @@ void DAGCombiner::getStoreMergeCandidates(
-     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
-   };
- 
-+  // Check if the pair of StoreNode and the RootNode already bail out many
-+  // times which is over the limit in dependence check.
-+  auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
-+                                        SDNode *RootNode) -> bool {
-+    auto RootCount = StoreRootCountMap.find(StoreNode);
-+    if (RootCount != StoreRootCountMap.end() &&
-+        RootCount->second.first == RootNode &&
-+        RootCount->second.second > StoreMergeDependenceLimit)
-+      return true;
-+    return false;
-+  };
-+
-   // We looking for a root node which is an ancestor to all mergable
-   // stores. We search up through a load, to our root and then down
-   // through all children. For instance we will find Store{1,2,3} if
-@@ -14450,7 +14476,8 @@ void DAGCombiner::getStoreMergeCandidates(
-             if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
-               BaseIndexOffset Ptr;
-               int64_t PtrDiff;
--              if (CandidateMatch(OtherST, Ptr, PtrDiff))
-+              if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
-+                  !OverLimitInDependenceCheck(OtherST, RootNode))
-                 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
-             }
-   } else
-@@ -14459,7 +14486,8 @@ void DAGCombiner::getStoreMergeCandidates(
-         if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
-           BaseIndexOffset Ptr;
-           int64_t PtrDiff;
--          if (CandidateMatch(OtherST, Ptr, PtrDiff))
-+          if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
-+              !OverLimitInDependenceCheck(OtherST, RootNode))
-             StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
-         }
- }
-@@ -14517,8 +14545,19 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
-   // Search through DAG. We can stop early if we find a store node.
-   for (unsigned i = 0; i < NumStores; ++i)
-     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
--                                     Max))
-+                                     Max)) {
-+      // If the searching bail out, record the StoreNode and RootNode in the
-+      // StoreRootCountMap. If we have seen the pair many times over a limit,
-+      // we won't add the StoreNode into StoreNodes set again.
-+      if (Visited.size() >= Max) {
-+        auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
-+        if (RootCount.first == RootNode)
-+          RootCount.second++;
-+        else
-+          RootCount = {RootNode, 1};
-+      }
-       return false;
-+    }
-   return true;
- }
- 
--- 
-2.25.2
-
diff --git a/deps/patches/llvm-8.0-D66401-mingw-reloc.patch b/deps/patches/llvm-8.0-D66401-mingw-reloc.patch
deleted file mode 100644
index 384399f2162f27..00000000000000
--- a/deps/patches/llvm-8.0-D66401-mingw-reloc.patch
+++ /dev/null
@@ -1,69 +0,0 @@
-diff --git a/test/CodeGen/X86/mingw-refptr.ll b/test/CodeGen/X86/mingw-refptr.ll
---- a/test/CodeGen/X86/mingw-refptr.ll
-+++ b/test/CodeGen/X86/mingw-refptr.ll
-@@ -1,5 +1,6 @@
- ; RUN: llc < %s -mtriple=x86_64-w64-mingw32 | FileCheck %s -check-prefix=CHECK-X64
- ; RUN: llc < %s -mtriple=i686-w64-mingw32 | FileCheck %s -check-prefix=CHECK-X86
-+; RUN: llc < %s -mtriple=i686-w64-mingw32-none-elf | FileCheck %s -check-prefix=CHECK-X86-ELF
- 
- @var = external local_unnamed_addr global i32, align 4
- @dsolocalvar = external dso_local local_unnamed_addr global i32, align 4
-@@ -16,6 +17,9 @@
- ; CHECK-X86:    movl .refptr._var, %eax
- ; CHECK-X86:    movl (%eax), %eax
- ; CHECK-X86:    retl
-+; CHECK-X86-ELF-LABEL: getVar:
-+; CHECK-X86-ELF:    movl var, %eax
-+; CHECK-X86-ELF:    retl
- entry:
-   %0 = load i32, i32* @var, align 4
-   ret i32 %0
-@@ -66,6 +70,9 @@
- ; CHECK-X86:    movl __imp__extvar, %eax
- ; CHECK-X86:    movl (%eax), %eax
- ; CHECK-X86:    retl
-+; CHECK-X86-ELF-LABEL: getExtVar:
-+; CHECK-X86-ELF:    movl extvar, %eax
-+; CHECK-X86-ELF:    retl
- entry:
-   %0 = load i32, i32* @extvar, align 4
-   ret i32 %0
-diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
---- a/lib/Target/X86/X86Subtarget.cpp
-+++ b/lib/Target/X86/X86Subtarget.cpp
-@@ -146,6 +146,9 @@
-       return X86II::MO_DLLIMPORT;
-     return X86II::MO_COFFSTUB;
-   }
-+  // Some JIT users use *-win32-elf triples; these shouldn't use GOT tables.
-+  if (isOSWindows())
-+    return X86II::MO_NO_FLAG;
- 
-   if (is64Bit()) {
-     // ELF supports a large, truly PIC code model with non-PC relative GOT
-diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
---- a/lib/Target/TargetMachine.cpp
-+++ b/lib/Target/TargetMachine.cpp
-@@ -128,8 +128,8 @@
-   // don't assume the variables to be DSO local unless we actually know
-   // that for sure. This only has to be done for variables; for functions
-   // the linker can insert thunks for calling functions from another DLL.
--  if (TT.isWindowsGNUEnvironment() && GV && GV->isDeclarationForLinker() &&
--      isa<GlobalVariable>(GV))
-+  if (TT.isWindowsGNUEnvironment() && TT.isOSBinFormatCOFF() && GV &&
-+      GV->isDeclarationForLinker() && isa<GlobalVariable>(GV))
-     return false;
- 
-   // On COFF, don't mark 'extern_weak' symbols as DSO local. If these symbols
-@@ -142,7 +142,9 @@
-   // Make an exception for windows OS in the triple: Some firmware builds use
-   // *-win32-macho triples. This (accidentally?) produced windows relocations
-   // without GOT tables in older clang versions; Keep this behaviour.
--  if (TT.isOSBinFormatCOFF() || (TT.isOSWindows() && TT.isOSBinFormatMachO()))
-+  // Some JIT users use *-win32-elf triples; these shouldn't use GOT tables
-+  // either.
-+  if (TT.isOSBinFormatCOFF() || TT.isOSWindows())
-     return true;
- 
-   // Most PIC code sequences that assume that a symbol is local cannot
-
diff --git a/deps/patches/llvm-8.0-D66657-codegen-degenerate.patch b/deps/patches/llvm-8.0-D66657-codegen-degenerate.patch
deleted file mode 100644
index ddcf4dc7ddaa66..00000000000000
--- a/deps/patches/llvm-8.0-D66657-codegen-degenerate.patch
+++ /dev/null
@@ -1,65 +0,0 @@
-From 4c7e1defbddafcfcfe1211b041d43a36114a8f48 Mon Sep 17 00:00:00 2001
-From: Valentin Churavy <v.churavy@gmail.com>
-Date: Sat, 14 Dec 2019 10:33:30 -0500
-Subject: [PATCH 2/2] [CodegenPrepare] Guard against degenerate branches
-
-Summary:
-Guard against a potential crash observed in https://github.com/JuliaLang/julia/issues/32994#issuecomment-524249628
-If two branches are collapsed we can encounter a degenerate conditional branch `TBB==FBB`.
-The subsequent code assumes that they differ, so we exit out early.
-
-Reviewers: ributzka, spatel
-
-Subscribers: loladiro, dexonsmith, hiraditya, llvm-commits
-
-Tags: #llvm
-
-Differential Revision: https://reviews.llvm.org/D66657
----
- llvm/lib/CodeGen/CodeGenPrepare.cpp            |  4 ++++
- .../CodeGen/X86/codegen-prepare-collapse.ll    | 18 ++++++++++++++++++
- 2 files changed, 22 insertions(+)
- create mode 100644 llvm/test/CodeGen/X86/codegen-prepare-collapse.ll
-
-diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
-index c35f8666fa3..3647641c594 100644
---- a/lib/CodeGen/CodeGenPrepare.cpp
-+++ b/lib/CodeGen/CodeGenPrepare.cpp
-@@ -6929,6 +6929,10 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
-     if (Br1->getMetadata(LLVMContext::MD_unpredictable))
-       continue;
- 
-+    // The merging of mostly empty BB can cause a degenerate branch.
-+    if (TBB == FBB)
-+      continue;
-+
-     unsigned Opc;
-     Value *Cond1, *Cond2;
-     if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)),
-diff --git a/test/CodeGen/X86/codegen-prepare-collapse.ll b/test/CodeGen/X86/codegen-prepare-collapse.ll
-new file mode 100644
-index 00000000000..18e3ef7afbd
---- /dev/null
-+++ b/test/CodeGen/X86/codegen-prepare-collapse.ll
-@@ -0,0 +1,18 @@
-+; RUN: llc -fast-isel=true -O1 -mtriple=x86_64-unkown-linux-gnu -start-before=codegenprepare -stop-after=codegenprepare -o - < %s | FileCheck %s
-+
-+; CHECK-LABEL: @foo
-+define void @foo() {
-+top:
-+; CHECK: br label %L34
-+  br label %L34
-+
-+L34:                                              ; preds = %L34, %L34, %top
-+  %.sroa.075.0 = phi i64 [ undef, %top ], [ undef, %L34 ], [ undef, %L34 ]
-+  %0 = icmp sgt i8 undef, -1
-+  %cond5896 = icmp eq i8 0, 2
-+  %cond58 = and i1 %cond5896, %0
-+; During codegenprepare such degenerate branches can occur and should not
-+; lead to crashes.
-+; CHECK: br label %L34
-+  br i1 %cond58, label %L34, label %L34
-+}
--- 
-2.24.1
-
diff --git a/deps/patches/llvm-8.0-D71495-vectorize-freduce.patch b/deps/patches/llvm-8.0-D71495-vectorize-freduce.patch
deleted file mode 100644
index fb461920c176fb..00000000000000
--- a/deps/patches/llvm-8.0-D71495-vectorize-freduce.patch
+++ /dev/null
@@ -1,94 +0,0 @@
-From 7c30e23f115ae285b497ef11af0153703111dff2 Mon Sep 17 00:00:00 2001
-From: Valentin Churavy <v.churavy@gmail.com>
-Date: Sun, 22 Dec 2019 14:25:50 -0500
-Subject: [PATCH 1/2] [SelectionDAG] Copy FP flags when visiting a binary
- instruction.
-
-Summary:
-We noticed in Julia that the sequence below no longer turned into
-a sequence of FMA instructions in LLVM 7+, but it did in LLVM 6.
-
-```
-    %29 = fmul contract <4 x double> %wide.load, %wide.load16
-    %30 = fmul contract <4 x double> %wide.load13, %wide.load17
-    %31 = fmul contract <4 x double> %wide.load14, %wide.load18
-    %32 = fmul contract <4 x double> %wide.load15, %wide.load19
-    %33 = fadd fast <4 x double> %vec.phi, %29
-    %34 = fadd fast <4 x double> %vec.phi10, %30
-    %35 = fadd fast <4 x double> %vec.phi11, %31
-    %36 = fadd fast <4 x double> %vec.phi12, %32
-```
-
-Unlike Clang, Julia doesn't set the `unsafe-fp-math=true` function
-attribute, but rather emits more local instruction flags.
-
-This partially undoes https://reviews.llvm.org/D46854 and if required I can try to minimize the test further.
-
-Reviewers: spatel, mcberg2017
-
-Reviewed By: spatel
-
-Subscribers: chriselrod, merge_guards_bot, hiraditya, llvm-commits
-
-Tags: #llvm
-
-Differential Revision: https://reviews.llvm.org/D71495
----
- .../SelectionDAG/SelectionDAGBuilder.cpp      |  7 +++++
- llvm/test/CodeGen/X86/fmf-reduction.ll        | 26 +++++++++++++++++++
- 2 files changed, 33 insertions(+)
- create mode 100644 llvm/test/CodeGen/X86/fmf-reduction.ll
-
-diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
-index bfeb3d1bc2b..e6362c19691 100644
---- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
-+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
-@@ -2833,6 +2833,13 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
-   if (isVectorReductionOp(&I)) {
-     Flags.setVectorReduction(true);
-     LLVM_DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n");
-+
-+    // If no flags are set we will propagate the incoming flags, if any flags
-+    // are set, we will intersect them with the incoming flag and so we need to
-+    // copy the FMF flags here.
-+    if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) {
-+      Flags.copyFMF(*FPOp);
-+    }
-   }
- 
-   SDValue Op1 = getValue(I.getOperand(0));
-diff --git a/test/CodeGen/X86/fmf-reduction.ll b/test/CodeGen/X86/fmf-reduction.ll
-new file mode 100644
-index 00000000000..1d669d2a924
---- /dev/null
-+++ b/test/CodeGen/X86/fmf-reduction.ll
-@@ -0,0 +1,26 @@
-+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-+; RUN: llc < %s -mtriple=x86_64-- -mattr=fma | FileCheck %s
-+
-+; Propagation of IR FMF should not drop flags when adding the DAG reduction flag.
-+; This should include an FMA instruction, not separate FMUL/FADD.
-+
-+define double @julia_dotf(<4 x double> %x, <4 x double> %y, <4 x double> %z, i1 %t3) {
-+; CHECK-LABEL: julia_dotf:
-+; CHECK:       # %bb.0:
-+; CHECK-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
-+; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
-+; CHECK-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
-+; CHECK-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-+; CHECK-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
-+; CHECK-NEXT:    vzeroupper
-+; CHECK-NEXT:    retq
-+  %t1 = fmul contract <4 x double> %x, %y
-+  %t2 = fadd fast <4 x double> %z, %t1
-+  %rdx.shuf = shufflevector <4 x double> %t2, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
-+  %bin.rdx22 = fadd fast <4 x double> %t2, %rdx.shuf
-+  %rdx.shuf23 = shufflevector <4 x double> %bin.rdx22, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
-+  %bin.rdx24 = fadd fast <4 x double> %bin.rdx22, %rdx.shuf23
-+  %t4 = extractelement <4 x double> %bin.rdx24, i32 0
-+  ret double %t4
-+}
-+
--- 
-2.24.1
-
diff --git a/deps/patches/llvm-9.0-D65174-limit-merge-stores.patch b/deps/patches/llvm-9.0-D65174-limit-merge-stores.patch
deleted file mode 100644
index 6d6cfb4acd8001..00000000000000
--- a/deps/patches/llvm-9.0-D65174-limit-merge-stores.patch
+++ /dev/null
@@ -1,116 +0,0 @@
-commit f49c107f06c6a98d11a09d758f08554c78b9b933
-Author: Wei Mi <wmi@google.com>
-Date:   Wed Jul 31 19:59:24 2019 +0000
-
-    [DAGCombine] Limit the number of times for the same store and root nodes
-    to bail out in store merging dependence check.
-    
-    We run into a case where dependence check in store merging bail out many times
-    for the same store and root nodes in a huge basicblock. That increases compile
-    time by almost 100x. The patch add a map to track how many times the bailing
-    out happen for the same store and root, and if it is over a limit, stop
-    considering the store with the same root as a merging candidate.
-    
-    Differential Revision: https://reviews.llvm.org/D65174
-    
-    llvm-svn: 367472
-
-diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
-index bf62aa86509..2e5ba82af22 100644
---- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
-+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
-@@ -120,6 +120,11 @@ static cl::opt<unsigned> TokenFactorInlineLimit(
-     "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
-     cl::desc("Limit the number of operands to inline for Token Factors"));
- 
-+static cl::opt<unsigned> StoreMergeDependenceLimit(
-+    "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
-+    cl::desc("Limit the number of times for the same StoreNode and RootNode "
-+             "to bail out in store merging dependence check"));
-+
- namespace {
- 
-   class DAGCombiner {
-@@ -157,6 +162,14 @@ namespace {
-     /// which have not yet been combined to the worklist.
-     SmallPtrSet<SDNode *, 32> CombinedNodes;
- 
-+    /// Map from candidate StoreNode to the pair of RootNode and count.
-+    /// The count is used to track how many times we have seen the StoreNode
-+    /// with the same RootNode bail out in dependence check. If we have seen
-+    /// the bail out for the same pair many times over a limit, we won't
-+    /// consider the StoreNode with the same RootNode as store merging
-+    /// candidate again.
-+    DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
-+
-     // AA - Used for DAG load/store alias analysis.
-     AliasAnalysis *AA;
- 
-@@ -241,6 +254,7 @@ namespace {
-     void removeFromWorklist(SDNode *N) {
-       CombinedNodes.erase(N);
-       PruningList.remove(N);
-+      StoreRootCountMap.erase(N);
- 
-       auto It = WorklistMap.find(N);
-       if (It == WorklistMap.end())
-@@ -15423,6 +15437,18 @@ void DAGCombiner::getStoreMergeCandidates(
-     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
-   };
- 
-+  // Check if the pair of StoreNode and the RootNode already bail out many
-+  // times which is over the limit in dependence check.
-+  auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
-+                                        SDNode *RootNode) -> bool {
-+    auto RootCount = StoreRootCountMap.find(StoreNode);
-+    if (RootCount != StoreRootCountMap.end() &&
-+        RootCount->second.first == RootNode &&
-+        RootCount->second.second > StoreMergeDependenceLimit)
-+      return true;
-+    return false;
-+  };
-+
-   // We looking for a root node which is an ancestor to all mergable
-   // stores. We search up through a load, to our root and then down
-   // through all children. For instance we will find Store{1,2,3} if
-@@ -15452,7 +15478,8 @@ void DAGCombiner::getStoreMergeCandidates(
-             if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
-               BaseIndexOffset Ptr;
-               int64_t PtrDiff;
--              if (CandidateMatch(OtherST, Ptr, PtrDiff))
-+              if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
-+                  !OverLimitInDependenceCheck(OtherST, RootNode))
-                 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
-             }
-   } else
-@@ -15462,7 +15489,8 @@ void DAGCombiner::getStoreMergeCandidates(
-         if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
-           BaseIndexOffset Ptr;
-           int64_t PtrDiff;
--          if (CandidateMatch(OtherST, Ptr, PtrDiff))
-+          if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
-+              !OverLimitInDependenceCheck(OtherST, RootNode))
-             StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
-         }
- }
-@@ -15520,8 +15548,19 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
-   // Search through DAG. We can stop early if we find a store node.
-   for (unsigned i = 0; i < NumStores; ++i)
-     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
--                                     Max))
-+                                     Max)) {
-+      // If the searching bail out, record the StoreNode and RootNode in the
-+      // StoreRootCountMap. If we have seen the pair many times over a limit,
-+      // we won't add the StoreNode into StoreNodes set again.
-+      if (Visited.size() >= Max) {
-+        auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
-+        if (RootCount.first == RootNode)
-+          RootCount.second++;
-+        else
-+          RootCount = {RootNode, 1};
-+      }
-       return false;
-+    }
-   return true;
- }
- 
diff --git a/deps/patches/llvm-9.0-D78196.patch b/deps/patches/llvm-9.0-D78196.patch
deleted file mode 100644
index 6ae23fd7d8600d..00000000000000
--- a/deps/patches/llvm-9.0-D78196.patch
+++ /dev/null
@@ -1,15 +0,0 @@
-diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
---- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
-+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
-@@ -210,6 +210,10 @@
-     for (auto *Sym : UpdateOther)
-       if (Sym->isVariable())
-         copyLocalEntry(Sym, Sym->getVariableValue());
-+
-+    // Clear the set of symbols that needs to be updated so the streamer can
-+    // be reused without issues.
-+    UpdateOther.clear();
-   }
- 
- private:
-
diff --git a/deps/patches/llvm-9.0-D85499.patch b/deps/patches/llvm-9.0-D85499.patch
deleted file mode 100644
index 1be91fc4717f58..00000000000000
--- a/deps/patches/llvm-9.0-D85499.patch
+++ /dev/null
@@ -1,425 +0,0 @@
-commit ac8729e23232d0fd3933b76093a40b7c65332aff
-Author: Keno Fischer <keno@juliacomputing.com>
-Date:   Fri Aug 7 00:31:43 2020 -0400
-
-    [X86] Canonicalize andnp for bitmask arithmetic
-    
-    We have a DAG combine that tries to fold (vselect cond, 0000..., X) -> (andnp cond, x).
-    However, it does so by attempting to create an i64 vector with the number
-    of elements obtained by truncating division by 64 from the bitwidth. This is
-    bad for mask vectors like v8i1, since that division is just zero. Besides,
-    we don't want i64 vectors anyway. The easy change is just to avoid changing
-    the VT, but this is slightly problematic because the canonical pattern for
-    `kandn` is `(and (vnot a) b)` rather than `(x86andnp a b)`, so this fails
-    to select. Rather than playing games here with having the mask vectors
-    use a different canonical representation, the bulk of this commit switches
-    the canonical ISD representation for `kandn` to `(x86andnp a b)` such
-    that all vector types may be handled equally here. To avoid regressing
-    other tests, we need to extend a few other folds to handle `x86andnp` in
-    addition to plain `and`. However, that should be generally a good
-    improvement, since x86andnp is already canonical for non-i1 vectors
-    prior to this commit, and said folds were just missing.
-    
-    When all is said and done, fixes the issue reported in
-    https://github.com/JuliaLang/julia/issues/36955.
-    
-    Differential Revision: https://reviews.llvm.org/D85499
-
-diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
-index 34ad589d205..eb21b0de89d 100644
---- a/lib/Target/X86/X86ISelDAGToDAG.cpp
-+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
-@@ -503,7 +503,7 @@ namespace {
-     bool isMaskZeroExtended(SDNode *N) const;
-     bool tryShiftAmountMod(SDNode *N);
-     bool tryShrinkShlLogicImm(SDNode *N);
--    bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);
-+    bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask, bool Invert);
- 
-     MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
-                                 const SDLoc &dl, MVT VT, SDNode *Node);
-@@ -2998,7 +2998,7 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
-       bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1));
-       // ADD/SUB with 1/-1 and carry flag isn't used can use inc/dec.
-       if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) {
--        unsigned NewOpc = 
-+        unsigned NewOpc =
-           ((Opc == X86ISD::ADD) == IsOne)
-               ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
-               : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
-@@ -3999,8 +3999,8 @@ static unsigned getVPTESTMOpc(MVT TestVT, bool IsTestN, bool FoldedLoad,
- 
- // Try to create VPTESTM instruction. If InMask is not null, it will be used
- // to form a masked operation.
--bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
--                                 SDValue InMask) {
-+bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue InMask,
-+                                 bool Invert) {
-   assert(Subtarget->hasAVX512() && "Expected AVX512!");
-   assert(Setcc.getSimpleValueType().getVectorElementType() == MVT::i1 &&
-          "Unexpected VT!");
-@@ -4140,6 +4140,9 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
-   }
- 
-   bool IsTestN = CC == ISD::SETEQ;
-+  if (Invert)
-+    IsTestN = !IsTestN;
-+
-   unsigned Opc = getVPTESTMOpc(CmpVT, IsTestN, FoldedLoad, FoldedBCast,
-                                IsMasked);
- 
-@@ -4309,16 +4312,27 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
-       return;
-     break;
- 
-+  case X86ISD::ANDNP:
-+    if (NVT.isVector() && NVT.getVectorElementType() == MVT::i1) {
-+      SDValue N0 = Node->getOperand(0);
-+      SDValue N1 = Node->getOperand(1);
-+      // Try to form a masked VPTESTM
-+      if (N0.getOpcode() == ISD::SETCC && N0.hasOneUse() &&
-+          tryVPTESTM(Node, N0, N1, true))
-+        return;
-+    }
-+    break;
-+
-   case ISD::AND:
-     if (NVT.isVector() && NVT.getVectorElementType() == MVT::i1) {
-       // Try to form a masked VPTESTM. Operands can be in either order.
-       SDValue N0 = Node->getOperand(0);
-       SDValue N1 = Node->getOperand(1);
-       if (N0.getOpcode() == ISD::SETCC && N0.hasOneUse() &&
--          tryVPTESTM(Node, N0, N1))
-+          tryVPTESTM(Node, N0, N1, false))
-         return;
-       if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
--          tryVPTESTM(Node, N1, N0))
-+          tryVPTESTM(Node, N1, N0, false))
-         return;
-     }
- 
-@@ -5000,7 +5014,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
-   }
- 
-   case ISD::SETCC: {
--    if (NVT.isVector() && tryVPTESTM(Node, SDValue(Node, 0), SDValue()))
-+    if (NVT.isVector() && tryVPTESTM(Node, SDValue(Node, 0), SDValue(), false))
-       return;
- 
-     break;
-diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
-index 920cdd7e625..6b9738074c7 100644
---- a/lib/Target/X86/X86ISelLowering.cpp
-+++ b/lib/Target/X86/X86ISelLowering.cpp
-@@ -196,7 +196,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
-   // Integer absolute.
-   if (Subtarget.hasCMov()) {
-     setOperationAction(ISD::ABS            , MVT::i16  , Custom);
--    setOperationAction(ISD::ABS            , MVT::i32  , Custom); 
-+    setOperationAction(ISD::ABS            , MVT::i32  , Custom);
-   }
-   setOperationAction(ISD::ABS              , MVT::i64  , Custom);
- 
-@@ -26053,7 +26053,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
- 
-   // If this is a canonical idempotent atomicrmw w/no uses, we have a better
-   // lowering available in lowerAtomicArith.
--  // TODO: push more cases through this path. 
-+  // TODO: push more cases through this path.
-   if (auto *C = dyn_cast<ConstantInt>(AI->getValOperand()))
-     if (AI->getOperation() == AtomicRMWInst::Or && C->isZero() &&
-         AI->use_empty())
-@@ -26111,7 +26111,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
- /// Emit a locked operation on a stack location which does not change any
- /// memory location, but does involve a lock prefix.  Location is chosen to be
- /// a) very likely accessed only by a single thread to minimize cache traffic,
--/// and b) definitely dereferenceable.  Returns the new Chain result.  
-+/// and b) definitely dereferenceable.  Returns the new Chain result.
- static SDValue emitLockedStackOp(SelectionDAG &DAG,
-                                  const X86Subtarget &Subtarget,
-                                  SDValue Chain, SDLoc DL) {
-@@ -26120,22 +26120,22 @@ static SDValue emitLockedStackOp(SelectionDAG &DAG,
-   // operations issued by the current processor.  As such, the location
-   // referenced is not relevant for the ordering properties of the instruction.
-   // See: Intel® 64 and IA-32 ArchitecturesSoftware Developer’s Manual,
--  // 8.2.3.9  Loads and Stores Are Not Reordered with Locked Instructions 
-+  // 8.2.3.9  Loads and Stores Are Not Reordered with Locked Instructions
-   // 2) Using an immediate operand appears to be the best encoding choice
-   // here since it doesn't require an extra register.
-   // 3) OR appears to be very slightly faster than ADD. (Though, the difference
-   // is small enough it might just be measurement noise.)
-   // 4) When choosing offsets, there are several contributing factors:
-   //   a) If there's no redzone, we default to TOS.  (We could allocate a cache
--  //      line aligned stack object to improve this case.) 
-+  //      line aligned stack object to improve this case.)
-   //   b) To minimize our chances of introducing a false dependence, we prefer
--  //      to offset the stack usage from TOS slightly.  
-+  //      to offset the stack usage from TOS slightly.
-   //   c) To minimize concerns about cross thread stack usage - in particular,
-   //      the idiomatic MyThreadPool.run([&StackVars]() {...}) pattern which
-   //      captures state in the TOS frame and accesses it from many threads -
-   //      we want to use an offset such that the offset is in a distinct cache
-   //      line from the TOS frame.
--  // 
-+  //
-   // For a general discussion of the tradeoffs and benchmark results, see:
-   // https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
- 
-@@ -26188,7 +26188,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget,
-     if (Subtarget.hasMFence())
-       return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
- 
--    SDValue Chain = Op.getOperand(0); 
-+    SDValue Chain = Op.getOperand(0);
-     return emitLockedStackOp(DAG, Subtarget, Chain, dl);
-   }
- 
-@@ -26677,12 +26677,12 @@ static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG,
-     // seq_cst which isn't SingleThread, everything just needs to be preserved
-     // during codegen and then dropped. Note that we expect (but don't assume),
-     // that orderings other than seq_cst and acq_rel have been canonicalized to
--    // a store or load. 
-+    // a store or load.
-     if (AN->getOrdering() == AtomicOrdering::SequentiallyConsistent &&
-         AN->getSyncScopeID() == SyncScope::System) {
-       // Prefer a locked operation against a stack location to minimize cache
-       // traffic.  This assumes that stack locations are very likely to be
--      // accessed only by the owning thread. 
-+      // accessed only by the owning thread.
-       SDValue NewChain = emitLockedStackOp(DAG, Subtarget, Chain, DL);
-       assert(!N->hasAnyUseOfValue(0));
-       // NOTE: The getUNDEF is needed to give something for the unused result 0.
-@@ -35620,7 +35620,7 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) {
-   }
- 
-   // TODO: This switch could include FNEG and the x86-specific FP logic ops
--  // (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid 
-+  // (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid
-   // missed load folding and fma+fneg combining.
-   switch (Vec.getOpcode()) {
-   case ISD::FMA: // Begin 3 operands
-@@ -35935,10 +35935,8 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
- 
-   // vselect Cond, 000..., X -> andn Cond, X
-   if (TValIsAllZeros) {
--    MVT AndNVT = MVT::getVectorVT(MVT::i64, CondVT.getSizeInBits() / 64);
--    SDValue CastCond = DAG.getBitcast(AndNVT, Cond);
--    SDValue CastRHS = DAG.getBitcast(AndNVT, RHS);
--    SDValue AndN = DAG.getNode(X86ISD::ANDNP, DL, AndNVT, CastCond, CastRHS);
-+    SDValue CastRHS = DAG.getBitcast(CondVT, RHS);
-+    SDValue AndN = DAG.getNode(X86ISD::ANDNP, DL, CondVT, Cond, CastRHS);
-     return DAG.getBitcast(VT, AndN);
-   }
- 
-@@ -38147,12 +38145,17 @@ static SDValue IsNOT(SDValue V, SelectionDAG &DAG) {
-   return SDValue();
- }
- 
--/// Try to fold: (and (xor X, -1), Y) -> (andnp X, Y).
--static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) {
-+
-+/// Try to fold:
-+///   (and (not X), Y) -> (andnp X, Y)
-+///   (and (xor X, -1), Y) -> (andnp X, Y).
-+static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG,
-+                                                 const X86Subtarget &Subtarget) {
-   assert(N->getOpcode() == ISD::AND);
- 
-   MVT VT = N->getSimpleValueType(0);
--  if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector())
-+  if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector() &&
-+      !(VT.isVector() && VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()))
-     return SDValue();
- 
-   SDValue X, Y;
-@@ -38558,7 +38561,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
-   if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
-     return FPLogic;
- 
--  if (SDValue R = combineANDXORWithAllOnesIntoANDNP(N, DAG))
-+  if (SDValue R = combineANDXORWithAllOnesIntoANDNP(N, DAG, Subtarget))
-     return R;
- 
-   if (SDValue ShiftRight = combineAndMaskToShift(N, DAG, Subtarget))
-diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
-index 54eddeacaa1..91027fa903f 100644
---- a/lib/Target/X86/X86InstrAVX512.td
-+++ b/lib/Target/X86/X86InstrAVX512.td
-@@ -2978,7 +2978,6 @@ multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
- def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
- def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
- // These nodes use 'vnot' instead of 'not' to support vectors.
--def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
- def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
- 
- // TODO - do we need a X86SchedWriteWidths::KMASK type?
-@@ -2986,7 +2985,7 @@ defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XM
- defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
- defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
- defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
--defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
-+defm KANDN : avx512_mask_binop_all<0x42, "kandn", X86andnp,   SchedWriteVecLogic.XMM, 0>;
- defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
- 
- multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
-@@ -3015,7 +3014,7 @@ multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
- }
- 
- defm : avx512_binop_pat<and,   and,  KANDWrr>;
--defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
-+defm : avx512_binop_pat<X86andnp, X86andnp, KANDNWrr>;
- defm : avx512_binop_pat<or,    or,   KORWrr>;
- defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
- defm : avx512_binop_pat<xor,   xor,  KXORWrr>;
-@@ -11570,7 +11569,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
- }
- 
- multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
--                                      AVX512VLVectorVTInfo _Vec, 
-+                                      AVX512VLVectorVTInfo _Vec,
-                                       AVX512VLVectorVTInfo _Tbl> {
-   let Predicates = [HasAVX512] in
-     defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
-@@ -11687,7 +11686,7 @@ multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode Mo
-                             (Op (_.EltVT
-                                  (extractelt (_.VT VR128X:$src1), (iPTR 0))),
-                                 _.FRC:$src2), (_.EltVT ZeroFP)))),
--      (!cast<I>("V"#OpcPrefix#Zrr_Intkz) 
-+      (!cast<I>("V"#OpcPrefix#Zrr_Intkz)
-           VK1WM:$mask, _.VT:$src1,
-           (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
-     def : Pat<(MoveNode (_.VT VR128X:$src1),
-diff --git a/test/CodeGen/X86/avx512-select.ll b/test/CodeGen/X86/avx512-select.ll
-index 1ed7b408baf..64320d63eac 100644
---- a/test/CodeGen/X86/avx512-select.ll
-+++ b/test/CodeGen/X86/avx512-select.ll
-@@ -595,3 +595,74 @@ define <16 x i64> @narrowExtractedVectorSelect_crash(<16 x i64> %arg, <16 x i16>
-   %tmp3 = zext <16 x i16> %tmp2 to <16 x i64>
-   ret <16 x i64> %tmp3
- }
-+
-+; Regression test from https://github.com/JuliaLang/julia/issues/36955
-+define i8 @julia_issue36955(<8 x i1> %mask, <8 x double> %a) {
-+; X86-AVX512F-LABEL: julia_issue36955:
-+; X86-AVX512F:       # %bb.0:
-+; X86-AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
-+; X86-AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
-+; X86-AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
-+; X86-AVX512F-NEXT:    vpxor %xmm0, %xmm0, %xmm0
-+; X86-AVX512F-NEXT:    vcmpnlepd %zmm0, %zmm1, %k1
-+; X86-AVX512F-NEXT:    kandnw %k0, %k1, %k0
-+; X86-AVX512F-NEXT:    kandw %k1, %k0, %k0
-+; X86-AVX512F-NEXT:    knotw %k1, %k1
-+; X86-AVX512F-NEXT:    korw %k1, %k0, %k0
-+; X86-AVX512F-NEXT:    kmovw %k0, %eax
-+; X86-AVX512F-NEXT:    # kill: def $al killed $al killed $eax
-+; X86-AVX512F-NEXT:    vzeroupper
-+; X86-AVX512F-NEXT:    retl
-+;
-+; X64-AVX512F-LABEL: julia_issue36955:
-+; X64-AVX512F:       # %bb.0:
-+; X64-AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
-+; X64-AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
-+; X64-AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
-+; X64-AVX512F-NEXT:    vpxor %xmm0, %xmm0, %xmm0
-+; X64-AVX512F-NEXT:    vcmpnlepd %zmm0, %zmm1, %k1
-+; X64-AVX512F-NEXT:    kandnw %k0, %k1, %k0
-+; X64-AVX512F-NEXT:    kandw %k1, %k0, %k0
-+; X64-AVX512F-NEXT:    knotw %k1, %k1
-+; X64-AVX512F-NEXT:    korw %k1, %k0, %k0
-+; X64-AVX512F-NEXT:    kmovw %k0, %eax
-+; X64-AVX512F-NEXT:    # kill: def $al killed $al killed $eax
-+; X64-AVX512F-NEXT:    vzeroupper
-+; X64-AVX512F-NEXT:    retq
-+;
-+; X86-AVX512BW-LABEL: julia_issue36955:
-+; X86-AVX512BW:       # %bb.0:
-+; X86-AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
-+; X86-AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
-+; X86-AVX512BW-NEXT:    vpxor %xmm0, %xmm0, %xmm0
-+; X86-AVX512BW-NEXT:    vcmpnlepd %zmm0, %zmm1, %k1
-+; X86-AVX512BW-NEXT:    kandnw %k0, %k1, %k0
-+; X86-AVX512BW-NEXT:    kandw %k1, %k0, %k0
-+; X86-AVX512BW-NEXT:    knotw %k1, %k1
-+; X86-AVX512BW-NEXT:    korw %k1, %k0, %k0
-+; X86-AVX512BW-NEXT:    kmovd %k0, %eax
-+; X86-AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
-+; X86-AVX512BW-NEXT:    vzeroupper
-+; X86-AVX512BW-NEXT:    retl
-+;
-+; X64-AVX512BW-LABEL: julia_issue36955:
-+; X64-AVX512BW:       # %bb.0:
-+; X64-AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
-+; X64-AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
-+; X64-AVX512BW-NEXT:    vpxor %xmm0, %xmm0, %xmm0
-+; X64-AVX512BW-NEXT:    vcmpnlepd %zmm0, %zmm1, %k1
-+; X64-AVX512BW-NEXT:    kandnw %k0, %k1, %k0
-+; X64-AVX512BW-NEXT:    kandw %k1, %k0, %k0
-+; X64-AVX512BW-NEXT:    knotw %k1, %k1
-+; X64-AVX512BW-NEXT:    korw %k1, %k0, %k0
-+; X64-AVX512BW-NEXT:    kmovd %k0, %eax
-+; X64-AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
-+; X64-AVX512BW-NEXT:    vzeroupper
-+; X64-AVX512BW-NEXT:    retq
-+  %fcmp = fcmp ugt <8 x double> %a, zeroinitializer
-+  %xor = xor <8 x i1> %fcmp, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
-+  %select1 = select <8 x i1> %fcmp, <8 x i1> zeroinitializer, <8 x i1> %mask
-+  %select2 = select <8 x i1> %xor, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i1> %select1
-+  %ret = bitcast <8 x i1> %select2 to i8
-+  ret i8 %ret
-+}
-diff --git a/test/CodeGen/X86/combine-bitselect.ll b/test/CodeGen/X86/combine-bitselect.ll
-index 8cb6a4dca09..3c08a871c86 100644
---- a/test/CodeGen/X86/combine-bitselect.ll
-+++ b/test/CodeGen/X86/combine-bitselect.ll
-@@ -616,13 +616,13 @@ define <4 x i1> @bitselect_v4i1_loop(<4 x i32> %a0, <4 x i32> %a1) {
- ; AVX512F:       # %bb.0: # %bb
- ; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
- ; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
--; AVX512F-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [12,12,12,12]
--; AVX512F-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1
-+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
-+; AVX512F-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [12,12,12,12]
- ; AVX512F-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [15,15,15,15]
--; AVX512F-NEXT:    vpcmpeqd %zmm2, %zmm1, %k2
--; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0 {%k2}
--; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1 {%k1}
--; AVX512F-NEXT:    korw %k0, %k1, %k1
-+; AVX512F-NEXT:    vpcmpeqd %zmm2, %zmm1, %k0
-+; AVX512F-NEXT:    vpcmpeqd %zmm0, %zmm1, %k2 {%k1}
-+; AVX512F-NEXT:    kandnw %k0, %k1, %k0
-+; AVX512F-NEXT:    korw %k0, %k2, %k1
- ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
- ; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
- ; AVX512F-NEXT:    vzeroupper
-diff --git a/test/CodeGen/X86/vec_ssubo.ll b/test/CodeGen/X86/vec_ssubo.ll
-index 515dc5c5aa2..dfb1e7c4dee 100644
---- a/test/CodeGen/X86/vec_ssubo.ll
-+++ b/test/CodeGen/X86/vec_ssubo.ll
-@@ -1640,7 +1640,7 @@ define <4 x i32> @ssubo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
- ; AVX512-NEXT:    vptestmd %xmm1, %xmm1, %k0
- ; AVX512-NEXT:    vpslld $31, %xmm0, %xmm0
- ; AVX512-NEXT:    vptestmd %xmm0, %xmm0, %k1
--; AVX512-NEXT:    vptestnmd %xmm1, %xmm1, %k2 {%k1}
-+; AVX512-NEXT:    kandnw %k1, %k0, %k2
- ; AVX512-NEXT:    kxorw %k0, %k1, %k0
- ; AVX512-NEXT:    kxorw %k2, %k0, %k1
- ; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-diff --git a/test/CodeGen/X86/vec_usubo.ll b/test/CodeGen/X86/vec_usubo.ll
-index c5a7b19cf14..367c491d25a 100644
---- a/test/CodeGen/X86/vec_usubo.ll
-+++ b/test/CodeGen/X86/vec_usubo.ll
-@@ -1244,10 +1244,10 @@ define <4 x i32> @usubo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
- ; AVX512:       # %bb.0:
- ; AVX512-NEXT:    vpslld $31, %xmm0, %xmm0
- ; AVX512-NEXT:    vptestmd %xmm0, %xmm0, %k0
--; AVX512-NEXT:    vpslld $31, %xmm1, %xmm1
--; AVX512-NEXT:    vptestmd %xmm1, %xmm1, %k1
-+; AVX512-NEXT:    vpslld $31, %xmm1, %xmm0
-+; AVX512-NEXT:    vptestmd %xmm0, %xmm0, %k1
- ; AVX512-NEXT:    kxorw %k1, %k0, %k1
--; AVX512-NEXT:    vptestnmd %xmm0, %xmm0, %k2 {%k1}
-+; AVX512-NEXT:    kandnw %k1, %k0, %k2
- ; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
- ; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k2} {z}
- ; AVX512-NEXT:    kmovd %k1, %eax
diff --git a/deps/patches/llvm-D57118-powerpc.patch b/deps/patches/llvm-D57118-powerpc.patch
deleted file mode 100644
index 328fe205d1280e..00000000000000
--- a/deps/patches/llvm-D57118-powerpc.patch
+++ /dev/null
@@ -1,30 +0,0 @@
-commit 812db527538f30ac77a19d755e24109a6db7e569
-Author: Keno Fischer <keno@juliacomputing.com>
-Date:   Wed Jan 23 16:46:59 2019 -0500
-
-    [CMake][PowerPC] Recognize LLVM_NATIVE_TARGET="ppc64le" as PowerPC
-    
-    Summary:
-    This value is derived from the host triple, which on the machine
-    I'm currently using is `ppc64le-linux-redhat`. This change makes
-    LLVM compile.
-    
-    Reviewers: hfinkel
-    
-    Subscribers: nemanjai, mgorny, jsji, llvm-commits
-    
-    Differential Revision: https://reviews.llvm.org/D57118
-
-diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake
-index 900c35ee4f0..b9c9757a4f6 100644
---- a/cmake/config-ix.cmake
-+++ b/cmake/config-ix.cmake
-@@ -386,6 +386,8 @@ elseif (LLVM_NATIVE_ARCH MATCHES "sparc")
-   set(LLVM_NATIVE_ARCH Sparc)
- elseif (LLVM_NATIVE_ARCH MATCHES "powerpc")
-   set(LLVM_NATIVE_ARCH PowerPC)
-+elseif (LLVM_NATIVE_ARCH MATCHES "ppc64le")
-+  set(LLVM_NATIVE_ARCH PowerPC)
- elseif (LLVM_NATIVE_ARCH MATCHES "aarch64")
-   set(LLVM_NATIVE_ARCH AArch64)
- elseif (LLVM_NATIVE_ARCH MATCHES "arm64")
diff --git a/deps/patches/llvm-exegesis-mingw.patch b/deps/patches/llvm-exegesis-mingw.patch
deleted file mode 100644
index ff11f4da8231e9..00000000000000
--- a/deps/patches/llvm-exegesis-mingw.patch
+++ /dev/null
@@ -1,24 +0,0 @@
-From 9ba86352649a39b03adce98670714c4c8eb5341d Mon Sep 17 00:00:00 2001
-From: Valentin Churavy <v.churavy@gmail.com>
-Date: Wed, 24 Jul 2019 21:19:20 -0400
-Subject: [PATCH] Fix build of llvm-exegis on mingw32
-
----
- llvm/tools/llvm-exegesis/CMakeLists.txt | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/tools/llvm-exegesis/CMakeLists.txt b/tools/llvm-exegesis/CMakeLists.txt
-index a59e1b74024..7a30e0ea98f 100644
---- a/tools/llvm-exegesis/CMakeLists.txt
-+++ b/tools/llvm-exegesis/CMakeLists.txt
-@@ -4,7 +4,7 @@ set(LLVM_LINK_COMPONENTS
-   native
-   )
- 
--add_llvm_tool(llvm-exegesis
-+add_llvm_tool(llvm-exegesis DISABLE_LLVM_LINK_LLVM_DYLIB
-   llvm-exegesis.cpp
-   )
- 
--- 
-2.22.0
diff --git a/deps/patches/llvm-symver-jlprefix.patch b/deps/patches/llvm-symver-jlprefix.patch
deleted file mode 100644
index 59872380ad0737..00000000000000
--- a/deps/patches/llvm-symver-jlprefix.patch
+++ /dev/null
@@ -1,18 +0,0 @@
-From f23277bb91a4925ba8763337137a3123a7600557 Mon Sep 17 00:00:00 2001
-From: Valentin Churavy <v.churavy@gmail.com>
-Date: Tue, 16 Jan 2018 17:29:05 -0500
-Subject: [PATCH] add JL prefix to all LLVM version suffixes
-
----
- tools/llvm-shlib/simple_version_script.map.in | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/tools/llvm-shlib/simple_version_script.map.in b/tools/llvm-shlib/simple_version_script.map.in
-index e9515fe7862..af082581627 100644
---- a/tools/llvm-shlib/simple_version_script.map.in
-+++ b/tools/llvm-shlib/simple_version_script.map.in
-@@ -1 +1 @@
--LLVM_@LLVM_VERSION_MAJOR@.@LLVM_VERSION_MINOR@ { global: *; };
-+JL_LLVM_@LLVM_VERSION_MAJOR@.@LLVM_VERSION_MINOR@ { global: *; };
---
-2.15.1
diff --git a/deps/patches/llvm-test-plugin-mingw.patch b/deps/patches/llvm-test-plugin-mingw.patch
deleted file mode 100644
index 14cf07166d8610..00000000000000
--- a/deps/patches/llvm-test-plugin-mingw.patch
+++ /dev/null
@@ -1,24 +0,0 @@
-From 9bd3774db73533c8df475639805ff1516aea274c Mon Sep 17 00:00:00 2001
-From: Valentin Churavy <v.churavy@gmail.com>
-Date: Wed, 24 Jul 2019 21:45:33 -0400
-Subject: [PATCH] add missing components to TestPlugin
-
----
- llvm/unittests/Passes/CMakeLists.txt | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/unittests/Passes/CMakeLists.txt b/unittests/Passes/CMakeLists.txt
-index 3e83b527958..4b09f47c234 100644
---- a/unittests/Passes/CMakeLists.txt
-+++ b/unittests/Passes/CMakeLists.txt
-@@ -14,7 +14,7 @@ add_llvm_unittest(PluginsTests
- export_executable_symbols(PluginsTests)
- target_link_libraries(PluginsTests PRIVATE LLVMTestingSupport)
- 
--set(LLVM_LINK_COMPONENTS)
-+set(LLVM_LINK_COMPONENTS Support Passes Core)
- add_llvm_library(TestPlugin MODULE BUILDTREE_ONLY
-   TestPlugin.cpp
-   )
--- 
-2.22.0
diff --git a/deps/patches/llvm7-D50010-VNCoercion-ni.patch b/deps/patches/llvm7-D50010-VNCoercion-ni.patch
deleted file mode 100644
index 729c4185128c9e..00000000000000
--- a/deps/patches/llvm7-D50010-VNCoercion-ni.patch
+++ /dev/null
@@ -1,67 +0,0 @@
-diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp
-index 948d9bd5baa..fbd5b9bb3be 100644
---- a/lib/Transforms/Utils/VNCoercion.cpp
-+++ b/lib/Transforms/Utils/VNCoercion.cpp
-@@ -20,7 +20,8 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
-       StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy())
-     return false;
- 
--  uint64_t StoreSize = DL.getTypeSizeInBits(StoredVal->getType());
-+  Type *StoredValTy = StoredVal->getType();
-+  uint64_t StoreSize = DL.getTypeSizeInBits(StoredValTy);
- 
-   // The store size must be byte-aligned to support future type casts.
-   if (llvm::alignTo(StoreSize, 8) != StoreSize)
-@@ -30,10 +31,15 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
-   if (StoreSize < DL.getTypeSizeInBits(LoadTy))
-     return false;
- 
--  // Don't coerce non-integral pointers to integers or vice versa.
--  if (DL.isNonIntegralPointerType(StoredVal->getType()) !=
--      DL.isNonIntegralPointerType(LoadTy))
-+  bool StoredNI = DL.isNonIntegralPointerType(StoredValTy);
-+  bool LoadNI = DL.isNonIntegralPointerType(LoadTy);
-+  if (StoredNI != LoadNI) {
-+    return false;
-+  } else if (StoredNI && LoadNI &&
-+             cast<PointerType>(StoredValTy)->getAddressSpace() !=
-+                 cast<PointerType>(LoadTy)->getAddressSpace()) {
-     return false;
-+  }
- 
-   return true;
- }
-diff --git a/test/Transforms/GVN/non-integral-pointers.ll b/test/Transforms/GVN/non-integral-pointers.ll
-index 9ae4132231d..5217fc1a06a 100644
---- a/test/Transforms/GVN/non-integral-pointers.ll
-+++ b/test/Transforms/GVN/non-integral-pointers.ll
-@@ -1,6 +1,6 @@
- ; RUN: opt -gvn -S < %s | FileCheck %s
- 
--target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4"
-+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4:5"
- target triple = "x86_64-unknown-linux-gnu"
- 
- define void @f0(i1 %alwaysFalse, i64 %val, i64* %loc) {
-@@ -37,3 +37,21 @@ define i64 @f1(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) {
-  alwaysTaken:
-   ret i64 42
- }
-+
-+ define i8 addrspace(5)* @multini(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) {
-+ ; CHECK-LABEL: @multini(
-+ ; CHECK-NOT: inttoptr
-+ ; CHECK-NOT: ptrtoint
-+ ; CHECK-NOT: addrspacecast
-+  entry:
-+   store i8 addrspace(4)* %val, i8 addrspace(4)** %loc
-+   br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken
-+
-+  neverTaken:
-+   %loc.bc = bitcast i8 addrspace(4)** %loc to i8 addrspace(5)**
-+   %differentas = load i8 addrspace(5)*, i8 addrspace(5)** %loc.bc
-+   ret i8 addrspace(5)* %differentas
-+
-+  alwaysTaken:
-+   ret i8 addrspace(5)* null
-+ }
diff --git a/deps/patches/llvm7-windows-race.patch b/deps/patches/llvm7-windows-race.patch
deleted file mode 100644
index b6ae6bae43ba42..00000000000000
--- a/deps/patches/llvm7-windows-race.patch
+++ /dev/null
@@ -1,15 +0,0 @@
-diff --git a/tools/llvm-config/CMakeLists.txt b/tools/llvm-config/CMakeLists.txt
-index f59402ac4b0..5de4c6febe7 100644
---- a/tools/llvm-config/CMakeLists.txt
-+++ b/tools/llvm-config/CMakeLists.txt
-@@ -77,5 +77,10 @@ if(CMAKE_CROSSCOMPILING AND NOT LLVM_CONFIG_PATH)
-   add_custom_target(NativeLLVMConfig DEPENDS ${LLVM_CONFIG_PATH})
-   add_dependencies(NativeLLVMConfig CONFIGURE_LLVM_NATIVE)
- 
-+  # Add a dependency on the host tblgen, which uses the same working
-+  # directory and with which we're otherwise racing to build some
-+  # of the utility libraries.
-+  add_dependencies(NativeLLVMConfig LLVM-tablegen-host)
-+
-   add_dependencies(llvm-config NativeLLVMConfig)
- endif()
diff --git a/deps/patches/llvm9-D71443-PPC-MC-redef-symbol.patch b/deps/patches/llvm9-D71443-PPC-MC-redef-symbol.patch
deleted file mode 100644
index 904514a60f83f3..00000000000000
--- a/deps/patches/llvm9-D71443-PPC-MC-redef-symbol.patch
+++ /dev/null
@@ -1,47 +0,0 @@
-From 5cd52dbfa9c60cfd12676924bed97701ee9bc4ef Mon Sep 17 00:00:00 2001
-From: Fangrui Song <maskray@google.com>
-Date: Thu, 12 Dec 2019 16:18:57 -0800
-Subject: [PATCH] [MC][PowerPC] Fix a crash when redefining a symbol after .set
-
-Fix PR44284. This is probably not valid assembly but we should not crash.
-
-Reviewed By: luporl, #powerpc, steven.zhang
-
-Differential Revision: https://reviews.llvm.org/D71443
-
-(cherry picked from commit f99eedeb72644671cd584f48e4c136d47f6b0020)
----
- llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 3 ++-
- llvm/test/MC/PowerPC/ppc64-localentry-symbols.s          | 5 +++++
- 2 files changed, 7 insertions(+), 1 deletion(-)
-
-diff --git llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
-index 90c3c8d20ed..71f926c265e 100644
---- llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
-+++ llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
-@@ -196,7 +196,8 @@ public:
- 
-   void finish() override {
-     for (auto *Sym : UpdateOther)
--      copyLocalEntry(Sym, Sym->getVariableValue());
-+      if (Sym->isVariable())
-+        copyLocalEntry(Sym, Sym->getVariableValue());
-   }
- 
- private:
-diff --git llvm/test/MC/PowerPC/ppc64-localentry-symbols.s llvm/test/MC/PowerPC/ppc64-localentry-symbols.s
-index f1d5c5d0ab1..a663af57ad4 100644
---- llvm/test/MC/PowerPC/ppc64-localentry-symbols.s
-+++ llvm/test/MC/PowerPC/ppc64-localentry-symbols.s
-@@ -32,3 +32,8 @@ func:
-   nop
-   nop
-   .localentry func, 8
-+
-+## PR44284 Don't crash if err is redefined after .set
-+.set err, _err
-+.globl err
-+err:
--- 
-2.26.0
-
diff --git a/deps/patches/neoverse-generic-kernels.patch b/deps/patches/neoverse-generic-kernels.patch
new file mode 100644
index 00000000000000..ab37e3783bf3e5
--- /dev/null
+++ b/deps/patches/neoverse-generic-kernels.patch
@@ -0,0 +1,19 @@
+diff --git a/kernel/arm64/KERNEL.NEOVERSEN1 b/kernel/arm64/KERNEL.NEOVERSEN1
+index ea010db4..074d7215 100644
+--- a/kernel/arm64/KERNEL.NEOVERSEN1
++++ b/kernel/arm64/KERNEL.NEOVERSEN1
+@@ -91,10 +91,10 @@ IDAMAXKERNEL   = iamax_thunderx2t99.c
+ ICAMAXKERNEL   = izamax_thunderx2t99.c
+ IZAMAXKERNEL   = izamax_thunderx2t99.c
+ 
+-SNRM2KERNEL    = scnrm2_thunderx2t99.c
+-DNRM2KERNEL    = dznrm2_thunderx2t99.c
+-CNRM2KERNEL    = scnrm2_thunderx2t99.c
+-ZNRM2KERNEL    = dznrm2_thunderx2t99.c
++SNRM2KERNEL    = nrm2.S
++DNRM2KERNEL    = nrm2.S
++CNRM2KERNEL    = znrm2.S
++ZNRM2KERNEL    = znrm2.S
+ 
+ DDOTKERNEL     = dot_thunderx2t99.c
+ SDOTKERNEL     = dot_thunderx2t99.c
diff --git a/deps/patches/openblas-exshift.patch b/deps/patches/openblas-exshift.patch
deleted file mode 100644
index 4a0016ef662520..00000000000000
--- a/deps/patches/openblas-exshift.patch
+++ /dev/null
@@ -1,149 +0,0 @@
-commit c4b5abbe43d7c22215ef36ef4f7c1413c975678c
-Author: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
-Date:   Fri Jan 29 10:45:36 2021 +0100
-
-    fix data type
-
-commit f87842483eee9d158f44d51d4c09662c3cff7526
-Author: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
-Date:   Fri Jan 29 09:56:12 2021 +0100
-
-    fix calculation of non-exceptional shift (from Reference-LAPACK PR 477)
-
-commit 856bc365338f7559639f341d76ca8746d1628ee5
-Author: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
-Date:   Wed Jan 27 13:41:45 2021 +0100
-
-    Add exceptional shift to fix rare convergence problems
-
----
-diff --git a/lapack-netlib/SRC/chgeqz.f b/lapack-netlib/SRC/chgeqz.f
-index 73d35621..4725e716 100644
---- a/lapack-netlib/SRC/chgeqz.f
-+++ b/lapack-netlib/SRC/chgeqz.f
-@@ -320,12 +320,13 @@
-      $                   C, SAFMIN, TEMP, TEMP2, TEMPR, ULP
-       COMPLEX            ABI22, AD11, AD12, AD21, AD22, CTEMP, CTEMP2,
-      $                   CTEMP3, ESHIFT, RTDISC, S, SHIFT, SIGNBC, T1,
--     $                   U12, X
-+     $                   U12, X, ABI12, Y
- *     ..
- *     .. External Functions ..
-+      COMPLEX            CLADIV
-       LOGICAL            LSAME
-       REAL               CLANHS, SLAMCH
--      EXTERNAL           LSAME, CLANHS, SLAMCH
-+      EXTERNAL           CLADIV, LLSAME, CLANHS, SLAMCH
- *     ..
- *     .. External Subroutines ..
-       EXTERNAL           CLARTG, CLASET, CROT, CSCAL, XERBLA
-@@ -729,22 +730,34 @@
-             AD22 = ( ASCALE*H( ILAST, ILAST ) ) /
-      $             ( BSCALE*T( ILAST, ILAST ) )
-             ABI22 = AD22 - U12*AD21
-+            ABI12 = AD12 - U12*AD11
- *
--            T1 = HALF*( AD11+ABI22 )
--            RTDISC = SQRT( T1**2+AD12*AD21-AD11*AD22 )
--            TEMP = REAL( T1-ABI22 )*REAL( RTDISC ) +
--     $             AIMAG( T1-ABI22 )*AIMAG( RTDISC )
--            IF( TEMP.LE.ZERO ) THEN
--               SHIFT = T1 + RTDISC
--            ELSE
--               SHIFT = T1 - RTDISC
-+            SHIFT = ABI22
-+            CTEMP = SQRT( ABI12 )*SQRT( AD21 )
-+            TEMP = ABS1( CTEMP )
-+            IF( CTEMP.NE.ZERO ) THEN
-+               X = HALF*( AD11-SHIFT )
-+               TEMP2 = ABS1( X )
-+               TEMP = MAX( TEMP, ABS1( X ) )
-+               Y = TEMP*SQRT( ( X / TEMP )**2+( CTEMP / TEMP )**2 )
-+               IF( TEMP2.GT.ZERO ) THEN
-+                  IF( REAL( X / TEMP2 )*REAL( Y )+
-+     $                AIMAG( X / TEMP2 )*AIMAG( Y ).LT.ZERO )Y = -Y
-+               END IF
-+               SHIFT = SHIFT - CTEMP*CLADIV( CTEMP, ( X+Y ) )
-             END IF
-          ELSE
- *
- *           Exceptional shift.  Chosen for no particularly good reason.
- *
--            ESHIFT = ESHIFT + (ASCALE*H(ILAST,ILAST-1))/
--     $                        (BSCALE*T(ILAST-1,ILAST-1))
-+            IF( ( IITER / 20 )*20.EQ.IITER .AND. 
-+     $         BSCALE*ABS1(T( ILAST, ILAST )).GT.SAFMIN ) THEN
-+               ESHIFT = ESHIFT + ( ASCALE*H( ILAST,
-+     $            ILAST ) )/( BSCALE*T( ILAST, ILAST ) )
-+            ELSE
-+               ESHIFT = ESHIFT + ( ASCALE*H( ILAST,
-+     $            ILAST-1 ) )/( BSCALE*T( ILAST-1, ILAST-1 ) )
-+            END IF
-             SHIFT = ESHIFT
-          END IF
- *
-diff --git a/lapack-netlib/SRC/zhgeqz.f b/lapack-netlib/SRC/zhgeqz.f
-index b51cba4f..b28ae47a 100644
---- a/lapack-netlib/SRC/zhgeqz.f
-+++ b/lapack-netlib/SRC/zhgeqz.f
-@@ -320,12 +320,13 @@
-      $                   C, SAFMIN, TEMP, TEMP2, TEMPR, ULP
-       COMPLEX*16         ABI22, AD11, AD12, AD21, AD22, CTEMP, CTEMP2,
-      $                   CTEMP3, ESHIFT, RTDISC, S, SHIFT, SIGNBC, T1,
--     $                   U12, X
-+     $                   U12, X, ABI12, Y
- *     ..
- *     .. External Functions ..
-+      COMPLEX*16         ZLADIV
-       LOGICAL            LSAME
-       DOUBLE PRECISION   DLAMCH, ZLANHS
--      EXTERNAL           LSAME, DLAMCH, ZLANHS
-+      EXTERNAL           ZLADIV, LSAME, DLAMCH, ZLANHS
- *     ..
- *     .. External Subroutines ..
-       EXTERNAL           XERBLA, ZLARTG, ZLASET, ZROT, ZSCAL
-@@ -730,22 +731,34 @@
-             AD22 = ( ASCALE*H( ILAST, ILAST ) ) /
-      $             ( BSCALE*T( ILAST, ILAST ) )
-             ABI22 = AD22 - U12*AD21
-+            ABI12 = AD12 - U12*AD11
- *
--            T1 = HALF*( AD11+ABI22 )
--            RTDISC = SQRT( T1**2+AD12*AD21-AD11*AD22 )
--            TEMP = DBLE( T1-ABI22 )*DBLE( RTDISC ) +
--     $             DIMAG( T1-ABI22 )*DIMAG( RTDISC )
--            IF( TEMP.LE.ZERO ) THEN
--               SHIFT = T1 + RTDISC
--            ELSE
--               SHIFT = T1 - RTDISC
-+            SHIFT = ABI22
-+            CTEMP = SQRT( ABI12 )*SQRT( AD21 )
-+            TEMP = ABS1( CTEMP )
-+            IF( CTEMP.NE.ZERO ) THEN
-+               X = HALF*( AD11-SHIFT )
-+               TEMP2 = ABS1( X )
-+               TEMP = MAX( TEMP, ABS1( X ) )
-+               Y = TEMP*SQRT( ( X / TEMP )**2+( CTEMP / TEMP )**2 )
-+               IF( TEMP2.GT.ZERO ) THEN
-+                  IF( DBLE( X / TEMP2 )*DBLE( Y )+
-+     $                DIMAG( X / TEMP2 )*DIMAG( Y ).LT.ZERO )Y = -Y
-+               END IF
-+               SHIFT = SHIFT - CTEMP*ZLADIV( CTEMP, ( X+Y ) )
-             END IF
-          ELSE
- *
- *           Exceptional shift.  Chosen for no particularly good reason.
- *
--            ESHIFT = ESHIFT + (ASCALE*H(ILAST,ILAST-1))/
--     $                        (BSCALE*T(ILAST-1,ILAST-1))
-+            IF( ( IITER / 20 )*20.EQ.IITER .AND. 
-+     $         BSCALE*ABS1(T( ILAST, ILAST )).GT.SAFMIN ) THEN
-+               ESHIFT = ESHIFT + ( ASCALE*H( ILAST,
-+     $            ILAST ) )/( BSCALE*T( ILAST, ILAST ) )
-+            ELSE
-+               ESHIFT = ESHIFT + ( ASCALE*H( ILAST,
-+     $            ILAST-1 ) )/( BSCALE*T( ILAST-1, ILAST-1 ) )
-+            END IF
-             SHIFT = ESHIFT
-          END IF
- *
diff --git a/deps/patches/openblas-ofast-power.patch b/deps/patches/openblas-ofast-power.patch
index 2bb01c9b08115d..c741496cae757c 100644
--- a/deps/patches/openblas-ofast-power.patch
+++ b/deps/patches/openblas-ofast-power.patch
@@ -1,19 +1,17 @@
- Makefile.power | 6 +++---
- 1 file changed, 3 insertions(+), 3 deletions(-)
-
 diff --git a/Makefile.power b/Makefile.power
-index c7e97229..8426e816 100644
+index 946f5523..19593050 100644
 --- a/Makefile.power
 +++ b/Makefile.power
-@@ -10,13 +10,13 @@ USE_OPENMP = 1
- endif
- 
+@@ -11,14 +11,14 @@ endif
+
  ifeq ($(CORE), POWER10)
+ ifneq ($(C_COMPILER), PGI)
 -CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
 +CCOMMON_OPT += -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
  FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10  -fno-fast-math
  endif
- 
+ endif
+
  ifeq ($(CORE), POWER9)
  ifneq ($(C_COMPILER), PGI)
 -CCOMMON_OPT += -Ofast -mvsx -fno-fast-math
@@ -21,8 +19,8 @@ index c7e97229..8426e816 100644
  ifeq ($(C_COMPILER), GCC)
  ifneq ($(GCCVERSIONGT4), 1)
  $(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
-@@ -49,7 +49,7 @@ endif
- 
+@@ -51,7 +51,7 @@ endif
+
  ifeq ($(CORE), POWER8)
  ifneq ($(C_COMPILER), PGI)
 -CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx  -fno-fast-math
diff --git a/deps/patches/openblas-winexit.patch b/deps/patches/openblas-winexit.patch
index 01085102f331a2..33389f34a40742 100644
--- a/deps/patches/openblas-winexit.patch
+++ b/deps/patches/openblas-winexit.patch
@@ -1,13 +1,5 @@
-From f919c3301fabbaa5d965dcc7b1c3d6892a8c730a Mon Sep 17 00:00:00 2001
-From: Keno Fischer <keno@juliacomputing.com>
-Date: Sat, 14 Mar 2020 12:05:19 +0100
-
----
- driver/others/memory.c | 131 +------------------------------------------------
- 1 file changed, 2 insertions(+), 129 deletions(-)
-
 diff --git a/driver/others/memory.c b/driver/others/memory.c
-index ba2bb55b..bf6b5529 100644
+index 6e654ccf..1d2f9f12 100644
 --- a/driver/others/memory.c
 +++ b/driver/others/memory.c
 @@ -1534,7 +1534,7 @@ void CONSTRUCTOR gotoblas_init(void) {
@@ -19,11 +11,10 @@ index ba2bb55b..bf6b5529 100644
  
    if (gotoblas_initialized == 0) return;
  
-@@ -1571,74 +1571,12 @@ void DESTRUCTOR gotoblas_quit(void) {
- #endif
+@@ -1572,75 +1572,11 @@ void DESTRUCTOR gotoblas_quit(void) {
  }
  
--#if defined(_MSC_VER) && !defined(__clang__)
+ #if defined(_MSC_VER) && !defined(__clang__)
 -BOOL APIENTRY DllMain(HMODULE hModule, DWORD  ul_reason_for_call, LPVOID lpReserved)
 -{
 -  switch (ul_reason_for_call)
@@ -65,16 +56,18 @@ index ba2bb55b..bf6b5529 100644
  #else
  #pragma comment(linker, "/INCLUDE:__tls_used")
  #endif
- 
+-
 -#ifdef _WIN64
 -#pragma const_seg(".CRT$XLB")
 -#else
 -#pragma data_seg(".CRT$XLB")
 -#endif
--static void (APIENTRY *dll_callback)(HINSTANCE h, DWORD ul_reason_for_call, PVOID pv) = DllMain;
+-
 -#ifdef _WIN64
+-static const PIMAGE_TLS_CALLBACK dll_callback(HINSTANCE h, DWORD ul_reason_for_call, PVOID pv) = DllMain;
 -#pragma const_seg()
 -#else
+-static void (APIENTRY *dll_callback)(HINSTANCE h, DWORD ul_reason_for_call, PVOID pv) = DllMain;
 -#pragma data_seg()
 -#endif
 -
@@ -83,18 +76,18 @@ index ba2bb55b..bf6b5529 100644
 -#else
 -#pragma data_seg(".CRT$XTU")
 -#endif
--static int(*p_process_term)(void) = on_process_term;
+-
 -#ifdef _WIN64
+-static const int(*p_process_term)(void) = on_process_term;
 -#pragma const_seg()
 -#else
+-static int(*p_process_term)(void) = on_process_term;
 -#pragma data_seg()
 -#endif
--#endif
--
+ #endif
+ 
  #if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64))
- /* Don't call me; this is just work around for PGI / Sun bug */
- void gotoblas_dummy_for_PGI(void) {
-@@ -3136,7 +3074,7 @@ void CONSTRUCTOR gotoblas_init(void) {
+@@ -3146,7 +3082,7 @@ void CONSTRUCTOR gotoblas_init(void) {
  
  }
  
@@ -103,7 +96,7 @@ index ba2bb55b..bf6b5529 100644
  
    if (gotoblas_initialized == 0) return;
  
-@@ -3165,71 +3103,6 @@ void DESTRUCTOR gotoblas_quit(void) {
+@@ -3175,71 +3111,6 @@ void DESTRUCTOR gotoblas_quit(void) {
  #endif
  }
  
diff --git a/deps/pcre.mk b/deps/pcre.mk
index 2120f3c95bc3ad..67185a7213c855 100644
--- a/deps/pcre.mk
+++ b/deps/pcre.mk
@@ -14,7 +14,7 @@ $(SRCCACHE)/pcre2-$(PCRE_VER)/source-extracted: $(SRCCACHE)/pcre2-$(PCRE_VER).ta
 	cp $(SRCDIR)/patches/config.sub $(SRCCACHE)/pcre2-$(PCRE_VER)/config.sub
 	echo 1 > $@
 
-checksum-pcre2: $(SRCCACHE)/pcre2-$(PCRE_VER).tar.bz2
+checksum-pcre: $(SRCCACHE)/pcre2-$(PCRE_VER).tar.bz2
 	$(JLCHECKSUM) $<
 
 $(SRCCACHE)/pcre2-$(PCRE_VER)/pcre2-sljit-apple-silicon-support.patch-applied: $(SRCCACHE)/pcre2-$(PCRE_VER)/source-extracted
diff --git a/deps/suitesparse.mk b/deps/suitesparse.mk
deleted file mode 100644
index 5be8589875dc4f..00000000000000
--- a/deps/suitesparse.mk
+++ /dev/null
@@ -1,152 +0,0 @@
-## SUITESPARSE ##
-
-ifeq ($(USE_BLAS64), 1)
-UMFPACK_CONFIG := -DLONGBLAS='long long'
-CHOLMOD_CONFIG := -DLONGBLAS='long long'
-SPQR_CONFIG := -DLONGBLAS='long long'
-UMFPACK_CONFIG += -DSUN64
-CHOLMOD_CONFIG += -DSUN64
-SPQR_CONFIG += -DSUN64
-endif
-
-# Disable linking to libmetis
-CHOLMOD_CONFIG += -DNPARTITION
-
-ifneq ($(USE_BINARYBUILDER_SUITESPARSE), 1)
-
-SUITESPARSE_PROJECTS := AMD BTF CAMD CCOLAMD COLAMD CHOLMOD LDL KLU UMFPACK RBio SPQR
-SUITESPARSE_LIBS := $(addsuffix .*$(SHLIB_EXT)*,suitesparseconfig amd btf camd ccolamd colamd cholmod klu ldl umfpack rbio spqr)
-
-SUITE_SPARSE_LIB := $(LDFLAGS) -L"$(abspath $(BUILDDIR))/SuiteSparse-$(SUITESPARSE_VER)/lib"
-ifeq ($(OS), Darwin)
-SUITE_SPARSE_LIB += $(RPATH_ESCAPED_ORIGIN)
-endif
-SUITESPARSE_MFLAGS := CC="$(CC)" CXX="$(CXX)" F77="$(FC)" AR="$(AR)" RANLIB="$(RANLIB)" BLAS="-L$(build_shlibdir) -lblastrampoline" LAPACK="-L$(build_shlibdir) -lblastrampoline" \
-	  LDFLAGS="$(SUITE_SPARSE_LIB)" CFOPENMP="" CUDA=no CUDA_PATH="" \
-	  UMFPACK_CONFIG="$(UMFPACK_CONFIG)" CHOLMOD_CONFIG="$(CHOLMOD_CONFIG)" SPQR_CONFIG="$(SPQR_CONFIG)"
-ifeq ($(OS),WINNT)
-SUITESPARSE_MFLAGS += UNAME=Windows
-else
-SUITESPARSE_MFLAGS += UNAME=$(OS)
-endif
-
-$(SRCCACHE)/SuiteSparse-$(SUITESPARSE_VER).tar.gz: | $(SRCCACHE)
-	$(JLDOWNLOAD) $@ https://github.com/DrTimothyAldenDavis/SuiteSparse/archive/v$(SUITESPARSE_VER).tar.gz
-
-$(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/source-extracted: $(SRCCACHE)/SuiteSparse-$(SUITESPARSE_VER).tar.gz
-	$(JLCHECKSUM) $<
-	mkdir -p $(dir $@)
-	$(TAR) -C $(dir $@) --strip-components 1 -zxf $<
-	echo 1 > $@
-
-checksum-suitesparse: $(SRCCACHE)/SuiteSparse-$(SUITESPARSE_VER).tar.gz
-	$(JLCHECKSUM) $<
-
-$(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/SuiteSparse-winclang.patch-applied: $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/source-extracted
-	cd $(dir $@) && patch -p0 < $(SRCDIR)/patches/SuiteSparse-winclang.patch
-	echo 1 > $@
-$(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/SuiteSparse-shlib.patch-applied: $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/source-extracted
-	cd $(dir $@) && patch -p1 < $(SRCDIR)/patches/SuiteSparse-shlib.patch
-	echo 1 > $@
-$(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/build-compiled: $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/SuiteSparse-winclang.patch-applied
-$(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/build-compiled: $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/SuiteSparse-shlib.patch-applied
-
-$(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/build-compiled: | $(build_prefix)/manifest/blastrampoline
-
-$(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/build-compiled: $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/source-extracted
-	$(MAKE) -C $(dir $<)SuiteSparse_config library config $(SUITESPARSE_MFLAGS)
-	$(INSTALL_NAME_CMD)libsuitesparseconfig.$(SHLIB_EXT) $(dir $<)lib/libsuitesparseconfig.$(SHLIB_EXT)
-	for PROJ in $(SUITESPARSE_PROJECTS); do \
-		$(MAKE) -C $(dir $<)$${PROJ} library $(SUITESPARSE_MFLAGS) || exit 1; \
-		$(INSTALL_NAME_CMD)lib`echo $${PROJ} | tr A-Z a-z`.$(SHLIB_EXT) $(dir $<)lib/lib`echo $${PROJ} | tr A-Z a-z`.$(SHLIB_EXT) || exit 1; \
-	done
-	echo 1 > $@
-
-ifeq ($(OS),WINNT)
-SUITESPARSE_SHLIB_ENV:=PATH="$(abspath $(dir $<))lib:$(build_bindir):$(PATH)"
-else
-SUITESPARSE_SHLIB_ENV:=LD_LIBRARY_PATH="$(build_shlibdir)"
-endif
-$(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/build-checked: $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/build-compiled
-	for PROJ in $(SUITESPARSE_PROJECTS); do \
-		$(SUITESPARSE_SHLIB_ENV) $(MAKE) -C $(dir $<)$${PROJ} default $(SUITESPARSE_MFLAGS) || exit 1; \
-	done
-	echo 1 > $@
-
-$(build_prefix)/manifest/suitesparse: $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/build-compiled | $(build_prefix)/manifest $(build_shlibdir)
-	for lib in $(SUITESPARSE_LIBS); do \
-		cp -a $(dir $<)lib/lib$${lib} $(build_shlibdir) || exit 1; \
-	done
-	#cp -a $(dir $<)lib/* $(build_shlibdir)
-	#cp -a $(dir $<)include/* $(build_includedir)
-	echo $(SUITESPARSE_VER) > $@
-
-uninstall-suitesparse:
-	-rm $(build_prefix)/manifest/suitesparse
-	-rm $(addprefix $(build_shlibdir)/lib, $(SUITESPARSE_LIBS))
-
-clean-suitesparse: clean-suitesparse-wrapper uninstall-suitesparse
-	-rm $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/build-compiled
-	-rm -fr $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/lib
-	-rm -fr $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/include
-	-$(MAKE) -C $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER) clean
-
-distclean-suitesparse: clean-suitesparse-wrapper
-	-rm -rf $(SRCCACHE)/SuiteSparse-$(SUITESPARSE_VER).tar.gz \
-		$(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)
-
-get-suitesparse: $(SRCCACHE)/SuiteSparse-$(SUITESPARSE_VER).tar.gz
-extract-suitesparse: $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/source-extracted
-configure-suitesparse: extract-suitesparse
-compile-suitesparse: $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/build-compiled
-fastcheck-suitesparse: #none
-check-suitesparse: $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/build-checked
-install-suitesparse: $(build_prefix)/manifest/suitesparse install-suitesparse-wrapper
-
-# SUITESPARSE WRAPPER
-
-ifeq ($(USE_SYSTEM_SUITESPARSE), 1)
-SUITESPARSE_INC := -I $(LOCALBASE)/include/suitesparse
-SUITESPARSE_LIB := -lumfpack -lcholmod -lamd -lcamd -lcolamd -lspqr
-else
-SUITESPARSE_INC := -I $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/CHOLMOD/Include -I $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/SuiteSparse_config -I $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/SPQR/Include
-SUITESPARSE_LIB := -L$(build_shlibdir) -lcholmod -lumfpack -lspqr $(RPATH_ORIGIN)
-$(build_shlibdir)/libsuitesparse_wrapper.$(SHLIB_EXT): $(build_prefix)/manifest/suitesparse
-endif
-
-$(build_shlibdir)/libsuitesparse_wrapper.$(SHLIB_EXT): $(SRCDIR)/SuiteSparse_wrapper.c
-	mkdir -p $(build_shlibdir)
-	$(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) -O2 -shared $(fPIC) $(SUITESPARSE_INC) $< -o $@ $(SUITESPARSE_LIB)
-	$(INSTALL_NAME_CMD)libsuitesparse_wrapper.$(SHLIB_EXT) $@
-	touch -c $@
-
-clean-suitesparse-wrapper:
-	-rm -f $(build_shlibdir)/libsuitesparse_wrapper.$(SHLIB_EXT)
-
-distclean-suitesparse-wrapper: clean-suitesparse-wrapper
-
-get-suitesparse-wrapper:
-extract-suitesparse-wrapper:
-configure-suitesparse-wrapper:
-compile-suitesparse-wrapper:
-fastcheck-suitesparse-wrapper: #none
-check-suitesparse-wrapper:
-install-suitesparse-wrapper: $(build_shlibdir)/libsuitesparse_wrapper.$(SHLIB_EXT)
-
-else # USE_BINARYBUILDER_SUITESPARSE
-
-$(eval $(call bb-install,suitesparse,SUITESPARSE,false))
-
-get-suitesparse-wrapper: get-suitesparse
-extract-suitesparse-wrapper: extract-suitesparse
-configure-suitesparse-wrapper: configure-suitesparse
-compile-suitesparse-wrapper: compile-suitesparse
-fastcheck-suitesparse-wrapper: fastcheck-suitesparse
-check-suitesparse-wrapper: check-suitesparse
-clean-suitesparse-wrapper: clean-suitesparse
-distclean-suitesparse-wrapper: distclean-suitesparse
-install-suitesparse-wrapper: install-suitesparse
-
-# suitesparse depends on blastrampoline
-compile-suitesparse: | $(build_prefix)/manifest/blastrampoline
-endif
diff --git a/deps/tools/bb-install.mk b/deps/tools/bb-install.mk
index 5c18ddee0bfce7..781d66f1c5dda8 100644
--- a/deps/tools/bb-install.mk
+++ b/deps/tools/bb-install.mk
@@ -51,7 +51,7 @@ ifneq (bsdtar,$(findstring bsdtar,$(TAR_TEST)))
 	@# work-around a gtar bug: they do some complicated work to avoid the mkdir
 	@# syscall, which is buggy when working with Tar.jl files so we manually do
 	@# the mkdir calls first in a pre-pass
-	$(TAR) -tzf $$< | xargs -L 1 dirname | sort -u | (cd $$(build_prefix) && xargs -t mkdir -p)
+	$(TAR) -tzf $$< | xargs -n 1 dirname | sort -u | (cd $$(build_prefix) && xargs -t mkdir -p)
 endif
 	$(UNTAR) $$< -C $$(build_prefix)
 	echo '$$(UNINSTALL_$(strip $1))' > $$@
diff --git a/deps/tools/common.mk b/deps/tools/common.mk
index 2fbceff51fdf8b..aacae86139ee61 100644
--- a/deps/tools/common.mk
+++ b/deps/tools/common.mk
@@ -235,6 +235,6 @@ endif
 
 ## phony targets ##
 
-.PHONY: default get extract configure compile fastcheck check install uninstall reinstall cleanall distcleanall \
+.PHONY: default get extract configure compile fastcheck check install uninstall reinstall cleanall distcleanall version-check \
 	get-* extract-* configure-* compile-* fastcheck-* check-* install-* uninstall-* reinstall-* clean-* distclean-* \
 	update-llvm
diff --git a/deps/tools/uninstallers.mk b/deps/tools/uninstallers.mk
index e6e37292ab1c4f..48387914643db0 100644
--- a/deps/tools/uninstallers.mk
+++ b/deps/tools/uninstallers.mk
@@ -2,10 +2,6 @@
 # defines uninstallers and version-checks
 # based on the contents of the UNINSTALL_* variables and the manifest files
 
-install: version-check
-version-check: $(addprefix version-check-, $(DEP_LIBS_STAGED))
-uninstall: $(addprefix uninstall-, $(DEP_LIBS_STAGED))
-
 ## read 'uninstall-*' definition from either the manifest or the current session
 define define-uninstaller
 MANIFEST_$1 := $$(shell [ -e $$(build_prefix)/manifest/$1 ] && cat $$(build_prefix)/manifest/$1)
@@ -17,12 +13,15 @@ ifneq ($$(UNINST_HOW_$1),)
 UNINST_WHO_$1 := $$(firstword $$(MANIFEST_$1))
 UNINST_WHERE_$1 := $$(wordlist 3,99,$$(MANIFEST_$1))
 $$(eval $$(call $$(UNINST_HOW_$1),$1,$$(UNINST_WHO_$1),$$(UNINST_WHERE_$1)))
+else
+uninstall-$1:
+	@echo "skipping uninstall: $1 not installed"
 endif
 endef
-$(foreach dep,$(DEP_LIBS_STAGED),$(eval $(call define-uninstaller,$(dep))))
+$(foreach dep,$(DEP_LIBS_STAGED_ALL),$(eval $(call define-uninstaller,$(dep))))
 
 # for each subproject with a manifest, keep the user aware if something is not the expected version
-$(addprefix version-check-,$(DEP_LIBS_STAGED)) : version-check-% : install-%
+$(addprefix version-check-,$(DEP_LIBS_STAGED_ALL)) : version-check-% : install-%
 	@if [ ! -e $(build_prefix)/manifest/$* ] || ( \
 			[ "1" != "`wc -w $(build_prefix)/manifest/$* | cut -f 1 -d ' '`" ] && \
 			[ "$(UNINSTALL_$*)" != "`cat $(build_prefix)/manifest/$*`" ]) ; then \
diff --git a/doc/Manifest.toml b/doc/Manifest.toml
index 455ab49223998e..468cb3bfbc3cb8 100644
--- a/doc/Manifest.toml
+++ b/doc/Manifest.toml
@@ -1,147 +1,89 @@
 # This file is machine-generated - editing it directly is not advised
 
-[[ArgTools]]
-uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
+manifest_format = "2.0"
 
-[[Artifacts]]
-uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
-
-[[Base64]]
+[[deps.Base64]]
 uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
 
-[[Dates]]
+[[deps.Dates]]
 deps = ["Printf"]
 uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
 
-[[DocStringExtensions]]
-deps = ["LibGit2", "Markdown", "Pkg", "Test"]
-git-tree-sha1 = "50ddf44c53698f5e784bbebb3f4b21c5807401b1"
+[[deps.DocStringExtensions]]
+deps = ["LibGit2"]
+git-tree-sha1 = "a32185f5428d3986f47c2ab78b1f216d5e6cc96f"
 uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
-version = "0.8.3"
+version = "0.8.5"
 
-[[Documenter]]
+[[deps.Documenter]]
 deps = ["Base64", "Dates", "DocStringExtensions", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "REPL", "Test", "Unicode"]
-git-tree-sha1 = "3ebb967819b284dc1e3c0422229b58a40a255649"
+git-tree-sha1 = "621850838b3e74dd6dd047b5432d2e976877104e"
 uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
-version = "0.26.3"
-
-[[Downloads]]
-deps = ["ArgTools", "LibCURL", "NetworkOptions"]
-uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
+version = "0.27.2"
 
-[[IOCapture]]
-deps = ["Logging"]
-git-tree-sha1 = "377252859f740c217b936cebcd918a44f9b53b59"
+[[deps.IOCapture]]
+deps = ["Logging", "Random"]
+git-tree-sha1 = "f7be53659ab06ddc986428d3a9dcc95f6fa6705a"
 uuid = "b5f81e59-6552-4d32-b1f0-c071b021bf89"
-version = "0.1.1"
+version = "0.2.2"
 
-[[InteractiveUtils]]
+[[deps.InteractiveUtils]]
 deps = ["Markdown"]
 uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 
-[[JSON]]
+[[deps.JSON]]
 deps = ["Dates", "Mmap", "Parsers", "Unicode"]
 git-tree-sha1 = "81690084b6198a2e1da36fcfda16eeca9f9f24e4"
 uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
 version = "0.21.1"
 
-[[LibCURL]]
-deps = ["LibCURL_jll", "MozillaCACerts_jll"]
-uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
-
-[[LibCURL_jll]]
-deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"]
-uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
-
-[[LibGit2]]
+[[deps.LibGit2]]
 deps = ["Base64", "NetworkOptions", "Printf", "SHA"]
 uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
 
-[[LibSSH2_jll]]
-deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
-uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
-
-[[Libdl]]
-uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
-
-[[Logging]]
+[[deps.Logging]]
 uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
 
-[[Markdown]]
+[[deps.Markdown]]
 deps = ["Base64"]
 uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
 
-[[MbedTLS_jll]]
-deps = ["Artifacts", "Libdl"]
-uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
-
-[[Mmap]]
+[[deps.Mmap]]
 uuid = "a63ad114-7e13-5084-954f-fe012c677804"
 
-[[MozillaCACerts_jll]]
-uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
-
-[[NetworkOptions]]
+[[deps.NetworkOptions]]
 uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
 
-[[Parsers]]
+[[deps.Parsers]]
 deps = ["Dates"]
-git-tree-sha1 = "50c9a9ed8c714945e01cd53a21007ed3865ed714"
+git-tree-sha1 = "c8abc88faa3f7a3950832ac5d6e690881590d6dc"
 uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
-version = "1.0.15"
+version = "1.1.0"
 
-[[Pkg]]
-deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"]
-uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
-
-[[Printf]]
+[[deps.Printf]]
 deps = ["Unicode"]
 uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 
-[[REPL]]
+[[deps.REPL]]
 deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
 uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
 
-[[Random]]
+[[deps.Random]]
 deps = ["Serialization"]
 uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 
-[[SHA]]
+[[deps.SHA]]
 uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
 
-[[Serialization]]
+[[deps.Serialization]]
 uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
 
-[[Sockets]]
+[[deps.Sockets]]
 uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
 
-[[TOML]]
-deps = ["Dates"]
-uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
-
-[[Tar]]
-deps = ["ArgTools", "SHA"]
-uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
-
-[[Test]]
+[[deps.Test]]
 deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
 uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
-[[UUIDs]]
-deps = ["Random", "SHA"]
-uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
-
-[[Unicode]]
+[[deps.Unicode]]
 uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
-
-[[Zlib_jll]]
-deps = ["Libdl"]
-uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
-
-[[nghttp2_jll]]
-deps = ["Artifacts", "Libdl"]
-uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
-
-[[p7zip_jll]]
-deps = ["Artifacts", "Libdl"]
-uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
diff --git a/doc/build/build.md b/doc/build/build.md
index 3d351bd83e71c6..89b293a57df304 100644
--- a/doc/build/build.md
+++ b/doc/build/build.md
@@ -250,21 +250,6 @@ Julia uses a custom fork of libuv. It is a small dependency, and can be safely b
 
 As a high-performance numerical language, Julia should be linked to a multi-threaded BLAS and LAPACK, such as OpenBLAS or ATLAS, which will provide much better performance than the reference `libblas` implementations which may be default on some systems.
 
-### Intel MKL
-
-**Note:** If you are building Julia for the sole purpose of incorporating Intel MKL, it may be beneficial to first try [MKL.jl](https://github.com/JuliaComputing/MKL.jl). This package will automatically download MKL and rebuild Julia's system image against it, sidestepping the need to set up a working build environment just to add MKL functionality. MKL.jl replaces OpenBLAS with MKL for dense linear algebra functions called directly from Julia, but SuiteSparse and other C/Fortran libraries will continue to use the BLAS they were linked against at build time. If you want SuiteSparse to use MKL, you will need to build from source.
-
-For a 64-bit architecture, the environment should be set up as follows:
-```sh
-# bash
-source /path/to/intel/bin/compilervars.sh intel64
-```
-Add the following to the `Make.user` file:
-
-    USE_INTEL_MKL = 1
-
-It is highly recommended to start with a fresh clone of the Julia repository.
-
 ## Source distributions of releases
 
 Each pre-release and release of Julia has a "full" source distribution and a "light" source
diff --git a/doc/make.jl b/doc/make.jl
index c58b3eebed7165..03798b50888dd2 100644
--- a/doc/make.jl
+++ b/doc/make.jl
@@ -155,6 +155,7 @@ DevDocs = [
         "devdocs/debuggingtips.md",
         "devdocs/valgrind.md",
         "devdocs/sanitizers.md",
+        "devdocs/probes.md"
     ]
 ]
 
diff --git a/doc/man/julia.1 b/doc/man/julia.1
index 49ee30e0af90e2..76277e39bc079a 100644
--- a/doc/man/julia.1
+++ b/doc/man/julia.1
@@ -145,6 +145,10 @@ Run time-intensive code optimizations
 -O <n>, --optimize=<n>
 Set the optimization level to <n>
 
+.TP
+--min-optlevel=<n>
+Set the minimum optimization level to <n>, overriding per-module settings
+
 .TP
 -g
 Enable generation of full debug info
@@ -158,8 +162,8 @@ Set the level of debug info generation to <n>
 Control whether inlining is permitted (overrides functions declared as @inline)
 
 .TP
---check-bounds={yes|no}
-Emit bounds checks always or never (ignoring declarations)
+--check-bounds={yes|no|auto}
+Emit bounds checks always, never, or respect @inbounds declarations
 
 .TP
 --math-mode={ieee|user}
diff --git a/doc/src/assets/julia-manual.css b/doc/src/assets/julia-manual.css
index f303c677497978..309398dd3ccb53 100644
--- a/doc/src/assets/julia-manual.css
+++ b/doc/src/assets/julia-manual.css
@@ -1,3 +1,7 @@
+pre, code {
+    font-variant-ligatures: no-contextual;
+}
+
 nav.toc h1 {
     display: none;
 }
diff --git a/doc/src/base/arrays.md b/doc/src/base/arrays.md
index ec8678361da605..1dc2d8ed926afd 100644
--- a/doc/src/base/arrays.md
+++ b/doc/src/base/arrays.md
@@ -40,6 +40,7 @@ Base.trues
 Base.falses
 Base.fill
 Base.fill!
+Base.empty
 Base.similar
 ```
 
@@ -51,6 +52,7 @@ Base.size
 Base.axes(::Any)
 Base.axes(::AbstractArray, ::Any)
 Base.length(::AbstractArray)
+Base.keys(::AbstractArray)
 Base.eachindex
 Base.IndexStyle
 Base.IndexLinear
@@ -91,6 +93,7 @@ Base.Broadcast.result_style
 Base.getindex(::AbstractArray, ::Any...)
 Base.setindex!(::AbstractArray, ::Any, ::Any...)
 Base.copyto!(::AbstractArray, ::CartesianIndices, ::AbstractArray, ::CartesianIndices)
+Base.copy!
 Base.isassigned
 Base.Colon
 Base.CartesianIndex
@@ -138,6 +141,7 @@ Base.cat
 Base.vcat
 Base.hcat
 Base.hvcat
+Base.hvncat
 Base.vect
 Base.circshift
 Base.circshift!
diff --git a/doc/src/base/base.md b/doc/src/base/base.md
index fe3e7e4ff498a4..1e419460bef533 100644
--- a/doc/src/base/base.md
+++ b/doc/src/base/base.md
@@ -15,6 +15,11 @@ Some general notes:
   * By convention, function names ending with an exclamation point (`!`) modify their arguments.
     Some functions have both modifying (e.g., `sort!`) and non-modifying (`sort`) versions.
 
+The behaviors of `Base` and standard libraries are stable as defined in
+[SemVer](https://semver.org/) only if they are documented; i.e., included in the
+[Julia documentation](https://docs.julialang.org/) and not marked as unstable.
+See [API FAQ](@ref man-api) for more information.
+
 ## Getting Around
 
 ```@docs
@@ -158,6 +163,7 @@ Base.typejoin
 Base.typeintersect
 Base.promote_type
 Base.promote_rule
+Base.promote_typejoin
 Base.isdispatchtuple
 ```
 
@@ -213,14 +219,17 @@ Core.Union
 Union{}
 Core.UnionAll
 Core.Tuple
+Core.NTuple
 Core.NamedTuple
 Base.@NamedTuple
 Base.Val
 Core.Vararg
 Core.Nothing
 Base.isnothing
+Base.notnothing
 Base.Some
 Base.something
+Base.@something
 Base.Enums.Enum
 Base.Enums.@enum
 Core.Expr
@@ -235,6 +244,7 @@ Core.Module
 Core.Function
 Base.hasmethod
 Core.applicable
+Base.isambiguous
 Core.invoke
 Base.@invoke
 Base.invokelatest
@@ -243,6 +253,9 @@ new
 Base.:(|>)
 Base.:(∘)
 Base.ComposedFunction
+Base.splat
+Base.Fix1
+Base.Fix2
 ```
 
 ## Syntax
@@ -277,6 +290,7 @@ Base.@deprecate
 Base.Missing
 Base.missing
 Base.coalesce
+Base.@coalesce
 Base.ismissing
 Base.skipmissing
 Base.nonmissingtype
@@ -297,6 +311,7 @@ Base.ignorestatus
 Base.detach
 Base.Cmd
 Base.setenv
+Base.addenv
 Base.withenv
 Base.pipeline(::Any, ::Any, ::Any, ::Any...)
 Base.pipeline(::Base.AbstractCmd)
@@ -341,7 +356,7 @@ Core.throw
 Base.rethrow
 Base.backtrace
 Base.catch_backtrace
-Base.catch_stack
+Base.current_exceptions
 Base.@assert
 Base.Experimental.register_error_hint
 Base.Experimental.show_error_hints
@@ -406,6 +421,7 @@ Base.isconst
 Base.nameof(::Function)
 Base.functionloc(::Any, ::Any)
 Base.functionloc(::Method)
+Base.@locals
 ```
 
 ## Internals
diff --git a/doc/src/base/collections.md b/doc/src/base/collections.md
index 347d2d33724d4c..84e5702e0e396f 100644
--- a/doc/src/base/collections.md
+++ b/doc/src/base/collections.md
@@ -66,6 +66,7 @@ Base.LinRange
 Base.isempty
 Base.empty!
 Base.length
+Base.checked_length
 ```
 
 Fully implemented by:
@@ -200,10 +201,8 @@ Base.IdDict
 Base.WeakKeyDict
 Base.ImmutableDict
 Base.haskey
-Base.get(::Any, ::Any, ::Any)
 Base.get
-Base.get!(::Any, ::Any, ::Any)
-Base.get!(::Function, ::Any, ::Any)
+Base.get!
 Base.getkey
 Base.delete!
 Base.pop!(::Any, ::Any, ::Any)
@@ -275,6 +274,7 @@ Base.pushfirst!
 Base.popfirst!
 Base.insert!
 Base.deleteat!
+Base.keepat!
 Base.splice!
 Base.resize!
 Base.append!
diff --git a/doc/src/base/file.md b/doc/src/base/file.md
index 93b5be617ad4bd..40d1cc2ca7ef0a 100644
--- a/doc/src/base/file.md
+++ b/doc/src/base/file.md
@@ -8,6 +8,7 @@ Base.Filesystem.readdir
 Base.Filesystem.walkdir
 Base.Filesystem.mkdir
 Base.Filesystem.mkpath
+Base.Filesystem.hardlink
 Base.Filesystem.symlink
 Base.Filesystem.readlink
 Base.Filesystem.chmod
diff --git a/doc/src/base/io-network.md b/doc/src/base/io-network.md
index b798a708f22b21..ba7d779f9152ae 100644
--- a/doc/src/base/io-network.md
+++ b/doc/src/base/io-network.md
@@ -13,6 +13,7 @@ Base.take!(::Base.GenericIOBuffer)
 Base.fdio
 Base.flush
 Base.close
+Base.closewrite
 Base.write
 Base.read
 Base.read!
@@ -36,6 +37,7 @@ Base.iswritable
 Base.isreadable
 Base.isopen
 Base.fd
+Base.redirect_stdio
 Base.redirect_stdout
 Base.redirect_stdout(::Function, ::Any)
 Base.redirect_stderr
diff --git a/doc/src/base/math.md b/doc/src/base/math.md
index e0f094572a3987..177324abccfaac 100644
--- a/doc/src/base/math.md
+++ b/doc/src/base/math.md
@@ -49,6 +49,8 @@ Base.:(~)
 Base.:(&)
 Base.:(|)
 Base.xor
+Base.nand
+Base.nor
 Base.:(!)
 &&
 ||
@@ -65,6 +67,7 @@ Base.tan(::Number)
 Base.Math.sind
 Base.Math.cosd
 Base.Math.tand
+Base.Math.sincosd
 Base.Math.sinpi
 Base.Math.cospi
 Base.Math.sincospi
@@ -175,6 +178,7 @@ Base.nextprod
 Base.invmod
 Base.powermod
 Base.ndigits
+Base.add_sum
 Base.widemul
 Base.Math.evalpoly
 Base.Math.@evalpoly
diff --git a/doc/src/base/multi-threading.md b/doc/src/base/multi-threading.md
index 4f3e4e53634a96..cb8ad06488f1fe 100644
--- a/doc/src/base/multi-threading.md
+++ b/doc/src/base/multi-threading.md
@@ -19,9 +19,27 @@ See also [Synchronization](@ref lib-task-sync).
 
 ## Atomic operations
 
+```@docs
+Base.@atomic
+Base.@atomicswap
+Base.@atomicreplace
+```
+
+!!! note
+
+    The following APIs are fairly primitive, and will likely be exposed through an `unsafe_*`-like wrapper.
+
+```
+Core.Intrinsics.atomic_pointerref(pointer::Ptr{T}, order::Symbol) --> T
+Core.Intrinsics.atomic_pointerset(pointer::Ptr{T}, new::T, order::Symbol) --> pointer
+Core.Intrinsics.atomic_pointerswap(pointer::Ptr{T}, new::T, order::Symbol) --> old
+Core.Intrinsics.atomic_pointermodify(pointer::Ptr{T}, function::(old::T,arg::S)->T, arg::S, order::Symbol) --> old
+Core.Intrinsics.atomic_pointerreplace(pointer::Ptr{T}, expected::Any, new::T, success_order::Symbol, failure_order::Symbol) --> (old, cmp)
+```
+
 !!! warning
 
-    The API for atomic operations has not yet been finalized and is likely to change.
+    The following APIs are deprecated, though support for them is likely to remain for several releases.
 
 ```@docs
 Base.Threads.Atomic
diff --git a/doc/src/base/parallel.md b/doc/src/base/parallel.md
index ce8e25107ab51c..a508603a2d220e 100644
--- a/doc/src/base/parallel.md
+++ b/doc/src/base/parallel.md
@@ -27,6 +27,7 @@ Base.schedule
 ## [Synchronization](@id lib-task-sync)
 
 ```@docs
+Base.errormonitor
 Base.@sync
 Base.wait
 Base.fetch(t::Task)
diff --git a/doc/src/base/punctuation.md b/doc/src/base/punctuation.md
index 3a92d021f2f105..69b72e467e9995 100644
--- a/doc/src/base/punctuation.md
+++ b/doc/src/base/punctuation.md
@@ -2,17 +2,17 @@
 
 Extended documentation for mathematical symbols & functions is [here](@ref math-ops).
 
-| symbol      | meaning                                                                                                                                         |
-|:----------- |:----------------------------------------------------------------------------------------------------------------------------------------------- |
-| `@m`        | the at-symbol invokes [macro](@ref man-macros) `m`; followed by space-separated expressions or a function-call-like argument list |
+| symbol      | meaning                                                                                     |
+|:----------- |:--------------------------------------------------------------------------------------------|
+| `@`         | the at-sign marks a [macro](@ref man-macros) invocation; optionally followed by an argument list |
 | [`!`](@ref) | an exclamation mark is a prefix operator for logical negation ("not")                       |
 | `a!`        | function names that end with an exclamation mark modify one or more of their arguments by convention |
 | `#`         | the number sign (or hash or pound) character begins single line comments                    |
-| `#=`        | when followed by an equals sign, it begins a multi-line comment (these are nestable)          |
+| `#=`        | when followed by an equals sign, it begins a multi-line comment (these are nestable)        |
 | `=#`        | end a multi-line comment by immediately preceding the number sign with an equals sign       |
 | `$`         | the dollar sign is used for [string](@ref string-interpolation) and [expression](@ref man-expression-interpolation) interpolation |
 | [`%`](@ref rem) | the percent symbol is the remainder operator                                            |
-| [`^`](@ref) | the caret is the exponentiation operator                                                  |
+| [`^`](@ref) | the caret is the exponentiation operator                                                    |
 | [`&`](@ref) | single ampersand is bitwise and                                                             |
 | [`&&`](@ref)| double ampersands is short-circuiting boolean and                                           |
 | [`\|`](@ref)| single pipe character is bitwise or                                                         |
@@ -31,7 +31,7 @@ Extended documentation for mathematical symbols & functions is [here](@ref math-
 | `[,]`       | [vector literal constructor](@ref man-array-literals) (calling [`vect`](@ref Base.vect))    |
 | `[;]`       | [vertical concatenation](@ref man-array-concatenation) (calling [`vcat`](@ref) or [`hvcat`](@ref)) |
 | `[    ]`    | with space-separated expressions, [horizontal concatenation](@ref man-concatenation) (calling [`hcat`](@ref) or [`hvcat`](@ref)) |
-| `T{ }`      | curly braces following a type list that type's [parameters](@ref Parametric-Types)      |
+| `T{ }`      | curly braces following a type list that type's [parameters](@ref Parametric-Types)          |
 | `{}`        | curly braces can also be used to group multiple [`where`](@ref) expressions in function declarations |
 | `;`         | semicolons separate statements, begin a list of keyword arguments in function declarations or calls, or are used to separate array literals for vertical concatenation |
 | `,`         | commas separate function arguments or tuple or array components                             |
@@ -55,6 +55,6 @@ Extended documentation for mathematical symbols & functions is [here](@ref math-
 | [`==`](@ref)| double equals sign is value equality comparison                                             |
 | [`===`](@ref) | triple equals sign is programmatically identical equality comparison                      |
 | [`=>`](@ref Pair) | right arrow using an equals sign defines a [`Pair`](@ref) typically used to populate [dictionaries](@ref Dictionaries) |
-| `->` | right arrow using a hyphen defines an [anonymous function](@ref man-anonymous-functions) on a single line         |
-| `|>` | pipe operator passes output from the left argument to input of the right argument, usually a [function](@ref Function-composition-and-piping) |
-| `∘` | function composition operator (typed with \circ{tab}) combines two functions as though they are a single larger [function](@ref Function-composition-and-piping) |
+| `->`        | right arrow using a hyphen defines an [anonymous function](@ref man-anonymous-functions) on a single line |
+| `\|>`        | pipe operator passes output from the left argument to input of the right argument, usually a [function](@ref Function-composition-and-piping) |
+| `∘`         | function composition operator (typed with \circ{tab}) combines two functions as though they are a single larger [function](@ref Function-composition-and-piping) |
diff --git a/doc/src/base/strings.md b/doc/src/base/strings.md
index 185a915705f2c8..a7e9a8ee4eeeec 100644
--- a/doc/src/base/strings.md
+++ b/doc/src/base/strings.md
@@ -33,6 +33,8 @@ Base.isvalid(::Any, ::Any)
 Base.isvalid(::AbstractString, ::Integer)
 Base.match
 Base.eachmatch
+Base.RegexMatch
+Base.keys(::RegexMatch)
 Base.isless(::AbstractString, ::AbstractString)
 Base.:(==)(::AbstractString, ::AbstractString)
 Base.cmp(::AbstractString, ::AbstractString)
diff --git a/doc/src/devdocs/ast.md b/doc/src/devdocs/ast.md
index af89290618fec9..187c93d7f61e92 100644
--- a/doc/src/devdocs/ast.md
+++ b/doc/src/devdocs/ast.md
@@ -63,23 +63,25 @@ call. Finally, chains of comparisons have their own special expression structure
 
 ### Bracketed forms
 
-| Input                    | AST                                  |
-|:------------------------ |:------------------------------------ |
-| `a[i]`                   | `(ref a i)`                          |
-| `t[i;j]`                 | `(typed_vcat t i j)`                 |
-| `t[i j]`                 | `(typed_hcat t i j)`                 |
-| `t[a b; c d]`            | `(typed_vcat t (row a b) (row c d))` |
-| `a{b}`                   | `(curly a b)`                        |
-| `a{b;c}`                 | `(curly a (parameters c) b)`         |
-| `[x]`                    | `(vect x)`                           |
-| `[x,y]`                  | `(vect x y)`                         |
-| `[x;y]`                  | `(vcat x y)`                         |
-| `[x y]`                  | `(hcat x y)`                         |
-| `[x y; z t]`             | `(vcat (row x y) (row z t))`         |
-| `[x for y in z, a in b]` | `(comprehension x (= y z) (= a b))`  |
-| `T[x for y in z]`        | `(typed_comprehension T x (= y z))`  |
-| `(a, b, c)`              | `(tuple a b c)`                      |
-| `(a; b; c)`              | `(block a (block b c))`              |
+| Input                    | AST                                               |
+|:------------------------ |:------------------------------------------------- |
+| `a[i]`                   | `(ref a i)`                                       |
+| `t[i;j]`                 | `(typed_vcat t i j)`                              |
+| `t[i j]`                 | `(typed_hcat t i j)`                              |
+| `t[a b; c d]`            | `(typed_vcat t (row a b) (row c d))`              |
+| `t[a b;;; c d]`          | `(typed_ncat t 3 (row a b) (row c d))`            |
+| `a{b}`                   | `(curly a b)`                                     |
+| `a{b;c}`                 | `(curly a (parameters c) b)`                      |
+| `[x]`                    | `(vect x)`                                        |
+| `[x,y]`                  | `(vect x y)`                                      |
+| `[x;y]`                  | `(vcat x y)`                                      |
+| `[x y]`                  | `(hcat x y)`                                      |
+| `[x y; z t]`             | `(vcat (row x y) (row z t))`                      |
+| `[x;y;; z;t;;;]`         | `(ncat 3 (nrow 2 (nrow 1 x y) (nrow 1 z t)))`     |
+| `[x for y in z, a in b]` | `(comprehension x (= y z) (= a b))`               |
+| `T[x for y in z]`        | `(typed_comprehension T x (= y z))`               |
+| `(a, b, c)`              | `(tuple a b c)`                                   |
+| `(a; b; c)`              | `(block a (block b c))`                           |
 
 ### Macros
 
@@ -436,6 +438,10 @@ These symbols appear in the `head` field of [`Expr`](@ref)s in lowered form.
 
     Yields the caught exception inside a `catch` block, as returned by `jl_current_exception()`.
 
+  * `undefcheck`
+
+    Temporary node inserted by the compiler that will be processed in `type_lift_pass!`.
+
   * `enter`
 
     Enters an exception handler (`setjmp`). `args[1]` is the label of the catch block to jump to on
@@ -505,11 +511,11 @@ These symbols appear in the `head` field of [`Expr`](@ref)s in lowered form.
 
         The calling convention for the call.
 
-      * `args[6:length(args[3])]` : arguments
+      * `args[6:5+length(args[3])]` : arguments
 
         The values for all the arguments (with types of each given in args[3]).
 
-      * `args[(length(args[3]) + 1):end]` : gc-roots
+      * `args[6+length(args[3]):end]` : gc-roots
 
         The additional objects that may need to be gc-rooted for the duration of the call.
         See [Working with LLVM](@ref Working-with-LLVM) for where these are derived from and how they get handled.
diff --git a/doc/src/devdocs/boundscheck.md b/doc/src/devdocs/boundscheck.md
index 300d7f8b72993f..4f9c247db8c805 100644
--- a/doc/src/devdocs/boundscheck.md
+++ b/doc/src/devdocs/boundscheck.md
@@ -92,4 +92,4 @@ the last argument).
 
 ## Emit bounds checks
 
-Julia can be launched with `--check-bounds={yes|no}` to emit bounds checks always or never (ignoring declarations).
+Julia can be launched with `--check-bounds={yes|no|auto}` to emit bounds checks always, never, or only where `@inbounds` declarations do not disable them.
diff --git a/doc/src/devdocs/eval.md b/doc/src/devdocs/eval.md
index c3a9cd0fc1865b..1aea5161ad23a6 100644
--- a/doc/src/devdocs/eval.md
+++ b/doc/src/devdocs/eval.md
@@ -43,7 +43,7 @@ The 10,000 foot view of the whole process is as follows:
    interpreter.
 9. `jl_toplevel_eval_flex()` then [expands](@ref dev-macro-expansion) the code to eliminate any macros and to "lower"
    the AST to make it simpler to execute.
-10. `jl_toplevel_eval_flex()` then uses some simple heuristics to decide whether to JIT compiler the
+10. `jl_toplevel_eval_flex()` then uses some simple heuristics to decide whether to JIT compile the
     AST or to interpret it directly.
 11. The bulk of the work to interpret code is handled by [`eval` in `interpreter.c`](https://github.com/JuliaLang/julia/blob/master/src/interpreter.c).
 12. If instead, the code is compiled, the bulk of the work is handled by `codegen.cpp`. Whenever a
diff --git a/doc/src/devdocs/inference.md b/doc/src/devdocs/inference.md
index c8fea74995595b..a9c4ec5c726ed4 100644
--- a/doc/src/devdocs/inference.md
+++ b/doc/src/devdocs/inference.md
@@ -6,8 +6,8 @@
 to the process of deducing the types of later values from the types of
 input values. Julia's approach to inference has been described in blog
 posts
-([1](https://juliacomputing.com/blog/2016/04/04/inference-convergence.html),
-[2](https://juliacomputing.com/blog/2017/05/15/inference-converage2.html)).
+([1](https://juliacomputing.com/blog/2016/04/inference-convergence/),
+[2](https://juliacomputing.com/blog/2017/05/inference-converage2/)).
 
 ## Debugging compiler.jl
 
@@ -37,9 +37,8 @@ m = first(mths)
 interp = Core.Compiler.NativeInterpreter()
 sparams = Core.svec()      # this particular method doesn't have type-parameters
 optimize = true            # run all inference optimizations
-cached = false             # force inference to happen (do not use cached results)
 types = Tuple{typeof(convert), atypes.parameters...} # Tuple{typeof(convert), Type{Int}, UInt}
-Core.Compiler.typeinf_code(interp, types, sparams, optimize, cached)
+Core.Compiler.typeinf_code(interp, m, types, sparams, optimize)
 ```
 
 If your debugging adventures require a `MethodInstance`, you can look it up by
diff --git a/doc/src/devdocs/llvm.md b/doc/src/devdocs/llvm.md
index 454cb0333443c7..fd14b0020ccbea 100644
--- a/doc/src/devdocs/llvm.md
+++ b/doc/src/devdocs/llvm.md
@@ -42,7 +42,7 @@ The default version of LLVM is specified in `deps/Versions.make`. You can overri
 a file called `Make.user` in the top-level directory and adding a line to it such as:
 
 ```
-LLVM_VER = 6.0.1
+LLVM_VER = 12.0.1
 ```
 
 Besides the LLVM release numerals, you can also use `LLVM_VER = svn` to build against the latest
@@ -85,12 +85,6 @@ cc -shared -o sys.so sys.o
 ```
 This system image can then be loaded by `julia` as usual.
 
-Alternatively, you can
-use `--output-jit-bc jit.bc` to obtain a trace of all IR passed to the JIT.
-This is useful for code that cannot be run as part of the sysimg generation
-process (e.g. because it creates unserializable state). However, the resulting
-`jit.bc` does not include sysimage data, and can thus not be used as such.
-
 It is also possible to dump an LLVM IR module for just one Julia function,
 using:
 ```julia
diff --git a/doc/src/devdocs/probes.md b/doc/src/devdocs/probes.md
new file mode 100644
index 00000000000000..7e2fb96df5ac8d
--- /dev/null
+++ b/doc/src/devdocs/probes.md
@@ -0,0 +1,168 @@
+# Instrumenting Julia with DTrace and bpftrace
+
+DTrace and bpftrace are tools that enable lightweight instrumentation of processes.
+You can turn the instrumentation on and off while the process is running,
+and with instrumentation off the overhead is minimal.
+
+!!! compat "Julia 1.8"
+    Support for probes was added in Julia 1.8
+
+!!! note
+    This documentation has been written from a Linux perspective, but most of it
+    should also hold on macOS/Darwin and FreeBSD.
+
+## Enabling support
+
+On Linux, install the `systemtap` package, which provides a version of `dtrace`, and add the following line to your `Make.user` file:
+
+```
+WITH_DTRACE=1
+```
+
+### Verifying
+
+```
+> readelf -n usr/lib/libjulia-internal.so.1
+
+Displaying notes found in: .note.gnu.build-id
+  Owner                Data size 	Description
+  GNU                  0x00000014	NT_GNU_BUILD_ID (unique build ID bitstring)
+    Build ID: 57161002f35548772a87418d2385c284ceb3ead8
+
+Displaying notes found in: .note.stapsdt
+  Owner                Data size 	Description
+  stapsdt              0x00000029	NT_STAPSDT (SystemTap probe descriptors)
+    Provider: julia
+    Name: gc__begin
+    Location: 0x000000000013213e, Base: 0x00000000002bb4da, Semaphore: 0x0000000000346cac
+    Arguments:
+  stapsdt              0x00000032	NT_STAPSDT (SystemTap probe descriptors)
+    Provider: julia
+    Name: gc__stop_the_world
+    Location: 0x0000000000132144, Base: 0x00000000002bb4da, Semaphore: 0x0000000000346cae
+    Arguments:
+  stapsdt              0x00000027	NT_STAPSDT (SystemTap probe descriptors)
+    Provider: julia
+    Name: gc__end
+    Location: 0x000000000013214a, Base: 0x00000000002bb4da, Semaphore: 0x0000000000346cb0
+    Arguments:
+  stapsdt              0x0000002d	NT_STAPSDT (SystemTap probe descriptors)
+    Provider: julia
+    Name: gc__finalizer
+    Location: 0x0000000000132150, Base: 0x00000000002bb4da, Semaphore: 0x0000000000346cb2
+    Arguments:
+```
+
+## Adding probes in libjulia
+
+Probes are declared in DTrace format in the file `src/uprobes.d`. The generated
+header file is included in `src/julia_internal.h` and if you add probes you should
+provide a noop implementation there.
+
+The header will contain a semaphore `*_ENABLED` and the actual call to the probe.
+If the probe arguments are expensive to compute you should first check if the
+probe is enabled and then compute the arguments and call the probe.
+
+```c
+  if (JL_PROBE_{PROBE}_ENABLED())
+    auto expensive_arg = ...;
+    JL_PROBE_{PROBE}(expensive_arg);
+```
+
+If your probe has no arguments, it is preferable not to include the semaphore check.
+With USDT probes enabled, the cost of a semaphore is a memory load, irrespective of
+whether the probe is enabled or not.
+
+```c
+#define JL_PROBE_GC_BEGIN_ENABLED() __builtin_expect (julia_gc__begin_semaphore, 0)
+__extension__ extern unsigned short julia_gc__begin_semaphore __attribute__ ((unused)) __attribute__ ((section (".probes")));
+```
+
+Whereas the probe itself is a noop sled that will be patched to a trampoline to
+the probe handler.
+
+## Available probes
+
+### GC probes
+
+1. `julia:gc__begin`: GC begins running on one thread and triggers stop-the-world.
+2. `julia:gc__stop_the_world`: All threads have reached a safepoint and GC runs.
+3. `julia:gc__mark__begin`: Beginning the mark phase
+4. `julia:gc__mark_end(scanned_bytes, perm_scanned)`: Mark phase ended
+5. `julia:gc__sweep_begin(full)`: Starting sweep
+6. `julia:gc__sweep_end()`: Sweep phase finished
+7. `julia:gc__end`: GC is finished, other threads continue work
+8. `julia:gc__finalizer`: Initial GC thread has finished running finalizers
+
+#### GC stop-the-world latency
+
+An example `bpftrace` script is given in `contrib/bpftrace/gc_stop_the_world_latency.bt`
+and it creates a histogram of the latency for all threads to reach a safepoint.
+
+Running this Julia code, with `julia -t 2`
+
+```
+using Base.Threads
+
+fib(x) = x <= 1 ? 1 : fib(x-1) + fib(x-2)
+
+beaver = @spawn begin
+    while true
+        fib(30)
+        # This safepoint is necessary until #41616, since otherwise this
+        # loop will never yield to GC.
+        GC.safepoint()
+    end
+end
+
+allocator = @spawn begin
+    while true
+        zeros(1024)
+    end
+end
+
+wait(allocator)
+```
+
+and in a second terminal
+
+```
+> sudo contrib/bpftrace/gc_stop_the_world_latency.bt
+Attaching 4 probes...
+Tracing Julia GC Stop-The-World Latency... Hit Ctrl-C to end.
+^C
+
+
+@usecs[1743412]:
+[4, 8)               971 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
+[8, 16)              837 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@        |
+[16, 32)             129 |@@@@@@                                              |
+[32, 64)              10 |                                                    |
+[64, 128)              1 |                                                    |
+```
+
+We can see the latency distribution of the stop-the-world phase in the executed Julia process.
+
+## Notes on using `bpftrace`
+
+An example probe in the bpftrace format looks like:
+
+```
+usdt:usr/lib/libjulia-internal.so:julia:gc__begin
+{
+	@start[pid] = nsecs;
+}
+```
+
+The probe declaration takes the kind `usdt`, then either the
+path to the library or the PID, then the provider name `julia`,
+and finally the probe name `gc__begin`. Note that this example uses a
+relative path to `libjulia-internal.so`; this might
+need to be an absolute path on a production system.
+
+## Useful references:
+
+- [Julia Evans blog on Linux tracing systems](https://jvns.ca/blog/2017/07/05/linux-tracing-systems)
+- [LWN article on USDT and BPF](https://lwn.net/Articles/753601/)
+- [GDB support for probes](https://sourceware.org/gdb/onlinedocs/gdb/Static-Probe-Points.html)
+- [Brendan Gregg -- Linux Performance](https://www.brendangregg.com/linuxperf.html)
diff --git a/doc/src/devdocs/reflection.md b/doc/src/devdocs/reflection.md
index fbf0fd58d86a44..1a99f040a44932 100644
--- a/doc/src/devdocs/reflection.md
+++ b/doc/src/devdocs/reflection.md
@@ -96,7 +96,7 @@ as assignments, branches, and calls:
 ```jldoctest
 julia> Meta.lower(@__MODULE__, :( [1+2, sin(0.5)] ))
 :($(Expr(:thunk, CodeInfo(
-    @ none within `top-level scope'
+    @ none within `top-level scope`
 1 ─ %1 = 1 + 2
 │   %2 = sin(0.5)
 │   %3 = Base.vect(%1, %2)
diff --git a/doc/src/devdocs/sanitizers.md b/doc/src/devdocs/sanitizers.md
index 4bffa61e2a5dcf..87a62a50ffd94d 100644
--- a/doc/src/devdocs/sanitizers.md
+++ b/doc/src/devdocs/sanitizers.md
@@ -11,6 +11,12 @@ An easy solution is to have an dedicated build folder for providing a matching t
 with `BUILD_LLVM_CLANG=1`. You can then refer to this toolchain from another build
 folder by specifying `USECLANG=1` while overriding the `CC` and `CXX` variables.
 
+The sanitizers error out when they detect a shared library being opened using `RTLD_DEEPBIND`
+(ref: [google/sanitizers#611](https://github.com/google/sanitizers/issues/611)).
+Since [libblastrampoline](https://github.com/staticfloat/libblastrampoline) by default
+uses `RTLD_DEEPBIND`, we need to set the environment variable `LBT_USE_RTLD_DEEPBIND=0`
+when using a sanitizer.
+
 To use one of of the sanitizers set `SANITIZE=1` and then the appropriate flag for the sanitizer you
 want to use.
 
@@ -43,6 +49,71 @@ can be reduced by specifying `fast_unwind_on_malloc=0` and `malloc_context_size=
 of backtrace accuracy. For now, Julia also sets `detect_leaks=0`, but this should be removed in
 the future.
 
+### Example setup
+
+#### Step 1: Install toolchain
+
+Checkout a Git worktree (or create out-of-tree build directory) at
+`$TOOLCHAIN_WORKTREE` and create a config file `$TOOLCHAIN_WORKTREE/Make.user`
+with
+
+```
+USE_BINARYBUILDER_LLVM=1
+BUILD_LLVM_CLANG=1
+```
+
+Run:
+
+```sh
+cd $TOOLCHAIN_WORKTREE
+make -C deps install-llvm install-clang install-llvm-tools
+```
+
+to install toolchain binaries in `$TOOLCHAIN_WORKTREE/usr/tools`.
+
+#### Step 2: Build Julia with ASAN
+
+Checkout a Git worktree (or create out-of-tree build directory) at
+`$BUILD_WORKTREE` and create a config file `$BUILD_WORKTREE/Make.user` with
+
+```
+TOOLCHAIN=$(TOOLCHAIN_WORKTREE)/usr/tools
+
+# use our new toolchain
+USECLANG=1
+override CC=$(TOOLCHAIN)/clang
+override CXX=$(TOOLCHAIN)/clang++
+export ASAN_SYMBOLIZER_PATH=$(TOOLCHAIN)/llvm-symbolizer
+
+USE_BINARYBUILDER_LLVM=1
+
+override SANITIZE=1
+override SANITIZE_ADDRESS=1
+
+# make the GC use regular malloc/frees, which are hooked by ASAN
+override WITH_GC_DEBUG_ENV=1
+
+# default to a debug build for better line number reporting
+override JULIA_BUILD_MODE=debug
+
+# make ASAN consume less memory
+export ASAN_OPTIONS=detect_leaks=0:fast_unwind_on_malloc=0:allow_user_segv_handler=1:malloc_context_size=2
+
+JULIA_PRECOMPILE=1
+
+# tell libblastrampoline to not use RTLD_DEEPBIND
+export LBT_USE_RTLD_DEEPBIND=0
+```
+
+Run:
+
+```sh
+cd $BUILD_WORKTREE
+make debug
+```
+
+to build `julia-debug` with ASAN.
+
 ## Memory Sanitizer (MSAN)
 
 For detecting use of uninitialized memory, you can use Clang's [memory sanitizer (MSAN)](http://clang.llvm.org/docs/MemorySanitizer.html)
diff --git a/doc/src/devdocs/subarrays.md b/doc/src/devdocs/subarrays.md
index 8ebc773812131c..dee9547fb1efd4 100644
--- a/doc/src/devdocs/subarrays.md
+++ b/doc/src/devdocs/subarrays.md
@@ -19,14 +19,14 @@ julia> A = rand(2,3,4);
 
 julia> S1 = view(A, :, 1, 2:3)
 2×2 view(::Array{Float64, 3}, :, 1, 2:3) with eltype Float64:
- 0.200586  0.066423
- 0.298614  0.956753
+ 0.166507  0.97397
+ 0.754392  0.831383
 
 julia> S2 = view(A, 1, :, 2:3)
 3×2 view(::Array{Float64, 3}, 1, :, 2:3) with eltype Float64:
- 0.200586  0.066423
- 0.246837  0.646691
- 0.648882  0.276021
+ 0.166507  0.97397
+ 0.518957  0.0705793
+ 0.503714  0.825124
 ```
 ```@meta
 DocTestSetup = nothing
diff --git a/doc/src/manual/arrays.md b/doc/src/manual/arrays.md
index 2afc264556713e..ac4b4d2e61e87d 100644
--- a/doc/src/manual/arrays.md
+++ b/doc/src/manual/arrays.md
@@ -5,10 +5,11 @@ technical computing languages pay a lot of attention to their array implementati
 of other containers. Julia does not treat arrays in any special way. The array library is implemented
 almost completely in Julia itself, and derives its performance from the compiler, just like any
 other code written in Julia. As such, it's also possible to define custom array types by inheriting
-from [`AbstractArray`](@ref). See the [manual section on the AbstractArray interface](@ref man-interface-array) for more details
-on implementing a custom array type.
+from [`AbstractArray`](@ref). See the [manual section on the AbstractArray interface](@ref man-interface-array)
+for more details on implementing a custom array type.
 
-An array is a collection of objects stored in a multi-dimensional grid. In the most general case,
+An array is a collection of objects stored in a multi-dimensional grid. Zero-dimensional arrays
+are allowed, see [this FAQ entry](@ref faq-array-0dim). In the most general case,
 an array may contain objects of type [`Any`](@ref). For most computational purposes, arrays should contain
 objects of a more specific type, such as [`Float64`](@ref) or [`Int32`](@ref).
 
@@ -67,9 +68,9 @@ omitted it will default to [`Float64`](@ref).
 | [`rand(T, dims...)`](@ref)                     | an `Array` with random, iid [^1] and uniformly distributed values in the half-open interval ``[0, 1)``                                                                                                                                       |
 | [`randn(T, dims...)`](@ref)                    | an `Array` with random, iid and standard normally distributed values                                                                                                                                                                         |
 | [`Matrix{T}(I, m, n)`](@ref)                   | `m`-by-`n` identity matrix. Requires `using LinearAlgebra` for [`I`](@ref).                                                                                                                                                                                                                   |
-| [`range(start, stop=stop, length=n)`](@ref)    | range of `n` linearly spaced elements from `start` to `stop`                                                                                                                                                                                 |
+| [`range(start, stop, n)`](@ref)                | a range of `n` linearly spaced elements from `start` to `stop` |
 | [`fill!(A, x)`](@ref)                          | fill the array `A` with the value `x`                                                                                                                                                                                                        |
-| [`fill(x, dims...)`](@ref)                     | an `Array` filled with the value `x`                                                                                                                                                                                                         |
+| [`fill(x, dims...)`](@ref)                     | an `Array` filled with the value `x`. In particular, `fill(x)` constructs a zero-dimensional `Array` containing `x`. |
 
 [^1]: *iid*, independently and identically distributed.
 
@@ -95,7 +96,7 @@ Here, `(2, 3)` is a [`Tuple`](@ref) and the first argument — the element type
 ## [Array literals](@id man-array-literals)
 
 Arrays can also be directly constructed with square braces; the syntax `[A, B, C, ...]`
-creates a one dimensional array (i.e., a vector) containing the comma-separated arguments as
+creates a one-dimensional array (i.e., a vector) containing the comma-separated arguments as
 its elements. The element type ([`eltype`](@ref)) of the resulting array is automatically
 determined by the types of the arguments inside the braces. If all the arguments are the
 same type, then that is its `eltype`. If they all have a common
@@ -126,7 +127,7 @@ Any[]
 
 ### [Concatenation](@id man-array-concatenation)
 
-If the arguments inside the square brackets are separated by semicolons (`;`) or newlines
+If the arguments inside the square brackets are separated by single semicolons (`;`) or newlines
 instead of commas, then their contents are _vertically concatenated_ together instead of
 the arguments being used as elements themselves.
 
@@ -154,7 +155,7 @@ julia> [1:2
  6
 ```
 
-Similarly, if the arguments are separated by tabs or spaces, then their contents are
+Similarly, if the arguments are separated by tabs or spaces or double semicolons, then their contents are
 _horizontally concatenated_ together.
 
 ```jldoctest
@@ -171,9 +172,13 @@ julia> [[1,2]  [4,5]  [7,8]]
 julia> [1 2 3] # Numbers can also be horizontally concatenated
 1×3 Matrix{Int64}:
  1  2  3
+
+julia> [1;; 2;; 3;; 4]
+1×4 Matrix{Int64}:
+ 1  2  3  4
 ```
 
-Using semicolons (or newlines) and spaces (or tabs) can be combined to concatenate
+Single semicolons (or newlines) and spaces (or tabs) can be combined to concatenate
 both horizontally and vertically at the same time.
 
 ```jldoctest
@@ -189,17 +194,135 @@ julia> [zeros(Int, 2, 2) [1; 2]
  0  0  1
  0  0  2
  3  4  5
+
+julia> [[1 1]; 2 3; [4 4]]
+3×2 Matrix{Int64}:
+ 1  1
+ 2  3
+ 4  4
+```
+
+Spaces (and tabs) have a higher precedence than semicolons, performing any horizontal
+concatenations first and then concatenating the result. Using double semicolons for the
+horizontal concatenation, on the other hand, performs any vertical concatenations before
+horizontally concatenating the result.
+
+```jldoctest
+julia> [zeros(Int, 2, 2) ; [3 4] ;; [1; 2] ; 5]
+3×3 Matrix{Int64}:
+ 0  0  1
+ 0  0  2
+ 3  4  5
+
+julia> [1:2; 4;; 1; 3:4]
+3×2 Matrix{Int64}:
+ 1  1
+ 2  3
+ 4  4
+```
+
+Just as `;` and `;;` concatenate in the first and second dimension, using more semicolons
+extends this same general scheme. The number of semicolons in the separator specifies the
+particular dimension, so `;;;` concatenates in the third dimension, `;;;;` in the 4th, and
+so on. Fewer semicolons take precedence, so the lower dimensions are generally concatenated
+first.
+
+```jldoctest
+julia> [1; 2;; 3; 4;; 5; 6;;;
+        7; 8;; 9; 10;; 11; 12]
+2×3×2 Array{Int64, 3}:
+[:, :, 1] =
+ 1  3  5
+ 2  4  6
+
+[:, :, 2] =
+ 7   9  11
+ 8  10  12
+```
+
+Like before, spaces (and tabs) for horizontal concatenation have a higher precedence than
+any number of semicolons. Thus, higher-dimensional arrays can also be written by specifying
+their rows first, with their elements textually arranged in a manner similar to their layout:
+
+```jldoctest
+julia> [1 3 5
+        2 4 6;;;
+        7 9 11
+        8 10 12]
+2×3×2 Array{Int64, 3}:
+[:, :, 1] =
+ 1  3  5
+ 2  4  6
+
+[:, :, 2] =
+ 7   9  11
+ 8  10  12
+
+julia> [1 2;;; 3 4;;;; 5 6;;; 7 8]
+1×2×2×2 Array{Int64, 4}:
+[:, :, 1, 1] =
+ 1  2
+
+[:, :, 2, 1] =
+ 3  4
+
+[:, :, 1, 2] =
+ 5  6
+
+[:, :, 2, 2] =
+ 7  8
+
+julia> [[1 2;;; 3 4];;;; [5 6];;; [7 8]]
+1×2×2×2 Array{Int64, 4}:
+[:, :, 1, 1] =
+ 1  2
+
+[:, :, 2, 1] =
+ 3  4
+
+[:, :, 1, 2] =
+ 5  6
+
+[:, :, 2, 2] =
+ 7  8
+```
+
+Although they both mean concatenation in the second dimension, spaces (or tabs) and `;;`
+cannot appear in the same array expression unless the double semicolon is simply serving as
+a "line continuation" character. This allows a single horizontal concatenation to span
+multiple lines (without the line break being interpreted as a vertical concatenation).
+
+```jldoctest
+julia> [1 2 ;;
+       3 4]
+1×4 Matrix{Int64}:
+ 1  2  3  4
+```
+
+Terminating semicolons may also be used to add trailing length 1 dimensions.
+
+```jldoctest
+julia> [1;;]
+1×1 Matrix{Int64}:
+ 1
+
+julia> [2; 3;;;]
+2×1×1 Array{Int64, 3}:
+[:, :, 1] =
+ 2
+ 3
 ```
 
 More generally, concatenation can be accomplished through the [`cat`](@ref) function.
 These syntaxes are shorthands for function calls that themselves are convenience functions:
 
-| Syntax            | Function        | Description                                        |
-|:----------------- |:--------------- |:-------------------------------------------------- |
-|                   | [`cat`](@ref)   | concatenate input arrays along dimension(s) `k`    |
-| `[A; B; C; ...]`  | [`vcat`](@ref)  | shorthand for `cat(A...; dims=1)                   |
-| `[A B C ...]`     | [`hcat`](@ref)  | shorthand for `cat(A...; dims=2)                   |
-| `[A B; C D; ...]` | [`hvcat`](@ref) | simultaneous vertical and horizontal concatenation |
+| Syntax                 | Function         | Description                                                                                                |
+|:---------------------- |:---------------- |:---------------------------------------------------------------------------------------------------------- |
+|                        | [`cat`](@ref)    | concatenate input arrays along dimension(s) `k`                                                            |
+| `[A; B; C; ...]`       | [`vcat`](@ref)   | shorthand for `cat(A...; dims=1)`                                                                          |
+| `[A B C ...]`          | [`hcat`](@ref)   | shorthand for `cat(A...; dims=2)`                                                                          |
+| `[A B; C D; ...]`      | [`hvcat`](@ref)  | simultaneous vertical and horizontal concatenation                                                         |
+| `[A; C;; B; D;;; ...]` | [`hvncat`](@ref) | simultaneous n-dimensional concatenation; the number of semicolons indicates the dimension to concatenate  |
 
 ### Typed array literals
 
@@ -573,6 +696,12 @@ julia> A[:, 3]
  13
  15
  17
+
+julia> A[:, 3:3]
+3×1 Matrix{Int64}:
+ 13
+ 15
+ 17
 ```
 
 ### Cartesian indices
@@ -746,7 +875,7 @@ full set of cartesian indices to do their lookup (see [`IndexStyle`](@ref) to
 introspect which is which). As such, when iterating over an entire array, it's
 much better to iterate over [`eachindex(A)`](@ref) instead of `1:length(A)`.
 Not only will the former be much faster in cases where `A` is `IndexCartesian`,
-but it will also support OffsetArrays, too.
+but it will also support [OffsetArrays](https://github.com/JuliaArrays/OffsetArrays.jl), too.
 
 #### Omitted and extra indices
 
diff --git a/doc/src/manual/asynchronous-programming.md b/doc/src/manual/asynchronous-programming.md
index 1791d4b0e40f70..4eee0fccf7da2a 100644
--- a/doc/src/manual/asynchronous-programming.md
+++ b/doc/src/manual/asynchronous-programming.md
@@ -186,7 +186,7 @@ A channel can be visualized as a pipe, i.e., it has a write end and a read end :
 
     # we can schedule `n` instances of `foo` to be active concurrently.
     for _ in 1:n
-        @async foo()
+        errormonitor(@async foo())
     end
     ```
   * Channels are created via the `Channel{T}(sz)` constructor. The channel will only hold objects
@@ -211,7 +211,7 @@ A channel can be visualized as a pipe, i.e., it has a write end and a read end :
     julia> close(c);
 
     julia> put!(c, 2) # `put!` on a closed channel throws an exception.
-    ERROR: InvalidStateException("Channel is closed.",:closed)
+    ERROR: InvalidStateException: Channel is closed.
     Stacktrace:
     [...]
     ```
@@ -230,7 +230,7 @@ A channel can be visualized as a pipe, i.e., it has a write end and a read end :
     1
 
     julia> take!(c) # No more data available on a closed channel.
-    ERROR: InvalidStateException("Channel is closed.",:closed)
+    ERROR: InvalidStateException: Channel is closed.
     Stacktrace:
     [...]
     ```
@@ -263,10 +263,10 @@ julia> function make_jobs(n)
 
 julia> n = 12;
 
-julia> @async make_jobs(n); # feed the jobs channel with "n" jobs
+julia> errormonitor(@async make_jobs(n)); # feed the jobs channel with "n" jobs
 
 julia> for i in 1:4 # start 4 tasks to process requests in parallel
-           @async do_work()
+           errormonitor(@async do_work())
        end
 
 julia> @elapsed while n > 0 # print out results
@@ -289,6 +289,10 @@ julia> @elapsed while n > 0 # print out results
 0.029772311
 ```
 
+Instead of `errormonitor(t)`, a more robust solution may be to use `bind(results, t)`, as that will
+not only log any unexpected failures, but also force the associated resources to close and propagate
+the exception everywhere.
+
 ## More task operations
 
 Task operations are built on a low-level primitive called [`yieldto`](@ref).
diff --git a/doc/src/manual/calling-c-and-fortran-code.md b/doc/src/manual/calling-c-and-fortran-code.md
index 2bd99ea8366868..ccad2cce29dfe3 100644
--- a/doc/src/manual/calling-c-and-fortran-code.md
+++ b/doc/src/manual/calling-c-and-fortran-code.md
@@ -157,7 +157,7 @@ This is why we don't use the `Cstring` type here: as the array is uninitialized,
 NUL bytes. Converting to a `Cstring` as part of the [`ccall`](@ref) checks for contained NUL bytes
 and could therefore throw a conversion error.
 
-Deferencing `pointer(hostname)` with `unsafe_string` is an unsafe operation as it requires access to
+Dereferencing `pointer(hostname)` with `unsafe_string` is an unsafe operation as it requires access to
 the memory allocated for `hostname` that may have been in the meanwhile garbage collected. The macro
 [`GC.@preserve`](@ref) prevents this from happening and therefore accessing an invalid memory location.
 
@@ -657,7 +657,7 @@ For translating a C argument list to Julia:
       * `Ref{Any}`
       * argument list must be a valid Julia object (or `C_NULL`)
       * cannot be used for an output parameter, unless the user is able to
-        manage to separate arrange for the object to be GC-preserved
+        separately arrange for the object to be GC-preserved
   * `T*`
 
       * `Ref{T}`, where `T` is the Julia type corresponding to `T`
@@ -721,7 +721,8 @@ For translating a C return type to Julia:
           * `Ptr{T}`, where `T` is the Julia type corresponding to `T`
   * `T (*)(...)` (e.g. a pointer to a function)
 
-      * `Ptr{Cvoid}` (you may need to use [`@cfunction`](@ref) explicitly to create this pointer)
+      * `Ptr{Cvoid}`. To call this directly from Julia, you will need to pass this as the first argument to [`ccall`](@ref).
+        See [Indirect Calls](@ref).
 
 ### Passing Pointers for Modifying Inputs
 
@@ -928,7 +929,7 @@ macro dlsym(func, lib)
         let zlocal = $z[]
             if zlocal == C_NULL
                 zlocal = dlsym($(esc(lib))::Ptr{Cvoid}, $(esc(func)))::Ptr{Cvoid}
-                $z[] = $zlocal
+                $z[] = zlocal
             end
             zlocal
         end
diff --git a/doc/src/manual/code-loading.md b/doc/src/manual/code-loading.md
index 4f5a4edad44862..d6f359f83d5cb4 100644
--- a/doc/src/manual/code-loading.md
+++ b/doc/src/manual/code-loading.md
@@ -349,6 +349,23 @@ The subscripted `rootsᵢ`, `graphᵢ` and `pathsᵢ` variables correspond to th
 
 Since the primary environment is typically the environment of a project you're working on, while environments later in the stack contain additional tools, this is the right trade-off: it's better to break your development tools but keep the project working. When such incompatibilities occur, you'll typically want to upgrade your dev tools to versions that are compatible with the main project.
 
+### Package/Environment Preferences
+
+Preferences are dictionaries of metadata that influence package behavior within an environment.
+The preferences system supports reading preferences at compile-time, which means that at code-loading time, we must ensure that a particular `.ji` file was built with the same preferences as the current environment before loading it.
+The public API for modifying Preferences is contained within the [Preferences.jl](https://github.com/JuliaPackaging/Preferences.jl) package.
+Preferences are stored as TOML dictionaries within a `(Julia)LocalPreferences.toml` file next to the currently-active project.
+If a preference is "exported", it is instead stored within the `(Julia)Project.toml`.
+The intention is to allow shared projects to contain shared preferences, while allowing for users themselves to override those preferences with their own settings in the LocalPreferences.toml file, which should be .gitignored as the name implies.
+
+Preferences that are accessed during compilation are automatically marked as compile-time preferences, and any change recorded to these preferences will cause the Julia compiler to recompile any cached precompilation `.ji` files for that module.
+This is done by serializing the hash of all compile-time preferences during compilation, then checking that hash against the current environment when searching for the proper `.ji` file to load.
+
+Preferences can be set with depot-wide defaults; if package Foo is installed within your global environment and it has preferences set, these preferences will apply as long as your global environment is part of your `LOAD_PATH`.
+Preferences in environments higher up in the environment stack get overridden by the more proximal entries in the load path, ending with the currently active project.
+This allows depot-wide preference defaults to exist, with active projects able to merge or even completely overwrite these inherited preferences.
+See the docstring for `Preferences.set_preferences!()` for the full details of how to set preferences to allow or disallow merging.
+
 ## Conclusion
 
 Federated package management and precise software reproducibility are difficult but worthy goals in a package system. In combination, these goals lead to a more complex package loading mechanism than most dynamic languages have, but it also yields scalability and reproducibility that is more commonly associated with static languages. Typically, Julia users should be able to use the built-in package manager to manage their projects without needing a precise understanding of these interactions. A call to `Pkg.add("X")` will add to the appropriate project and manifest files, selected via `Pkg.activate("Y")`, so that a future call to `import X` will load `X` without further thought.
diff --git a/doc/src/manual/command-line-options.md b/doc/src/manual/command-line-options.md
index b6dd1b6b027224..a0fe1849cc7b12 100644
--- a/doc/src/manual/command-line-options.md
+++ b/doc/src/manual/command-line-options.md
@@ -28,9 +28,10 @@ The following is a complete list of command-line switches available when launchi
 |`--warn-overwrite={yes\|no}`           |Enable or disable method overwrite warnings|
 |`-C`, `--cpu-target <target>`          |Limit usage of CPU features up to `<target>`; set to `help` to see the available options|
 |`-O`, `--optimize={0,1,2,3}`           |Set the optimization level (default level is 2 if unspecified or 3 if used without a level)|
+|`--min-optlevel={0,1,2,3}`             |Set the lower bound on per-module optimization (default is 0)|
 |`-g`, `-g <level>`                     |Enable / Set the level of debug info generation (default level is 1 if unspecified or 2 if used without a level)|
 |`--inline={yes\|no}`                   |Control whether inlining is permitted, including overriding `@inline` declarations|
-|`--check-bounds={yes\|no}`             |Emit bounds checks always or never (ignoring declarations)|
+|`--check-bounds={yes\|no\|auto}`       |Emit bounds checks always, never, or respect @inbounds declarations|
 |`--math-mode={ieee,fast}`              |Disallow or enable unsafe floating point optimizations (overrides @fastmath declaration)|
 |`--code-coverage={none\|user\|all}`    |Count executions of source lines|
 |`--code-coverage`                      |equivalent to `--code-coverage=user`|
diff --git a/doc/src/manual/complex-and-rational-numbers.md b/doc/src/manual/complex-and-rational-numbers.md
index 99e4a677e27241..ac48e5b420f5e4 100644
--- a/doc/src/manual/complex-and-rational-numbers.md
+++ b/doc/src/manual/complex-and-rational-numbers.md
@@ -8,7 +8,7 @@ behave as expected.
 ## Complex Numbers
 
 The global constant [`im`](@ref) is bound to the complex number *i*, representing the principal
-square root of -1. (Using mathematicians' `i` or engineers' `j` for this global constant were rejected since they are such popular index variable names.) Since Julia allows numeric literals to be [juxtaposed with identifiers as coefficients](@ref man-numeric-literal-coefficients),
+square root of -1. (Using mathematicians' `i` or engineers' `j` for this global constant was rejected since they are such popular index variable names.) Since Julia allows numeric literals to be [juxtaposed with identifiers as coefficients](@ref man-numeric-literal-coefficients),
 this binding suffices to provide convenient syntax for complex numbers, similar to the traditional
 mathematical notation:
 
diff --git a/doc/src/manual/control-flow.md b/doc/src/manual/control-flow.md
index 1b785070c0e1a4..63832cc4c90c96 100644
--- a/doc/src/manual/control-flow.md
+++ b/doc/src/manual/control-flow.md
@@ -817,7 +817,7 @@ The power of the `try/catch` construct lies in the ability to unwind a deeply ne
 immediately to a much higher level in the stack of calling functions. There are situations where
 no error has occurred, but the ability to unwind the stack and pass a value to a higher level
 is desirable. Julia provides the [`rethrow`](@ref), [`backtrace`](@ref), [`catch_backtrace`](@ref)
-and [`Base.catch_stack`](@ref) functions for more advanced error handling.
+and [`current_exceptions`](@ref) functions for more advanced error handling.
 
 ### `finally` Clauses
 
diff --git a/doc/src/manual/conversion-and-promotion.md b/doc/src/manual/conversion-and-promotion.md
index c33a765d215d97..a8d8bf61dc54ba 100644
--- a/doc/src/manual/conversion-and-promotion.md
+++ b/doc/src/manual/conversion-and-promotion.md
@@ -319,9 +319,15 @@ julia> promote_type(Int8, Int64)
 Int64
 ```
 
+Note that we do **not** overload `promote_type` directly: we overload `promote_rule` instead.
+`promote_type` uses `promote_rule`, and adds the symmetry.
+Overloading it directly can cause ambiguity errors.
+We overload `promote_rule` to define how things should be promoted, and we use `promote_type`
+to query that.
+
 Internally, `promote_type` is used inside of `promote` to determine what type argument values
-should be converted to for promotion. It can, however, be useful in its own right. The curious
-reader can read the code in [`promotion.jl`](https://github.com/JuliaLang/julia/blob/master/base/promotion.jl),
+should be converted to for promotion. The curious reader can read the code in
+[`promotion.jl`](https://github.com/JuliaLang/julia/blob/master/base/promotion.jl),
 which defines the complete promotion mechanism in about 35 lines.
 
 ### Case Study: Rational Promotions
diff --git a/doc/src/manual/distributed-computing.md b/doc/src/manual/distributed-computing.md
index e5b6e78cae9815..8f2b02fff8e1a2 100644
--- a/doc/src/manual/distributed-computing.md
+++ b/doc/src/manual/distributed-computing.md
@@ -1,6 +1,6 @@
 # Multi-processing and Distributed Computing
 
-An implementation of distributed memory parallel computing is provided by module `Distributed`
+An implementation of distributed memory parallel computing is provided by module [`Distributed`](@ref man-distributed)
 as part of the standard library shipped with Julia.
 
 Most modern computers possess more than one CPU, and several computers can be combined together
@@ -45,11 +45,11 @@ computation is running on the worker.
 
 Let's try this out. Starting with `julia -p n` provides `n` worker processes on the local machine.
 Generally it makes sense for `n` to equal the number of CPU threads (logical cores) on the machine. Note that the `-p`
-argument implicitly loads module `Distributed`.
+argument implicitly loads module [`Distributed`](@ref man-distributed).
 
 
 ```julia
-$ ./julia -p 2
+$ julia -p 2
 
 julia> r = remotecall(rand, 2, 2, 2)
 Future(2, 1, 4, nothing)
@@ -80,10 +80,18 @@ you read from a remote object to obtain data needed by the next local operation.
 but is more efficient.
 
 ```julia-repl
-julia> remotecall_fetch(getindex, 2, r, 1, 1)
+julia> remotecall_fetch(r-> fetch(r)[1, 1], 2, r)
 0.18526337335308085
 ```
 
+This fetches the array on worker 2 and returns the first value. Note that `fetch` doesn't move any data in
+this case, since it's executed on the worker that owns the array. One can also write:
+
+```julia-repl
+julia> remotecall_fetch(getindex, 2, r, 1, 1)
+0.10824216411304866
+```
+
 Remember that [`getindex(r,1,1)`](@ref) is [equivalent](@ref man-array-indexing) to `r[1,1]`, so this call fetches
 the first element of the future `r`.
 
@@ -190,7 +198,7 @@ loaded
 ```
 
 As usual, this does not bring `DummyModule` into scope on any of the process, which requires
-`using` or `import`.  Moreover, when `DummyModule` is brought into scope on one process, it
+[`using`](@ref) or [`import`](@ref).  Moreover, when `DummyModule` is brought into scope on one process, it
 is not on any other:
 
 ```julia-repl
@@ -228,7 +236,7 @@ like a process providing an interactive prompt.
 
 Finally, if `DummyModule.jl` is not a standalone file but a package, then `using
 DummyModule` will _load_ `DummyModule.jl` on all processes, but only bring it into scope on
-the process where `using` was called.
+the process where [`using`](@ref) was called.
 
 ## Starting and managing worker processes
 
@@ -254,7 +262,7 @@ julia> addprocs(2)
  3
 ```
 
-Module `Distributed` must be explicitly loaded on the master process before invoking [`addprocs`](@ref).
+Module [`Distributed`](@ref man-distributed) must be explicitly loaded on the master process before invoking [`addprocs`](@ref).
 It is automatically made available on the worker processes.
 
 Note that workers do not run a `~/.julia/config/startup.jl` startup script, nor do they synchronize
@@ -314,8 +322,8 @@ is replaced with a more expensive operation. Then it might make sense to add ano
 statement just for this step.
 
 ## Global variables
-Expressions executed remotely via `@spawnat`, or closures specified for remote execution using
-`remotecall` may refer to global variables. Global bindings under module `Main` are treated
+Expressions executed remotely via [`@spawnat`](@ref), or closures specified for remote execution using
+[`remotecall`](@ref) may refer to global variables. Global bindings under module `Main` are treated
 a little differently compared to global bindings in other modules. Consider the following code
 snippet:
 
@@ -327,7 +335,7 @@ remotecall_fetch(()->sum(A), 2)
 In this case [`sum`](@ref) MUST be defined in the remote process.
 Note that `A` is a global variable defined in the local workspace. Worker 2 does not have a variable called
 `A` under `Main`. The act of shipping the closure `()->sum(A)` to worker 2 results in `Main.A` being defined
-on 2. `Main.A` continues to exist on worker 2 even after the call `remotecall_fetch` returns. Remote calls
+on 2. `Main.A` continues to exist on worker 2 even after the call [`remotecall_fetch`](@ref) returns. Remote calls
 with embedded global references (under `Main` module only) manage globals as follows:
 
 - New global bindings are created on destination workers if they are referenced as part of a remote call.
@@ -580,7 +588,7 @@ julia> function make_jobs(n)
 
 julia> n = 12;
 
-julia> @async make_jobs(n); # feed the jobs channel with "n" jobs
+julia> errormonitor(@async make_jobs(n)); # feed the jobs channel with "n" jobs
 
 julia> for p in workers() # start tasks on the workers to process requests in parallel
            remote_do(do_work, p, jobs, results)
@@ -648,7 +656,7 @@ Once finalized, a reference becomes invalid and cannot be used in any further ca
 ## Local invocations
 
 Data is necessarily copied over to the remote node for execution. This is the case for both
-remotecalls and when data is stored to a[`RemoteChannel`](@ref) / [`Future`](@ref Distributed.Future) on
+remotecalls and when data is stored to a [`RemoteChannel`](@ref) / [`Future`](@ref Distributed.Future) on
 a different node. As expected, this results in a copy of the serialized objects
 on the remote node. However, when the destination node is the local node, i.e.
 the calling process id is the same as the remote node id, it is executed
@@ -697,11 +705,11 @@ Num Unique objects : 3
 ```
 
 As can be seen, [`put!`](@ref) on a locally owned [`RemoteChannel`](@ref) with the same
-object `v` modifed between calls results in the same single object instance stored. As
+object `v` modified between calls results in the same single object instance stored. As
 opposed to copies of `v` being created when the node owning `rc` is a different node.
 
 It is to be noted that this is generally not an issue. It is something to be factored in only
-if the object is both being stored locally and modifed post the call. In such cases it may be
+if the object is both being stored locally and modified post the call. In such cases it may be
 appropriate to store a `deepcopy` of the object.
 
 This is also true for remotecalls on the local node as seen in the following example:
@@ -1197,12 +1205,12 @@ requirements for the inbuilt `LocalManager` and `SSHManager`:
     Securing and encrypting all worker-worker traffic (via SSH) or encrypting individual messages
     can be done via a custom `ClusterManager`.
 
-  * If you specify `multiplex=true` as an option to `addprocs`, SSH multiplexing is used to create
+  * If you specify `multiplex=true` as an option to [`addprocs`](@ref), SSH multiplexing is used to create
     a tunnel between the master and workers. If you have configured SSH multiplexing on your own and
     the connection has already been established, SSH multiplexing is used regardless of `multiplex`
     option. If multiplexing is enabled, forwarding is set by using the existing connection
     (`-O forward` option in ssh). This is beneficial if your servers require password authentication;
-    you can avoid authentication in Julia by logging in to the server ahead of `addprocs`. The control
+    you can avoid authentication in Julia by logging in to the server ahead of [`addprocs`](@ref). The control
     socket will be located at `~/.ssh/julia-%r@%h:%p` during the session unless the existing multiplexing
     connection is used. Note that bandwidth may be limited if you create multiple processes on a node
     and enable multiplexing, because in that case processes share a single multiplexing TCP connection.
@@ -1228,7 +1236,7 @@ For example, cookies can be pre-shared and hence not specified as a startup argu
 
 ## Specifying Network Topology (Experimental)
 
-The keyword argument `topology` passed to `addprocs` is used to specify how the workers must be
+The keyword argument `topology` passed to [`addprocs`](@ref) is used to specify how the workers must be
 connected to each other:
 
   * `:all_to_all`, the default: all workers are connected to each other.
diff --git a/doc/src/manual/documentation.md b/doc/src/manual/documentation.md
index 8a3d41ffc8171a..77a3e78dfd9707 100644
--- a/doc/src/manual/documentation.md
+++ b/doc/src/manual/documentation.md
@@ -3,7 +3,7 @@
 Julia enables package developers and users to document functions, types and other objects easily
 via a built-in documentation system.
 
-The basic syntax is simple: any string appearing at the toplevel right before an object
+The basic syntax is simple: any string appearing just before an object
 (function, macro, type or instance) will be interpreted as documenting it (these are called
 *docstrings*). Note that no blank lines or comments may intervene between a docstring and
 the documented object. Here is a basic example:
@@ -91,10 +91,10 @@ As in the example above, we recommend following some simple conventions when wri
 5. Provide hints to related functions.
 
    Sometimes there are functions of related functionality. To increase discoverability please provide
-   a short list of these in a `See also:` paragraph.
+   a short list of these in a `See also` paragraph.
 
    ```
-   See also: [`bar!`](@ref), [`baz`](@ref), [`baaz`](@ref)
+   See also [`bar!`](@ref), [`baz`](@ref), [`baaz`](@ref).
    ```
 6. Include any code examples in an `# Examples` section.
 
@@ -128,8 +128,7 @@ As in the example above, we recommend following some simple conventions when wri
        Calling `rand` and other RNG-related functions should be avoided in doctests since they will not
        produce consistent outputs during different Julia sessions. If you would like to show some random
        number generation related functionality, one option is to explicitly construct and seed your own
-       [`MersenneTwister`](@ref) (or other pseudorandom number generator) and pass it to the functions you are
-       doctesting.
+       RNG object (see [`Random`](@ref Random-Numbers)) and pass it to the functions you are doctesting.
 
        Operating system word size ([`Int32`](@ref) or [`Int64`](@ref)) as well as path separator differences
        (`/` or `\`) will also affect the reproducibility of some doctests.
@@ -343,17 +342,17 @@ for your custom type that returns the documentation on a per-instance basis. For
 
 ```julia
 struct MyType
-    value::String
+    value::Int
 end
 
 Docs.getdoc(t::MyType) = "Documentation for MyType with value $(t.value)"
 
-x = MyType("x")
-y = MyType("y")
+x = MyType(1)
+y = MyType(2)
 ```
 
-`?x` will display "Documentation for MyType with value x" while `?y` will display
-"Documentation for MyType with value y".
+`?x` will display "Documentation for MyType with value 1" while `?y` will display
+"Documentation for MyType with value 2".
 
 ## Syntax Guide
 
diff --git a/doc/src/manual/embedding.md b/doc/src/manual/embedding.md
index fcc9f06536ab36..22c2f66f9b8b03 100644
--- a/doc/src/manual/embedding.md
+++ b/doc/src/manual/embedding.md
@@ -16,7 +16,7 @@ We start with a simple C program that initializes Julia and calls some Julia cod
 
 ```c
 #include <julia.h>
-JULIA_DEFINE_FAST_TLS() // only define this once, in an executable (not in a shared library) if you want fast code.
+JULIA_DEFINE_FAST_TLS // only define this once, in an executable (not in a shared library) if you want fast code.
 
 int main(int argc, char *argv[])
 {
diff --git a/doc/src/manual/environment-variables.md b/doc/src/manual/environment-variables.md
index cce68bad13614e..efdd0c56895a11 100644
--- a/doc/src/manual/environment-variables.md
+++ b/doc/src/manual/environment-variables.md
@@ -272,7 +272,7 @@ should have at the terminal.
 
 ### `JULIA_DEBUG`
 
-Enable debug logging for a file or module, see [`Logging`](@ref Logging) for more information.
+Enable debug logging for a file or module, see [`Logging`](@ref man-logging) for more information.
 
 ### `JULIA_GC_ALLOC_POOL`, `JULIA_GC_ALLOC_OTHER`, `JULIA_GC_ALLOC_PRINT`
 
diff --git a/doc/src/manual/faq.md b/doc/src/manual/faq.md
index 1cd8ec849434b9..b7ccf92939a5ea 100644
--- a/doc/src/manual/faq.md
+++ b/doc/src/manual/faq.md
@@ -18,6 +18,33 @@ For similar reasons, automated translation to Julia would also typically generat
 
 On the other hand, language *interoperability* is extremely useful: we want to exploit existing high-quality code in other languages from Julia (and vice versa)!  The best way to enable this is not a transpiler, but rather via easy inter-language calling facilities.  We have worked hard on this, from the built-in `ccall` intrinsic (to call C and Fortran libraries) to [JuliaInterop](https://github.com/JuliaInterop) packages that connect Julia to Python, Matlab, C++, and more.
 
+## [Public API](@id man-api)
+
+### How does Julia define its public API?
+
+The only interfaces that are stable with respect to [SemVer](https://semver.org/) of `julia`
+version are the Julia `Base` and standard library interfaces described in
+[the documentation](https://docs.julialang.org/) and not marked as unstable (e.g.,
+experimental and internal).  Functions, types, and constants are not part of the public
+API if they are not included in the documentation, _even if they have docstrings_.
+
+### There is a useful undocumented function/type/constant. Can I use it?
+
+Updating Julia may break your code if you use non-public API.  If the code is
+self-contained, it may be a good idea to copy it into your project.  If you want to rely on
+a complex non-public API, especially when using it from a stable package, it is a good idea
+to open an [issue](https://github.com/JuliaLang/julia/issues) or
+[pull request](https://github.com/JuliaLang/julia/pulls) to start a discussion for turning it
+into a public API.  However, we do not discourage the attempt to create packages that expose
+stable public interfaces while relying on non-public implementation details of `julia` and
+buffering the differences across different `julia` versions.
+
+### The documentation is not accurate enough. Can I rely on the existing behavior?
+
+Please open an [issue](https://github.com/JuliaLang/julia/issues) or
+[pull request](https://github.com/JuliaLang/julia/pulls) to start a discussion for turning the
+existing behavior into a public API.
+
 ## Sessions and the REPL
 
 ### How do I delete an object in memory?
@@ -293,23 +320,23 @@ julia> threefloat()
 and similarly:
 
 ```jldoctest
-julia> function threetup()
-           x, y = [3, 3]
+julia> function twothreetup()
+           x, y = [2, 3] # assigns 2 to x and 3 to y
            x, y # returns a tuple
        end
-threetup (generic function with 1 method)
+twothreetup (generic function with 1 method)
 
-julia> function threearr()
-           x, y = [3, 3] # returns an array
+julia> function twothreearr()
+           x, y = [2, 3] # returns an array
        end
-threearr (generic function with 1 method)
+twothreearr (generic function with 1 method)
 
-julia> threetup()
-(3, 3)
+julia> twothreetup()
+(2, 3)
 
-julia> threearr()
+julia> twothreearr()
 2-element Vector{Int64}:
- 3
+ 2
  3
 ```
 
@@ -714,6 +741,32 @@ julia> remotecall_fetch(anon_bar, 2)
 1
 ```
 
+## Troubleshooting "method not matched": parametric type invariance and `MethodError`s
+
+### Why doesn't it work to declare `foo(bar::Vector{Real}) = 42` and then call `foo([1])`?
+
+As you'll see if you try this, the result is a `MethodError`:
+
+```jldoctest
+julia> foo(x::Vector{Real}) = 42
+foo (generic function with 1 method)
+
+julia> foo([1])
+ERROR: MethodError: no method matching foo(::Vector{Int64})
+Closest candidates are:
+  foo(!Matched::Vector{Real}) at none:1
+```
+
+This is because `Vector{Real}` is not a supertype of `Vector{Int}`! You can solve this problem with something
+like `foo(bar::Vector{T}) where {T<:Real}` (or the short form `foo(bar::Vector{<:Real})` if the static parameter `T`
+is not needed in the body of the function). The `T` is a wild card: you first specify that it must be a
+subtype of `Real`, then specify that the function takes a `Vector` with elements of that type.
+
+This same issue goes for any composite type `Comp`, not just `Vector`. If `Comp` has a parameter declared of
+type `Y`, then another type `Comp2` with a parameter of type `X<:Y` is not a subtype of `Comp`. This is
+type-invariance (by contrast, Tuple is type-covariant in its parameters). See [Parametric Composite
+Types](@ref man-parametric-composite-types) for more explanation of these.
+
 ### Why does Julia use `*` for string concatenation? Why not `+` or something else?
 
 The [main argument](@ref man-concatenation) against `+` is that string concatenation is not
@@ -869,7 +922,7 @@ julia> @sync for i in 1:3
 
 ## Arrays
 
-### What are the differences between zero-dimensional arrays and scalars?
+### [What are the differences between zero-dimensional arrays and scalars?](@id faq-array-0dim)
 
 Zero-dimensional arrays are arrays of the form `Array{T,0}`. They behave similar
 to scalars, but there are important differences. They deserve a special mention
diff --git a/doc/src/manual/functions.md b/doc/src/manual/functions.md
index 5d1d649b750623..5fbca52bbfaad1 100644
--- a/doc/src/manual/functions.md
+++ b/doc/src/manual/functions.md
@@ -180,7 +180,7 @@ end
 ```
 
 This is a *convention* in the sense that `nothing` is not a Julia keyword
-but a only singleton object of type `Nothing`.
+but only a singleton object of type `Nothing`.
 Also, you may notice that the `printx` function example above is contrived,
 because `println` already returns `nothing`, so that the `return` line is redundant.
 
@@ -352,12 +352,26 @@ Named tuples are very similar to tuples, except that fields can additionally be
 using dot syntax (`x.a`) in addition to the regular indexing syntax
 (`x[1]`).
 
-## Multiple Return Values
+## [Destructuring Assignment and Multiple Return Values](@id destructuring-assignment)
 
-In Julia, one returns a tuple of values to simulate returning multiple values. However, tuples
-can be created and destructured without needing parentheses, thereby providing an illusion that
-multiple values are being returned, rather than a single tuple value. For example, the following
-function returns a pair of values:
+A comma-separated list of variables (optionally wrapped in parentheses) can appear on the
+left side of an assignment: the value on the right side is _destructured_ by iterating
+over and assigning to each variable in turn:
+
+```jldoctest
+julia> (a,b,c) = 1:3
+1:3
+
+julia> b
+2
+```
+
+The value on the right should be an iterator (see [Iteration interface](@ref man-interface-iteration))
+at least as long as the number of variables on the left (any excess elements of the
+iterator are ignored).
+
+This can be used to return multiple values from functions by returning a tuple or
+other iterable value. For example, the following function returns two values:
 
 ```jldoctest foofunc
 julia> function foo(a,b)
@@ -374,8 +388,7 @@ julia> foo(2,3)
 (5, 6)
 ```
 
-A typical usage of such a pair of return values, however, extracts each value into a variable.
-Julia supports simple tuple "destructuring" that facilitates this:
+Destructuring assignment extracts each value into a variable:
 
 ```jldoctest foofunc
 julia> x, y = foo(2,3)
@@ -388,15 +401,96 @@ julia> y
 6
 ```
 
-You can also return multiple values using the `return` keyword:
+Another common use is for swapping variables:
+```jldoctest foofunc
+julia> y, x = x, y
+(5, 6)
+
+julia> x
+6
 
-```julia
-function foo(a,b)
-    return a+b, a*b
-end
+julia> y
+5
+```
+
+If only a subset of the elements of the iterator are required, a common convention is to assign ignored elements to a variable
+consisting of only underscores `_` (which is an otherwise invalid variable name, see
+[Allowed Variable Names](@ref man-allowed-variable-names)):
+
+```jldoctest
+julia> _, _, _, d = 1:10
+1:10
+
+julia> d
+4
+```
+
+Other valid left-hand side expressions can be used as elements of the assignment list, which will call [`setindex!`](@ref) or [`setproperty!`](@ref), or recursively destructure individual elements of the iterator:
+
+```jldoctest
+julia> X = zeros(3);
+
+julia> X[1], (a,b) = (1, (2, 3))
+(1, (2, 3))
+
+julia> X
+3-element Vector{Float64}:
+ 1.0
+ 0.0
+ 0.0
+
+julia> a
+2
+
+julia> b
+3
+```
+
+!!! compat "Julia 1.6"
+    `...` with assignment requires Julia 1.6
+
+If the last symbol in the assignment list is suffixed by `...` (known as _slurping_), then
+it will be assigned a collection or lazy iterator of the remaining elements of the
+right-hand side iterator:
+
+```jldoctest
+julia> a, b... = "hello"
+"hello"
+
+julia> a
+'h': ASCII/Unicode U+0068 (category Ll: Letter, lowercase)
+
+julia> b
+"ello"
+
+julia> a, b... = Iterators.map(abs2, 1:4)
+Base.Generator{UnitRange{Int64}, typeof(abs2)}(abs2, 1:4)
+
+julia> a
+1
+
+julia> b
+Base.Iterators.Rest{Base.Generator{UnitRange{Int64}, typeof(abs2)}, Int64}(Base.Generator{UnitRange{Int64}, typeof(abs2)}(abs2, 1:4), 1)
 ```
 
-This has the exact same effect as the previous definition of `foo`.
+See [`Base.rest`](@ref) for details on the precise handling and customization for specific iterators.
+
+## Property destructuring
+
+Instead of destructuring based on iteration, the right side of assignments can also be destructured using property names.
+This follows the syntax for NamedTuples, and works by assigning to each variable on the left a
+property of the right side of the assignment with the same name using `getproperty`:
+
+```julia
+julia> (; b, a) = (a=1, b=2, c=3)
+(a = 1, b = 2, c = 3)
+
+julia> a
+1
+
+julia> b
+2
+```
 
 ## Argument destructuring
 
@@ -416,7 +510,25 @@ julia> gap(minmax(10, 2))
 Notice the extra set of parentheses in the definition of `gap`. Without those, `gap`
 would be a two-argument function, and this example would not work.
 
-For anonymous functions, destructuring a single tuple requires an extra comma:
+Similarly, property destructuring can also be used for function arguments:
+
+```julia
+julia> foo((; x, y)) = x + y
+foo (generic function with 1 method)
+
+julia> foo((x=1, y=2))
+3
+
+julia> struct A
+           x
+           y
+       end
+
+julia> foo(A(3, 4))
+7
+```
+
+For anonymous functions, destructuring a single argument requires an extra comma:
 
 ```
 julia> map(((x,y),) -> x + y, [(1,2), (3,4)])
diff --git a/doc/src/manual/getting-started.md b/doc/src/manual/getting-started.md
index 9caa52792be939..f7bdfd2a64afc7 100644
--- a/doc/src/manual/getting-started.md
+++ b/doc/src/manual/getting-started.md
@@ -61,7 +61,7 @@ bar
 The `--` delimiter can be used to separate command-line arguments intended for the script file from arguments intended for Julia:
 
 ```
-$ julia --color=yes -O -- foo.jl arg1 arg2..
+$ julia --color=yes -O -- script.jl arg1 arg2..
 ```
 
 See also [Scripting](@ref man-scripting) for more information on writing Julia scripts.
@@ -103,3 +103,20 @@ command-line-options).
 ## Resources
 
 A curated list of useful learning resources to help new users get started can be found on the [learning](https://julialang.org/learning/) page of the main Julia web site.
+
+You can use the REPL as a learning resource by switching into the help mode.
+Switch to help mode by pressing `?` at an empty `julia> ` prompt, before typing
+anything else. Typing a keyword in help mode will fetch the documentation for
+it, along with examples. Similarly for most functions or other objects you
+might encounter!
+
+```
+help?> begin
+search: begin disable_sigint reenable_sigint
+
+  begin
+
+  begin...end denotes a block of code.
+```
+
+If you already know Julia a bit, you might want to peek ahead at [Performance Tips](@ref man-performance-tips) and [Workflow Tips](@ref man-workflow-tips).
diff --git a/doc/src/manual/integers-and-floating-point-numbers.md b/doc/src/manual/integers-and-floating-point-numbers.md
index 9b5b24637e5aee..8ba962da27184c 100644
--- a/doc/src/manual/integers-and-floating-point-numbers.md
+++ b/doc/src/manual/integers-and-floating-point-numbers.md
@@ -369,6 +369,7 @@ the real number line:
 | `-Inf16`  | `-Inf32`  | `-Inf`    | negative infinity | a value less than all finite floating-point values              |
 | `NaN16`   | `NaN32`   | `NaN`     | not a number      | a value not `==` to any floating-point value (including itself) |
 
+
 For further discussion of how these non-finite floating-point values are ordered with respect
 to each other and other floats, see [Numeric Comparisons](@ref). By the [IEEE 754 standard](https://en.wikipedia.org/wiki/IEEE_754-2008),
 these floating-point values are the results of certain arithmetic operations:
@@ -409,6 +410,18 @@ NaN
 
 julia> 0 * Inf
 NaN
+
+julia> NaN == NaN
+false
+
+julia> NaN != NaN
+true
+
+julia> NaN < NaN
+false
+
+julia> NaN > NaN
+false
 ```
 
 The [`typemin`](@ref) and [`typemax`](@ref) functions also apply to floating-point types:
diff --git a/doc/src/manual/interfaces.md b/doc/src/manual/interfaces.md
index a6539c457c6fc4..7333434b87afb8 100644
--- a/doc/src/manual/interfaces.md
+++ b/doc/src/manual/interfaces.md
@@ -371,7 +371,7 @@ julia> A[1:2,:]
  2.0  5.0  8.0
 ```
 
-In this example it is accomplished by defining `Base.similar{T}(A::SparseArray, ::Type{T}, dims::Dims)`
+In this example it is accomplished by defining `Base.similar(A::SparseArray, ::Type{T}, dims::Dims) where T`
 to create the appropriate wrapped array. (Note that while `similar` supports 1- and 2-argument
 forms, in most case you only need to specialize the 3-argument form.) For this to work it's important
 that `SparseArray` is mutable (supports `setindex!`). Defining `similar`, `getindex` and
@@ -479,7 +479,7 @@ they are iterable collections of their characters (see [Strings](@ref) for more)
 The next two steps (selecting the output array and implementation) are dependent upon
 determining a single answer for a given set of arguments. Broadcast must take all the varied
 types of its arguments and collapse them down to just one output array and one
-implementation. Broadcast calls this single answer a "style." Every broadcastable object
+implementation. Broadcast calls this single answer a "style". Every broadcastable object
 each has its own preferred style, and a promotion-like system is used to combine these
 styles into a single answer — the "destination style".
 
@@ -549,7 +549,7 @@ Base.showarg(io::IO, A::ArrayAndChar, toplevel) = print(io, typeof(A), " with ch
 
 ```
 
-You might want broadcasting to preserve the `char` "metadata." First we define
+You might want broadcasting to preserve the `char` "metadata". First we define
 
 ```jldoctest ArrayAndChar; output = false
 Base.BroadcastStyle(::Type{<:ArrayAndChar}) = Broadcast.ArrayStyle{ArrayAndChar}()
diff --git a/doc/src/manual/mathematical-operations.md b/doc/src/manual/mathematical-operations.md
index 850c1f15bbd173..c285ed9abbabd2 100644
--- a/doc/src/manual/mathematical-operations.md
+++ b/doc/src/manual/mathematical-operations.md
@@ -9,18 +9,18 @@ collection of standard mathematical functions.
 The following [arithmetic operators](https://en.wikipedia.org/wiki/Arithmetic#Arithmetic_operations)
 are supported on all primitive numeric types:
 
-| Expression | Name           | Description                            |
-|:---------- |:-------------- |:-------------------------------------- |
-| `+x`       | unary plus     | the identity operation                 |
-| `-x`       | unary minus    | maps values to their additive inverses |
-| `x + y`    | binary plus    | performs addition                      |
-| `x - y`    | binary minus   | performs subtraction                   |
-| `x * y`    | times          | performs multiplication                |
-| `x / y`    | divide         | performs division                      |
-| `x ÷ y`    | integer divide | x / y, truncated to an integer         |
-| `x \ y`    | inverse divide | equivalent to `y / x`                  |
-| `x ^ y`    | power          | raises `x` to the `y`th power          |
-| `x % y`    | remainder      | equivalent to `rem(x,y)`               |
+| Expression | Name           | Description                             |
+|:---------- |:-------------- |:----------------------------------------|
+| `+x`       | unary plus     | the identity operation                  |
+| `-x`       | unary minus    | maps values to their additive inverses  |
+| `x + y`    | binary plus    | performs addition                       |
+| `x - y`    | binary minus   | performs subtraction                    |
+| `x * y`    | times          | performs multiplication                 |
+| `x / y`    | divide         | performs division                       |
+| `x ÷ y`    | integer divide | x / y, truncated to an integer          |
+| `x \ y`    | inverse divide | equivalent to `y / x`                   |
+| `x ^ y`    | power          | raises `x` to the `y`th power           |
+| `x % y`    | remainder      | equivalent to `rem(x,y)`                |
 
 A numeric literal placed directly before an identifier or parentheses, e.g. `2x` or `2(x+y)`, is treated as a multiplication, except with higher precedence than other binary operations.  See [Numeric Literal Coefficients](@ref man-numeric-literal-coefficients) for details.
 
@@ -28,6 +28,8 @@ Julia's promotion system makes arithmetic operations on mixtures of argument typ
 naturally and automatically. See [Conversion and Promotion](@ref conversion-and-promotion) for details of the promotion
 system.
 
+The ÷ sign can be conveniently typed by typing `\div<tab>` in the REPL or Julia IDE. See the [manual section on Unicode input](@ref Unicode-Input) for more information.
+
 Here are some simple examples using arithmetic operators:
 
 ```jldoctest
@@ -67,7 +69,7 @@ The following [Boolean operators](https://en.wikipedia.org/wiki/Boolean_algebra#
 | `x && y`   | [short-circuiting and](@ref man-conditional-evaluation) |
 | `x \|\| y` | [short-circuiting or](@ref man-conditional-evaluation)  |
 
-Negation changes `true` to `false` and vice versa. The short-circuiting opeations are explained on the linked page.
+Negation changes `true` to `false` and vice versa. The short-circuiting operations are explained on the linked page.
 
 Note that `Bool` is an integer type and all the usual promotion rules and numeric operators are also defined on it.
 
@@ -82,6 +84,8 @@ are supported on all primitive integer types:
 | `x & y`    | bitwise and                                                              |
 | `x \| y`   | bitwise or                                                               |
 | `x ⊻ y`    | bitwise xor (exclusive or)                                               |
+| `x ⊼ y`    | bitwise nand (not and)                                                   |
+| `x ⊽ y`    | bitwise nor (not or)                                                     |
 | `x >>> y`  | [logical shift](https://en.wikipedia.org/wiki/Logical_shift) right       |
 | `x >> y`   | [arithmetic shift](https://en.wikipedia.org/wiki/Arithmetic_shift) right |
 | `x << y`   | logical/arithmetic shift left                                            |
@@ -104,6 +108,18 @@ julia> 123 ⊻ 234
 julia> xor(123, 234)
 145
 
+julia> nand(123, 123)
+-124
+
+julia> 123 ⊼ 123
+-124
+
+julia> nor(123, 124)
+-128
+
+julia> 123 ⊽ 124
+-128
+
 julia> ~UInt32(123)
 0xffffff84
 
diff --git a/doc/src/manual/metaprogramming.md b/doc/src/manual/metaprogramming.md
index 9880bf4417867f..a374b9c879e6af 100644
--- a/doc/src/manual/metaprogramming.md
+++ b/doc/src/manual/metaprogramming.md
@@ -466,7 +466,7 @@ julia> eval(ex)
 
 ## [Macros](@id man-macros)
 
-Macros provide a method to include generated code in the final body of a program. A macro maps
+Macros provide a mechanism to include generated code in the final body of a program. A macro maps
 a tuple of arguments to a returned *expression*, and the resulting expression is compiled directly
 rather than requiring a runtime [`eval`](@ref) call. Macro arguments may include expressions,
 literal values, and symbols.
@@ -981,13 +981,13 @@ block:
 end
 ```
 
-## Non-Standard String Literals
+## [Non-Standard String Literals](@id meta-non-standard-string-literals)
 
 Recall from [Strings](@ref non-standard-string-literals) that string literals prefixed by an identifier are called non-standard
 string literals, and can have different semantics than un-prefixed string literals. For example:
 
-  * `r"^\s*(?:#|$)"` produces a regular expression object rather than a string
-  * `b"DATA\xff\u2200"` is a byte array literal for `[68,65,84,65,255,226,136,128]`.
+  * `r"^\s*(?:#|$)"` produces a [regular expression object](@ref man-regex-literals) rather than a string
+  * `b"DATA\xff\u2200"` is a [byte array literal](@ref man-byte-array-literals) for `[68,65,84,65,255,226,136,128]`.
 
 Perhaps surprisingly, these behaviors are not hard-coded into the Julia parser or compiler. Instead,
 they are custom behaviors provided by a general mechanism that anyone can use: prefixed string
@@ -1051,20 +1051,9 @@ constructed on each iteration. In the vast majority of use cases, however, regul
 are not constructed based on run-time data. In this majority of cases, the ability to write regular
 expressions as compile-time values is invaluable.
 
-Like non-standard string literals, non-standard command literals exist using a prefixed variant
-of the command literal syntax. The command literal ```custom`literal` ``` is parsed as `@custom_cmd "literal"`.
-Julia itself does not contain any non-standard command literals, but packages can make use of
-this syntax. Aside from the different syntax and the `_cmd` suffix instead of the `_str` suffix,
-non-standard command literals behave exactly like non-standard string literals.
-
-In the event that two modules provide non-standard string or command literals with the same name,
-it is possible to qualify the string or command literal with a module name. For instance, if both
-`Foo` and `Bar` provide non-standard string literal `@x_str`, then one can write `Foo.x"literal"`
-or `Bar.x"literal"` to disambiguate between the two.
-
 The mechanism for user-defined string literals is deeply, profoundly powerful. Not only are Julia's
-non-standard literals implemented using it, but also the command literal syntax (``` `echo "Hello, $person"` ```)
-is implemented with the following innocuous-looking macro:
+non-standard literals implemented using it, but the command literal syntax (``` `echo "Hello, $person"` ```)
+is also implemented using the following innocuous-looking macro:
 
 ```julia
 macro cmd(str)
@@ -1077,6 +1066,20 @@ but they are just functions, written entirely in Julia. You can read their sourc
 what they do -- and all they do is construct expression objects to be inserted into your program's
 syntax tree.
 
+Like string literals, command literals can also be prefixed by an identifier
+to form what are called non-standard command literals. These command literals are parsed
+as calls to specially-named macros. For example, the syntax ```custom`literal` ``` is parsed
+as `@custom_cmd "literal"`.
+Julia itself does not contain any non-standard command literals, but packages can make use of
+this syntax. Aside from the different syntax and the `_cmd` suffix instead of the `_str` suffix,
+non-standard command literals behave exactly like non-standard string literals.
+
+In the event that two modules provide non-standard string or command literals with the same name,
+it is possible to qualify the string or command literal with a module name. For instance, if both
+`Foo` and `Bar` provide non-standard string literal `@x_str`, then one can write `Foo.x"literal"`
+or `Bar.x"literal"` to disambiguate between the two.
+
+
 Another way to define a macro would be like this:
 
 ```julia
diff --git a/doc/src/manual/methods.md b/doc/src/manual/methods.md
index 73d41294173bec..1985292d66611a 100644
--- a/doc/src/manual/methods.md
+++ b/doc/src/manual/methods.md
@@ -41,7 +41,7 @@ for structuring and organizing programs.
     it can be omitted altogether, writing just `meth(arg1,arg2)`, with `this` implied as the receiving
     object.
 !!! note
-    All the examples in this chapter assume that you are defining modules for a function in the *same*
+    All the examples in this chapter assume that you are defining methods for a function in the *same*
     module. If you want to add methods to a function in *another* module, you have to `import` it or
     use the name qualified with module names. See the section on [namespace management](@ref
     namespace-management).
@@ -546,38 +546,19 @@ Here are a few common design patterns that come up sometimes when using dispatch
 ### Extracting the type parameter from a super-type
 
 
-Here is the correct code template for returning the element-type `T`
-of any arbitrary subtype of `AbstractArray`:
+Here is a correct code template for returning the element-type `T`
+of any arbitrary subtype of `AbstractArray` that has a well-defined
+element type:
 
 ```julia
 abstract type AbstractArray{T, N} end
 eltype(::Type{<:AbstractArray{T}}) where {T} = T
 ```
-using so-called triangular dispatch.  Note that if `T` is a `UnionAll`
-type, as e.g. `eltype(Array{T} where T <: Integer)`, then `Any` is
-returned (as does the version of `eltype` in `Base`).
 
-Another way, which used to be the only correct way before the advent of
-triangular dispatch in Julia v0.6, is:
-
-```julia
-abstract type AbstractArray{T, N} end
-eltype(::Type{AbstractArray}) = Any
-eltype(::Type{AbstractArray{T}}) where {T} = T
-eltype(::Type{AbstractArray{T, N}}) where {T, N} = T
-eltype(::Type{A}) where {A<:AbstractArray} = eltype(supertype(A))
-```
-
-Another possibility is the following, which could be useful to adapt
-to cases where the parameter `T` would need to be matched more
-narrowly:
-```julia
-eltype(::Type{AbstractArray{T, N} where {T<:S, N<:M}}) where {M, S} = Any
-eltype(::Type{AbstractArray{T, N} where {T<:S}}) where {N, S} = Any
-eltype(::Type{AbstractArray{T, N} where {N<:M}}) where {M, T} = T
-eltype(::Type{AbstractArray{T, N}}) where {T, N} = T
-eltype(::Type{A}) where {A <: AbstractArray} = eltype(supertype(A))
-```
+using so-called triangular dispatch.  Note that `UnionAll` types, for
+example `eltype(AbstractArray{T} where T <: Integer)`, do not match the
+above method. The implementation of `eltype` in `Base` adds a fallback
+method that returns `Any` for such cases.
 
 
 One common mistake is to try and get the element-type by using introspection:
@@ -596,6 +577,25 @@ Here we have created a type `BitVector` which has no parameters,
 but where the element-type is still fully specified, with `T` equal to `Bool`!
 
 
+Another mistake is to try to walk up the type hierarchy using
+`supertype`:
+```julia
+eltype_wrong(::Type{AbstractArray{T}}) where {T} = T
+eltype_wrong(::Type{AbstractArray{T, N}}) where {T, N} = T
+eltype_wrong(::Type{A}) where {A<:AbstractArray} = eltype_wrong(supertype(A))
+```
+
+While this works for declared types, it fails for types without
+supertypes:
+
+```julia-repl
+julia> eltype_wrong(Union{AbstractArray{Int}, AbstractArray{Float64}})
+ERROR: MethodError: no method matching supertype(::Type{Union{AbstractArray{Float64,N} where N, AbstractArray{Int64,N} where N}})
+Closest candidates are:
+  supertype(::DataType) at operators.jl:43
+  supertype(::UnionAll) at operators.jl:48
+```
+
 ### Building a similar type with a different type parameter
 
 When building generic code, there is often a need for constructing a similar
diff --git a/doc/src/manual/modules.md b/doc/src/manual/modules.md
index a6a5187ee34acc..b53e9468775ae0 100644
--- a/doc/src/manual/modules.md
+++ b/doc/src/manual/modules.md
@@ -303,7 +303,7 @@ include(p) = Base.include(Mod, p)
 end
 ```
 
-If a module containing no names or imports is needed, they can be defined with `Mod = Module(:Mod, nothing)`.
+If modules containing no names or imports are needed, they can be defined with `Mod = Module(:Mod, nothing)`.
 Code can be evaluated in them with [`@eval`](@ref) or [`Core.eval`](@ref).
 
 ### Standard modules
diff --git a/doc/src/manual/multi-threading.md b/doc/src/manual/multi-threading.md
index 952e7acea35abd..135b5c3a6589e4 100644
--- a/doc/src/manual/multi-threading.md
+++ b/doc/src/manual/multi-threading.md
@@ -8,7 +8,7 @@ of Julia multi-threading features.
 By default, Julia starts up with a single thread of execution. This can be verified by using the
 command [`Threads.nthreads()`](@ref):
 
-```julia-repl
+```jldoctest
 julia> Threads.nthreads()
 1
 ```
@@ -37,7 +37,7 @@ julia> Threads.nthreads()
 
 But we are currently on the master thread. To check, we use the function [`Threads.threadid`](@ref)
 
-```julia-repl
+```jldoctest
 julia> Threads.threadid()
 1
 ```
@@ -147,7 +147,7 @@ to its assigned locations:
 
 ```julia-repl
 julia> a
-10-element Array{Float64,1}:
+10-element Vector{Float64}:
  1.0
  1.0
  1.0
@@ -182,14 +182,17 @@ julia> Threads.@threads for id in 1:4
        end
 
 julia> old_is
-4-element Array{Float64,1}:
+4-element Vector{Float64}:
  0.0
  1.0
  7.0
  3.0
 
+julia> i[]
+ 10
+
 julia> ids
-4-element Array{Float64,1}:
+4-element Vector{Float64}:
  1.0
  2.0
  3.0
@@ -227,11 +230,28 @@ julia> acc[]
 1000
 ```
 
-!!! note
-    Not *all* primitive types can be wrapped in an `Atomic` tag. Supported types
-    are `Int8`, `Int16`, `Int32`, `Int64`, `Int128`, `UInt8`, `UInt16`, `UInt32`,
-    `UInt64`, `UInt128`, `Float16`, `Float32`, and `Float64`. Additionally,
-    `Int128` and `UInt128` are not supported on AAarch32 and ppc64le.
+
+## [Per-field atomics](@id man-atomics)
+
+We can also use atomics on a more granular level using the [`@atomic`](@ref
+Base.@atomic), [`@atomicswap`](@ref Base.@atomicswap), and
+[`@atomicreplace`](@ref Base.@atomicreplace) macros.
+
+Specific details of the memory model and other details of the design are written
+in the [Julia Atomics
+Manifesto](https://gist.github.com/vtjnash/11b0031f2e2a66c9c24d33e810b34ec0),
+which will later be published formally.
+
+Any field in a struct declaration can be decorated with `@atomic`, and then any
+write must be marked with `@atomic` also, and must use one of the defined atomic
+orderings (`:monotonic`, `:acquire`, `:release`, `:acquire_release`, or
+`:sequentially_consistent`). Any read of an atomic field can also be annotated
+with an atomic ordering constraint, or will be done with monotonic (relaxed)
+ordering if unspecified.
+
+!!! compat "Julia 1.7"
+    Per-field atomics requires at least Julia 1.7.
+
 
 ## Side effects and mutable function arguments
 
@@ -241,6 +261,7 @@ For instance functions that have a
 [name ending with `!`](@ref bang-convention)
 by convention modify their arguments and thus are not pure.
 
+
 ## @threadcall
 
 External libraries, such as those called via [`ccall`](@ref), pose a problem for
diff --git a/doc/src/manual/networking-and-streams.md b/doc/src/manual/networking-and-streams.md
index 163716c5838040..fc62632433850c 100644
--- a/doc/src/manual/networking-and-streams.md
+++ b/doc/src/manual/networking-and-streams.md
@@ -193,13 +193,13 @@ Let's first create a simple server:
 ```julia-repl
 julia> using Sockets
 
-julia> @async begin
+julia> errormonitor(@async begin
            server = listen(2000)
            while true
                sock = accept(server)
                println("Hello World\n")
            end
-       end
+       end)
 Task (runnable) @0x00007fd31dc11ae0
 ```
 
@@ -265,7 +265,7 @@ printed the message and waited for the next client. Reading and writing works in
 To see this, consider the following simple echo server:
 
 ```julia-repl
-julia> @async begin
+julia> errormonitor(@async begin
            server = listen(2001)
            while true
                sock = accept(server)
@@ -273,15 +273,15 @@ julia> @async begin
                    write(sock, readline(sock, keep=true))
                end
            end
-       end
+       end)
 Task (runnable) @0x00007fd31dc12e60
 
 julia> clientside = connect(2001)
 TCPSocket(RawFD(28) open, 0 bytes waiting)
 
-julia> @async while isopen(clientside)
+julia> errormonitor(@async while isopen(clientside)
            write(stdout, readline(clientside, keep=true))
-       end
+       end)
 Task (runnable) @0x00007fd31dc11870
 
 julia> println(clientside,"Hello World from the Echo Server")
@@ -351,3 +351,68 @@ Finished connection to google.com
 Finished connection to julialang.org
 Finished connection to github.com
 ```
+
+## Multicast
+
+Julia supports [multicast](https://datatracker.ietf.org/doc/html/rfc1112) over IPv4 and IPv6 using the User Datagram Protocol ([UDP](https://datatracker.ietf.org/doc/html/rfc768)) as transport.
+
+Unlike the Transmission Control Protocol ([TCP](https://datatracker.ietf.org/doc/html/rfc793)), UDP makes almost no assumptions about the needs of the application.
+TCP provides flow control (it accelerates and decelerates to maximize throughput), reliability (lost or corrupt packets are automatically retransmitted), sequencing (packets are ordered by the operating system before they are given to the application), segment size, and session setup and teardown.
+UDP provides no such features.
+
+A common use for UDP is in multicast applications.
+TCP is a stateful protocol for communication between exactly two devices.
+UDP can use special multicast addresses to allow simultaneous communication between many devices.
+
+### Receiving IP Multicast Packets
+
+To receive data over UDP multicast, simply `recv` on the socket, and the first packet received will be returned. Note, however, that it may not be the first packet that you sent!
+
+```
+using Sockets
+group = ip"228.5.6.7"
+socket = Sockets.UDPSocket()
+bind(socket, ip"0.0.0.0", 6789)
+join_multicast_group(socket, group)
+println(String(recv(socket)))
+leave_multicast_group(socket, group)
+close(socket)
+```
+
+### Sending IP Multicast Packets
+
+To transmit data over UDP multicast, simply `send` to the socket.
+Notice that it is not necessary for a sender to join the multicast group.
+
+```
+using Sockets
+group = ip"228.5.6.7"
+socket = Sockets.UDPSocket()
+send(socket, group, 6789, "Hello over IPv4")
+close(socket)
+```
+
+### IPv6 Example
+
+This example gives the same functionality as the previous program, but uses IPv6 as the network-layer protocol.
+
+Listener:
+```
+using Sockets
+group = Sockets.IPv6("ff05::5:6:7")
+socket = Sockets.UDPSocket()
+bind(socket, Sockets.IPv6("::"), 6789)
+join_multicast_group(socket, group)
+println(String(recv(socket)))
+leave_multicast_group(socket, group)
+close(socket)
+```
+
+Sender:
+```
+using Sockets
+group = Sockets.IPv6("ff05::5:6:7")
+socket = Sockets.UDPSocket()
+send(socket, group, 6789, "Hello over IPv6")
+close(socket)
+```
diff --git a/doc/src/manual/noteworthy-differences.md b/doc/src/manual/noteworthy-differences.md
index ad1727ec6d1c00..69e236db6c7d16 100644
--- a/doc/src/manual/noteworthy-differences.md
+++ b/doc/src/manual/noteworthy-differences.md
@@ -163,6 +163,9 @@ For users coming to Julia from R, these are some noteworthy differences:
   * In Julia, a range like `a:b` is not shorthand for a vector like in R, but is a specialized `AbstractRange`
     object that is used for iteration. To convert a range into a vector, use
     [`collect(a:b)`](@ref).
+  * The `:` operator has a different precedence in R and Julia. In particular, in Julia arithmetic operators
+    have higher precedence than the `:` operator, whereas the reverse is true in R. For example, `1:n-1` in
+    Julia is equivalent to `1:(n-1)` in R.
   * Julia's [`max`](@ref) and [`min`](@ref) are the equivalent of `pmax` and `pmin` respectively
     in R, but both arguments need to have the same dimensions.  While [`maximum`](@ref) and [`minimum`](@ref)
     replace `max` and `min` in R, there are important differences.
@@ -240,7 +243,7 @@ For users coming to Julia from R, these are some noteworthy differences:
   * In Julia, the standard operators over a matrix type are matrix operations, whereas, in Python, the standard operators are element-wise operations. When both `A` and `B` are matrices, `A * B` in Julia performs matrix multiplication, not element-wise multiplication as in Python. `A * B` in Julia is equivalent with `A @ B` in Python, whereas `A * B` in Python is equivalent with `A .* B` in Julia.
   * The adjoint operator `'` in Julia returns an adjoint of a vector (a lazy representation of row vector), whereas the transpose operator `.T` over a vector in Python returns the original vector (non-op).
   * In Julia, a function may contain multiple concrete implementations (called *methods*), which are selected via multiple dispatch based on the types of all arguments to the call, as compared to functions in Python, which have a single implementation and no polymorphism (as opposed to Python method calls which use a different syntax and allows dispatch on the receiver of the method).
-  * There are no classes in Julia. Instead they are structures (mutable or immutable), containing data but no methods.
+  * There are no classes in Julia. Instead there are structures (mutable or immutable), containing data but no methods.
   * Calling a method of a class instance in Python (`x = MyClass(*args); x.f(y)`) corresponds to a function call in Julia, e.g. `x = MyType(args...); f(x, y)`. In general, multiple dispatch is more flexible and powerful than the Python class system.
   * Julia structures may have exactly one abstract supertype, whereas Python classes can inherit from one or more (abstract or concrete) superclasses.
   * The logical Julia program structure (Packages and Modules) is independent of the file structure (`include` for additional files), whereas the Python code structure is defined by directories (Packages) and files (Modules).
@@ -353,7 +356,13 @@ For users coming to Julia from R, these are some noteworthy differences:
 
 - The typical Julia workflow for prototyping also uses continuous manipulation of the image, implemented with the [Revise.jl](https://github.com/timholy/Revise.jl) package.
 
-- Bignums are supported, but conversion is not automatic; ordinary integers [overflow](@ref faq-integer-arithmetic).
+- For performance, Julia prefers that operations have [type stability](@ref man-type-stability). Where Common Lisp abstracts away from the underlying machine operations, Julia cleaves closer to them. For example:
+  - Integer division using `/` always returns a floating-point result, even if the computation is exact.
+    - `//` always returns a rational result
+    - `÷` always returns a (truncated) integer result
+  - Bignums are supported, but conversion is not automatic; ordinary integers [overflow](@ref faq-integer-arithmetic).
+  - Complex numbers are supported, but to get complex results, [you need complex inputs](@ref faq-domain-errors).
+  - There are multiple Complex and Rational types, with different component types.
 
 - Modules (namespaces) can be hierarchical. [`import`](@ref) and [`using`](@ref) have a dual role: they load the code and make it available in the namespace. `import` for only the module name is possible (roughly equivalent to `ASDF:LOAD-OP`). Slot names don't need to be exported separately. Global variables can't be assigned to from outside the module (except with `eval(mod, :(var = val))` as an escape hatch).
 
diff --git a/doc/src/manual/parallel-computing.md b/doc/src/manual/parallel-computing.md
index 71c3ba5354c1fc..80df333a8ab04f 100644
--- a/doc/src/manual/parallel-computing.md
+++ b/doc/src/manual/parallel-computing.md
@@ -8,7 +8,7 @@ Julia supports these four categories of concurrent and parallel programming:
     for I/O, event handling, producer-consumer processes, and similar patterns.
     Tasks can synchronize through operations like [`wait`](@ref) and [`fetch`](@ref), and
     communicate via [`Channel`](@ref)s. While strictly not parallel computing by themselves,
-    Julia lets you schedule `Task`s on several threads.
+    Julia lets you schedule [`Task`](@ref)s on several threads.
 
 2. **Multi-threading**:
 
@@ -21,7 +21,7 @@ Julia supports these four categories of concurrent and parallel programming:
 3. **Distributed computing**:
 
     Distributed computing runs multiple Julia processes with separate memory spaces. These can be on the same
-    computer or multiple computers. The `Distributed` standard library provides the capability for remote execution
+    computer or multiple computers. The [`Distributed`](@ref man-distributed) standard library provides the capability for remote execution
     of a Julia function. With this basic building block, it is possible to build many different kinds of
     distributed computing abstractions. Packages like [`DistributedArrays.jl`](https://github.com/JuliaParallel/DistributedArrays.jl)
     are an example of such an abstraction. On the other hand, packages like [`MPI.jl`](https://github.com/JuliaParallel/MPI.jl) and
diff --git a/doc/src/manual/performance-tips.md b/doc/src/manual/performance-tips.md
index 9f0a0b9ff4f4cc..7d1f448456167a 100644
--- a/doc/src/manual/performance-tips.md
+++ b/doc/src/manual/performance-tips.md
@@ -77,12 +77,12 @@ julia> function sum_global()
        end;
 
 julia> @time sum_global()
-  0.009639 seconds (7.36 k allocations: 300.310 KiB, 98.32% compilation time)
-496.84883432553846
+  0.026328 seconds (9.30 k allocations: 416.747 KiB, 36.50% gc time, 99.48% compilation time)
+508.39048990953665
 
 julia> @time sum_global()
-  0.000140 seconds (3.49 k allocations: 70.313 KiB)
-496.84883432553846
+  0.000075 seconds (3.49 k allocations: 70.156 KiB)
+508.39048990953665
 ```
 
 On the first call (`@time sum_global()`) the function gets compiled. (If you've not yet used [`@time`](@ref)
@@ -113,12 +113,12 @@ julia> function sum_arg(x)
        end;
 
 julia> @time sum_arg(x)
-  0.006202 seconds (4.18 k allocations: 217.860 KiB, 99.72% compilation time)
-496.84883432553846
+  0.010298 seconds (4.23 k allocations: 226.021 KiB, 99.81% compilation time)
+508.39048990953665
 
 julia> @time sum_arg(x)
   0.000005 seconds (1 allocation: 16 bytes)
-496.84883432553846
+508.39048990953665
 ```
 
 The 1 allocation seen is from running the `@time` macro itself in global scope. If we instead run
@@ -129,7 +129,7 @@ julia> time_sum(x) = @time sum_arg(x);
 
 julia> time_sum(x)
   0.000001 seconds
-496.84883432553846
+508.39048990953665
 ```
 
 In some situations, your function may need to allocate memory as part of its operation, and this
@@ -325,7 +325,7 @@ Float32
 
 For all practical purposes, such objects behave identically to those of `MyStillAmbiguousType`.
 
-It's quite instructive to compare the sheer amount code generated for a simple function
+It's quite instructive to compare the sheer amount of code generated for a simple function
 
 ```julia
 func(m::MyType) = m.a+1
@@ -342,6 +342,14 @@ For reasons of length the results are not shown here, but you may wish to try th
 the type is fully-specified in the first case, the compiler doesn't need to generate any code
 to resolve the type at run-time. This results in shorter and faster code.
 
+One should also keep in mind that not-fully-parameterized types behave like abstract types. For example, even though a fully specified `Array{T,n}` is concrete, `Array` itself with no parameters given is not concrete:
+
+```jldoctest myambig3
+julia> !isconcretetype(Array), !isabstracttype(Array), isstructtype(Array), !isconcretetype(Array{Int}), isconcretetype(Array{Int,1})
+(true, true, true, true, true)
+```
+In this case, it would be better to avoid declaring `MyType` with a field `a::Array` and instead declare the field as `a::Array{T,N}` or as `a::A`, where `{T,N}` or `A` are parameters of `MyType`.
+
 ### Avoid fields with abstract containers
 
 The same best practices also work for container types:
@@ -354,6 +362,10 @@ julia> struct MySimpleContainer{A<:AbstractVector}
 julia> struct MyAmbiguousContainer{T}
            a::AbstractVector{T}
        end
+
+julia> struct MyAlsoAmbiguousContainer
+           a::Array
+       end
 ```
 
 For example:
@@ -378,6 +390,17 @@ julia> b = MyAmbiguousContainer([1:3;]);
 
 julia> typeof(b)
 MyAmbiguousContainer{Int64}
+
+julia> d = MyAlsoAmbiguousContainer(1:3);
+
+julia> typeof(d), typeof(d.a)
+(MyAlsoAmbiguousContainer, Vector{Int64})
+
+julia> d = MyAlsoAmbiguousContainer(1:1.0:3);
+
+julia> typeof(d), typeof(d.a)
+(MyAlsoAmbiguousContainer, Vector{Float64})
+
 ```
 
 For `MySimpleContainer`, the object is fully-specified by its type and parameters, so the compiler
@@ -1514,7 +1537,7 @@ The following examples may help you interpret expressions marked as containing n
         element accesses
 
   * `Base.getfield(%%x, :(:data))::ARRAY{FLOAT64,N} WHERE N`
-      * Interpretation: getting a field that is of non-leaf type. In this case, `ArrayContainer` had a
+      * Interpretation: getting a field that is of non-leaf type. In this case, the type of `x`, say `ArrayContainer`, had a
         field `data::Array{T}`. But `Array` needs the dimension `N`, too, to be a concrete type.
       * Suggestion: use concrete types like `Array{T,3}` or `Array{T,N}`, where `N` is now a parameter
         of `ArrayContainer`
@@ -1600,11 +1623,3 @@ will not require this degree of programmer annotation to attain performance.
 In the mean time, some user-contributed packages like
 [FastClosures](https://github.com/c42f/FastClosures.jl) automate the
 insertion of `let` statements as in `abmult3`.
-
-## Checking for equality with a singleton
-
-When checking if a value is equal to some singleton it can be
-better for performance to check for identicality (`===`) instead of
-equality (`==`). The same advice applies to using `!==` over `!=`.
-These type of checks frequently occur e.g. when implementing the iteration
-protocol and checking if `nothing` is returned from [`iterate`](@ref).
diff --git a/doc/src/manual/profile.md b/doc/src/manual/profile.md
index 8afadf959dbf10..b736c46f90282b 100644
--- a/doc/src/manual/profile.md
+++ b/doc/src/manual/profile.md
@@ -341,7 +341,7 @@ For example with `OProfile` you can try a simple recording :
 Or similary with `perf` :
 
 ```
-$ ENABLE_JITPROFILING=1 perf record -o /tmp/perf.data --call-graph dwarf ./julia /test/fastmath.jl
+$ ENABLE_JITPROFILING=1 perf record -o /tmp/perf.data --call-graph dwarf -k 1 ./julia /test/fastmath.jl
 $ perf inject --jit --input /tmp/perf.data --output /tmp/perf-jit.data
 $ perf report --call-graph -G -i /tmp/perf-jit.data
 ```
diff --git a/doc/src/manual/running-external-programs.md b/doc/src/manual/running-external-programs.md
index 0f1b7d255835de..e643ffff3ee61d 100644
--- a/doc/src/manual/running-external-programs.md
+++ b/doc/src/manual/running-external-programs.md
@@ -326,6 +326,8 @@ wait(writer)
 fetch(reader)
 ```
 
+(Commonly, `reader` is not run as a separate task at all, since we immediately `fetch` it anyway.)
+
 ### Complex Example
 
 The combination of a high-level programming language, a first-class command abstraction, and automatic
@@ -374,11 +376,36 @@ saturated throughput.
 We strongly encourage you to try all these examples to see how they work.
 
 ## `Cmd` Objects
-The syntax introduced above creates objects of type [`Cmd`](@ref). Such object may also be constructed directly:
+The backtick syntax creates an object of type [`Cmd`](@ref). Such an object may also be constructed directly from
+an existing `Cmd` or list of arguments:
 
 ```julia
 run(Cmd(`pwd`, dir=".."))
+run(Cmd(["pwd"], detach=true, ignorestatus=true))
 ```
 
-This way, they may be customized with the `dir` keyword to set the working directory,
-`detach` keyword to run the command in a new process group, and `env` keyword to set environment variables.
+This allows you to specify several aspects of the `Cmd`'s execution environment via keyword arguments. For
+example, the `dir` keyword provides control over the `Cmd`'s working directory:
+
+```jldoctest
+julia> run(Cmd(`pwd`, dir="/"));
+/
+```
+
+And the `env` keyword allows you to set execution environment variables:
+
+```jldoctest
+julia> run(Cmd(`sh -c "echo foo \$HOWLONG"`, env=("HOWLONG" => "ever!",)));
+foo ever!
+```
+
+See [`Cmd`](@ref) for additional keyword arguments. The [`setenv`](@ref) and [`addenv`](@ref) commands
+provide another means for replacing or adding to the `Cmd` execution environment variables, respectively:
+
+```jldoctest
+julia> run(setenv(`sh -c "echo foo \$HOWLONG"`, ("HOWLONG" => "ever!",)));
+foo ever!
+
+julia> run(addenv(`sh -c "echo foo \$HOWLONG"`, "HOWLONG" => "ever!"));
+foo ever!
+```
diff --git a/doc/src/manual/stacktraces.md b/doc/src/manual/stacktraces.md
index 50cdfd5b1ed64a..40130d9e7dd445 100644
--- a/doc/src/manual/stacktraces.md
+++ b/doc/src/manual/stacktraces.md
@@ -185,7 +185,7 @@ ERROR: Whoops!
 [...]
 ```
 
-## Exception stacks and `catch_stack`
+## Exception stacks and [`current_exceptions`](@ref)
 
 !!! compat "Julia 1.1"
     Exception stacks requires at least Julia 1.1.
@@ -195,7 +195,7 @@ identify the root cause of a problem. The julia runtime supports this by pushing
 *exception stack* as it occurs. When the code exits a `catch` normally, any exceptions which were pushed onto the stack
 in the associated `try` are considered to be successfully handled and are removed from the stack.
 
-The stack of current exceptions can be accessed using the experimental [`Base.catch_stack`](@ref) function. For example,
+The stack of current exceptions can be accessed using the [`current_exceptions`](@ref) function. For example,
 
 ```julia-repl
 julia> try
@@ -204,7 +204,7 @@ julia> try
            try
                error("(B) An exception while handling the exception")
            catch
-               for (exc, bt) in Base.catch_stack()
+               for (exc, bt) in current_exceptions()
                    showerror(stdout, exc, bt)
                    println(stdout)
                end
@@ -233,7 +233,7 @@ exiting both catch blocks normally (i.e., without throwing a further exception)
 and are no longer accessible.
 
 The exception stack is stored on the `Task` where the exceptions occurred. When a task fails with uncaught exceptions,
-`catch_stack(task)` may be used to inspect the exception stack for that task.
+`current_exceptions(task)` may be used to inspect the exception stack for that task.
 
 ## Comparison with [`backtrace`](@ref)
 
diff --git a/doc/src/manual/strings.md b/doc/src/manual/strings.md
index f65a5526a85518..56a5a20c1cef49 100644
--- a/doc/src/manual/strings.md
+++ b/doc/src/manual/strings.md
@@ -166,6 +166,14 @@ julia> """Contains "quote" characters"""
 "Contains \"quote\" characters"
 ```
 
+Long lines in strings can be broken up by preceding the newline with a backslash (`\`):
+
+```jldoctest
+julia> "This is a long \
+       line"
+"This is a long line"
+```
+
 If you want to extract a character from a string, you index into it:
 
 ```jldoctest helloworldstring
@@ -639,6 +647,15 @@ julia> """
 "Hello,\nworld."
 ```
 
+If the newline is removed using a backslash, dedentation will be respected as well:
+
+```jldoctest
+julia> """
+         Averylong\
+         word"""
+"Averylongword"
+```
+
 Trailing whitespace is left unaltered.
 
 Triple-quoted string literals can contain `"` characters without escaping.
@@ -739,13 +756,16 @@ Some other useful functions include:
 
 There are situations when you want to construct a string or use string semantics, but the behavior
 of the standard string construct is not quite what is needed. For these kinds of situations, Julia
-provides [non-standard string literals](@ref). A non-standard string literal looks like a regular
-double-quoted string literal, but is immediately prefixed by an identifier, and doesn't behave
-quite like a normal string literal.  Regular expressions, byte array literals and version number
-literals, as described below, are some examples of non-standard string literals. Other examples
-are given in the [Metaprogramming](@ref) section.
+provides non-standard string literals. A non-standard string literal looks like a regular
+double-quoted string literal,
+but is immediately prefixed by an identifier, and may behave differently from a normal string literal.
+
+[Regular expressions](@ref man-regex-literals), [byte array literals](@ref man-byte-array-literals),
+and [version number literals](@ref man-version-number-literals), as described below,
+are some examples of non-standard string literals. Users and packages may also define new non-standard string literals.
+Further documentation is given in the [Metaprogramming](@ref meta-non-standard-string-literals) section.
 
-## Regular Expressions
+## [Regular Expressions](@id man-regex-literals)
 
 Julia has Perl-compatible regular expressions (regexes), as provided by the [PCRE](http://www.pcre.org/)
 library (a description of the syntax can be found [here](http://www.pcre.org/current/doc/html/pcre2syntax.html)). Regular expressions are related to strings in two ways: the obvious connection is that
@@ -798,7 +818,7 @@ else
 end
 ```
 
-If a regular expression does match, the value returned by [`match`](@ref) is a `RegexMatch`
+If a regular expression does match, the value returned by [`match`](@ref) is a [`RegexMatch`](@ref)
 object. These objects record how the expression matches, including the substring that the pattern
 matches and any captured substrings, if there are any. This example only captures the portion
 of the substring that matches, but perhaps we want to capture any non-blank text after the comment
@@ -879,10 +899,10 @@ julia> m.offsets
 ```
 
 It is convenient to have captures returned as an array so that one can use destructuring syntax
-to bind them to local variables:
+to bind them to local variables. As a convenience, the `RegexMatch` object implements iterator methods that pass through to the `captures` field, so you can destructure the match object directly:
 
 ```jldoctest acdmatch
-julia> first, second, third = m.captures; first
+julia> first, second, third = m; first
 "a"
 ```
 
diff --git a/doc/src/manual/style-guide.md b/doc/src/manual/style-guide.md
index 10f4b8ea548782..5201152ce31a45 100644
--- a/doc/src/manual/style-guide.md
+++ b/doc/src/manual/style-guide.md
@@ -134,6 +134,32 @@ a = Vector{Union{Int,AbstractString,Tuple,Array}}(undef, n)
 In this case `Vector{Any}(undef, n)` is better. It is also more helpful to the compiler to annotate specific
 uses (e.g. `a[i]::Int`) than to try to pack many alternatives into one type.
 
+## Prefer exported methods over direct field access
+
+Idiomatic Julia code should generally treat a module's exported methods as the
+interface to its types. An object's fields are generally considered
+implementation details and user code should only access them directly if this
+is stated to be the API. This has several benefits:
+
+- Package developers are freer to change the implementation without breaking
+  user code.
+- Methods can be passed to higher-order constructs like [`map`](@ref) (e.g.
+  `map(imag, zs)` rather than `[z.im for z in zs]`).
+- Methods can be defined on abstract types.
+- Methods can describe a conceptual operation that can be shared across
+  disparate types (e.g. `real(z)` works on Complex numbers or Quaternions).
+
+Julia's dispatch system encourages this style because `play(x::MyType)` only
+defines the `play` method on that particular type, leaving other types to
+have their own implementation.
+
+Similarly, non-exported functions are typically internal and subject to change,
+unless the documentation states otherwise. Names sometimes are given a `_` prefix
+(or suffix) to further suggest that something is "internal" or an
+implementation-detail, but it is not a rule.
+
+Counter-examples to this rule include [`NamedTuple`](@ref), [`RegexMatch`](@ref match), [`StatStruct`](@ref stat).
+
 ## Use naming conventions consistent with Julia `base/`
 
   * modules and type names use capitalization and camel case: `module SparseArrays`, `struct UnitRange`.
@@ -142,9 +168,6 @@ uses (e.g. `a[i]::Int`) than to try to pack many alternatives into one type.
     as word separators. Underscores are also used to indicate a combination of concepts ([`remotecall_fetch`](@ref)
     as a more efficient implementation of `fetch(remotecall(...))`) or as modifiers.
   * functions mutating at least one of their arguments end in `!`.
-  * use identifiers starting with `_` to
-    denote functions, macros or variables that should be considered private and not part of a package's
-    public API.
   * conciseness is valued, but avoid abbreviation ([`indexin`](@ref) rather than `indxin`) as
     it becomes difficult to remember whether and how particular words are abbreviated.
 
diff --git a/doc/src/manual/types.md b/doc/src/manual/types.md
index 31ae3d7f26fe62..949d6f3b8d1a0d 100644
--- a/doc/src/manual/types.md
+++ b/doc/src/manual/types.md
@@ -1033,8 +1033,8 @@ The `where` keyword itself can be nested inside a more complex declaration. For
 consider the two types created by the following declarations:
 
 ```jldoctest
-julia> const T1 = Array{Array{T,1} where T, 1}
-Vector{Vector{T} where T} (alias for Array{Array{T, 1} where T, 1})
+julia> const T1 = Array{Array{T, 1} where T, 1}
+Vector{Vector} (alias for Array{Array{T, 1} where T, 1})
 
 julia> const T2 = Array{Array{T, 1}, 1} where T
 Array{Vector{T}, 1} where T
@@ -1108,6 +1108,50 @@ julia> NoFieldsParam{Int}() === NoFieldsParam{Int}()
 true
 ```
 
+## Types of functions
+
+Each function has its own type, which is a subtype of `Function`.
+
+```jldoctest foo41
+julia> foo41(x) = x + 1
+foo41 (generic function with 1 method)
+
+julia> typeof(foo41)
+typeof(foo41) (singleton type of function foo41, subtype of Function)
+```
+
+Note how `typeof(foo41)` prints as itself. This is merely a convention for printing, as it is a first-class object that can be used like any other value:
+
+```jldoctest foo41
+julia> T = typeof(foo41)
+typeof(foo41) (singleton type of function foo41, subtype of Function)
+
+julia> T <: Function
+true
+```
+
+Types of functions defined at top-level are singletons. When necessary, you can compare them with [`===`](@ref).
+
+[Closures](@ref man-anonymous-functions) also have their own type, which is usually printed with names that end in `#<number>`. Names and types for functions defined at different locations are distinct, but not guaranteed to be printed the same way across sessions.
+
+```jldoctest; filter = r"[0-9\.]+"
+julia> typeof(x -> x + 1)
+var"#9#10"
+```
+
+Types of closures are not necessarily singletons.
+
+```jldoctest
+julia> addy(y) = x -> x + y
+addy (generic function with 1 method)
+
+julia> Base.issingletontype(addy(1))
+false
+
+julia> addy(1) === addy(2)
+false
+```
+
 ## [`Type{T}` type selectors](@id man-typet-type)
 
 For each type `T`, `Type{T}` is an abstract parametric type whose only instance is the
diff --git a/doc/src/manual/variables-and-scoping.md b/doc/src/manual/variables-and-scoping.md
index 906a6c6d06b43d..442943806a3b80 100644
--- a/doc/src/manual/variables-and-scoping.md
+++ b/doc/src/manual/variables-and-scoping.md
@@ -1,7 +1,7 @@
 # [Scope of Variables](@id scope-of-variables)
 
-The *scope* of a variable is the region of code within which a variable is visible. Variable scoping
-helps avoid variable naming conflicts. The concept is intuitive: two functions can both have
+The *scope* of a variable is the region of code within which a variable is accessible. Variable
+scoping helps avoid variable naming conflicts. The concept is intuitive: two functions can both have
 arguments called `x` without the two `x`'s referring to the same thing. Similarly, there are many
 other cases where different blocks of code can use the same name without referring to the same
 thing. The rules for when the same variable name does or doesn't refer to the same thing are called
@@ -103,14 +103,37 @@ Note that the interactive prompt (aka REPL) is in the global scope of the module
 
 ## Local Scope
 
-A new local scope is introduced by most code blocks (see above [table](@ref man-scope-table) for a
-complete list). Some programming languages require explicitly declaring new variables before using
-them. Explicit declaration works in Julia too: in any local scope, writing `local x` declares a new
-local variable in that scope, regardless of whether there is already a variable named `x` in an
-outer scope or not. Declaring each new local like this is somewhat verbose and tedious, however, so
-Julia, like many other languages, considers assignment to a new variable in a local scope to
-implicitly declare that variable as a new local. Mostly this is pretty intuitive, but as with many
-things that behave intuitively, the details are more subtle than one might naïvely imagine.
+A new local scope is introduced by most code blocks (see above [table](@ref
+man-scope-table) for a complete list). If such a block is syntactically nested
+inside of another local scope, the scope it creates is nested inside of all the
+local scopes that it appears within, which are all ultimately nested inside of
+the global scope of the module in which the code is evaluated. Variables in
+outer scopes are visible from any scope they contain — meaning that they can be
+read and written in inner scopes — unless there is a local variable with the
+same name that "shadows" the outer variable of the same name. This is true even
+if the outer local is declared after (in the sense of textually below) an inner
+block. When we say that a variable "exists" in a given scope, this means that a
+variable by that name exists in any of the scopes that the current scope is
+nested inside of, including the current one.
+
+Some programming languages require explicitly declaring new variables before
+using them. Explicit declaration works in Julia too: in any local scope, writing
+`local x` declares a new local variable in that scope, regardless of whether
+there is already a variable named `x` in an outer scope or not. Declaring each
+new variable like this is somewhat verbose and tedious, however, so Julia, like
+many other languages, considers assignment to a variable name that doesn't
+already exist to implicitly declare that variable. If the current scope is
+global, the new variable is global; if the current scope is local, the new
+variable is local to the innermost local scope and will be visible inside of
+that scope but not outside of it. If you assign to an existing local, it
+_always_ updates that existing local: you can only shadow a local by explicitly
+declaring a new local in a nested scope with the `local` keyword. In particular,
+this applies to variables assigned in inner functions, which may surprise users
+coming from Python where assignment in an inner function creates a new local
+unless the variable is explicitly declared to be non-local.
+
+Mostly this is pretty intuitive, but as with many things that behave
+intuitively, the details are more subtle than one might naïvely imagine.
 
 When `x = <value>` occurs in a local scope, Julia applies the following rules to decide what the
 expression means based on where the assignment expression occurs and what `x` already refers to at
@@ -183,9 +206,15 @@ Since the `x` in `greet` is local, the value (or lack thereof) of the global `x`
 calling `greet`. The hard scope rule doesn't care whether a global named `x` exists or not:
 assignment to `x` in a hard scope is local (unless `x` is declared global).
 
-The next clear cut situation we'll consider is when there is already a local variable named `x`, in
-which case `x = <value>` always assigns to this existing local `x`.  The function `sum_to` computes
-the sum of the numbers from one up to `n`:
+The next clear cut situation we'll consider is when there is already a local
+variable named `x`, in which case `x = <value>` always assigns to this existing
+local `x`. This is true whether the assignment occurs in the same local scope,
+an inner local scope in the same function body, or in the body of a function
+nested inside of another function, also known as a
+[closure](https://en.wikipedia.org/wiki/Closure_(computer_programming)).
+
+We'll use the `sum_to` function, which computes the sum of integers from one up
+to `n`, as an example:
 
 ```julia
 function sum_to(n)
@@ -252,6 +281,44 @@ introduces a hard scope, the assignment causes `t` to become a new local variabl
 where it appears, i.e. inside of the loop body. Even if there were a global named `t`, it would make
 no difference—the hard scope rule isn't affected by anything in global scope.
 
+Note that the local scope of a for loop body is no different from the local
+scope of an inner function. This means that we could rewrite this example so
+that the loop body is implemented as a call to an inner helper function and it
+behaves the same way:
+
+```jldoctest
+julia> function sum_to_def_closure(n)
+           function loop_body(i)
+               t = s + i # new local `t`
+               s = t # assign same local `s` as below
+           end
+           s = 0 # new local
+           for i = 1:n
+               loop_body(i)
+           end
+           return s, @isdefined(t)
+       end
+sum_to_def_closure (generic function with 1 method)
+
+julia> sum_to_def_closure(10)
+(55, false)
+```
+
+This example illustrates a couple of key points:
+
+1. Inner function scopes are just like any other nested local scope. In
+   particular, if a variable is already a local outside of an inner function and
+   you assign to it in the inner function, the outer local variable is updated.
+
+2. It doesn't matter if the definition of an outer local happens below where it
+   is updated; the rule remains the same. The entire enclosing local scope is
+   parsed and its locals determined before inner local meanings are resolved.
+
+This design means that you can generally move code in or out of an inner
+function without changing its meaning, which facilitates a number of common
+idioms in the language using closures (see [do blocks](@ref
+Do-Block-Syntax-for-Function-Arguments)).
+
 Let's move onto some more ambiguous cases covered by the soft scope rule. We'll explore this by
 extracting the bodies of the `greet` and `sum_to_def` functions into soft scope contexts. First, let's put the
 body of `greet` in a `for` loop—which is soft, rather than hard—and evaluate it in the REPL:
diff --git a/doc/src/manual/variables.md b/doc/src/manual/variables.md
index 65619dd02430d8..004efb92dc0e5d 100644
--- a/doc/src/manual/variables.md
+++ b/doc/src/manual/variables.md
@@ -90,7 +90,7 @@ julia> sqrt = 4
 ERROR: cannot assign a value to variable Base.sqrt from module Main
 ```
 
-## Allowed Variable Names
+## [Allowed Variable Names](@id man-allowed-variable-names)
 
 Variable names must begin with a letter (A-Z or a-z), underscore, or a subset of Unicode code
 points greater than 00A0; in particular, [Unicode character categories](http://www.fileformat.info/info/unicode/category/index.htm)
@@ -110,6 +110,19 @@ A space is required between an operator that ends with a subscript/superscript l
 variable name. For example, if `+ᵃ` is an operator, then `+ᵃx` must be written as `+ᵃ x` to distinguish
 it from `+ ᵃx` where `ᵃx` is the variable name.
 
+
+A particular class of variable names is one that contains only underscores. These identifiers can only be assigned values but cannot be used to assign values to other variables.
+More technically, they can only be used as an [L-value](https://en.wikipedia.org/wiki/Value_(computer_science)#lrvalue), but not as an
+ [R-value](https://en.wikipedia.org/wiki/R-value):
+
+```julia-repl
+julia> x, ___ = size([2 2; 1 1])
+(2, 2)
+
+julia> y = ___
+ERROR: syntax: all-underscore identifier used as rvalue
+```
+
 The only explicitly disallowed names for variables are the names of the built-in [Keywords](@ref):
 
 ```julia-repl
@@ -123,9 +136,14 @@ ERROR: syntax: unexpected "="
 Some Unicode characters are considered to be equivalent in identifiers.
 Different ways of entering Unicode combining characters (e.g., accents)
 are treated as equivalent (specifically, Julia identifiers are [NFC](http://www.macchiato.com/unicode/nfc-faq)-normalized).
-The Unicode characters `ɛ` (U+025B: Latin small letter open e)
-and `µ` (U+00B5: micro sign) are treated as equivalent to the corresponding
-Greek letters, because the former are easily accessible via some input methods.
+Julia also includes a few non-standard equivalences for characters that are
+visually similar and are easily entered by some input methods. The Unicode
+characters `ɛ` (U+025B: Latin small letter open e) and `µ` (U+00B5: micro sign)
+are treated as equivalent to the corresponding Greek letters. The middle dot
+`·` (U+00B7) and the Greek
+[interpunct](https://en.wikipedia.org/wiki/Interpunct) `·` (U+0387) are both
+treated as the mathematical dot operator `⋅` (U+22C5).
+The minus sign `−` (U+2212) is treated as equivalent to the hyphen-minus sign `-` (U+002D).
 
 ## Stylistic Conventions
 
@@ -141,7 +159,5 @@ conventions:
   * Functions that write to their arguments have names that end in `!`. These are sometimes called
     "mutating" or "in-place" functions because they are intended to produce changes in their arguments
     after the function is called, not just return a value.
-  * Names starting with an underscore denote functions, macros or variables that are only used internally
-    by a package and are not part of its public API.
 
 For more information about stylistic conventions, see the [Style Guide](@ref).
diff --git a/src/Makefile b/src/Makefile
index 6f2ee2960c7a26..ed0cbaf9c8c9ae 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -55,7 +55,7 @@ SRCS += codegen llvm-ptls
 RUNTIME_SRCS += jitlayers aotcompile debuginfo disasm llvm-simdloop llvm-muladd \
 	llvm-final-gc-lowering llvm-pass-helpers llvm-late-gc-lowering \
 	llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces \
-	llvm-multiversioning llvm-alloc-opt cgmemmgr llvm-api llvm-remove-addrspaces \
+	llvm-multiversioning llvm-alloc-opt cgmemmgr llvm-remove-addrspaces \
 	llvm-remove-ni llvm-julia-licm llvm-demote-float16
 FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir)
 LLVM_LIBS := all
@@ -84,11 +84,19 @@ endif
 
 SRCS += $(RUNTIME_SRCS)
 
+ifeq ($(WITH_DTRACE),1)
+DTRACE_HEADERS := uprobes.h.gen
+ifneq ($(OS),Darwin)
+SRCS += uprobes
+endif
+else
+DTRACE_HEADERS :=
+endif
 
 # headers are used for dependency tracking, while public headers will be part of the dist
 UV_HEADERS :=
-HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,julia.h julia_assert.h julia_threads.h tls.h locks.h atomics.h julia_internal.h options.h timing.h)
-PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,julia.h julia_assert.h julia_threads.h tls.h locks.h atomics.h julia_gcext.h)
+HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,julia.h julia_assert.h julia_threads.h julia_fasttls.h locks.h atomics.h julia_internal.h options.h timing.h) $(addprefix $(BUILDDIR)/, $(DTRACE_HEADERS))
+PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,julia.h julia_assert.h julia_threads.h julia_fasttls.h locks.h atomics.h julia_gcext.h)
 ifeq ($(USE_SYSTEM_LIBUV),0)
 UV_HEADERS += uv.h
 UV_HEADERS += uv/*.h
@@ -105,7 +113,7 @@ LLVMLINK += $(LLVM_LDFLAGS) $(shell $(LLVM_CONFIG_HOST) --libs --system-libs)
 #       https://github.com/JuliaLang/julia/issues/29981
 else
 ifneq ($(USE_LLVM_SHLIB),1)
-LLVMLINK += $(LLVM_LDFLAGS) $(shell $(LLVM_CONFIG_HOST) --libs $(LLVM_LIBS)) $($(LLVM_LDFLAGS) $(shell $(LLVM_CONFIG_HOST) --system-libs 2> /dev/null)
+LLVMLINK += $(LLVM_LDFLAGS) $(shell $(LLVM_CONFIG_HOST) --libs $(LLVM_LIBS) --link-static) $($(LLVM_LDFLAGS) $(shell $(LLVM_CONFIG_HOST) --system-libs 2> /dev/null)
 else
 LLVMLINK += $(LLVM_LDFLAGS) -lLLVM
 endif
@@ -140,11 +148,13 @@ DEBUGFLAGS += "-DJL_LIBJULIA_SONAME=\"libjulia-debug.$(JL_MAJOR_SHLIB_EXT)\"" "-
 
 ifeq ($(USE_CROSS_FLISP), 1)
 FLISPDIR := $(BUILDDIR)/flisp/host
+FLISP_EXECUTABLE_debug := $(FLISPDIR)/flisp-debug$(BUILD_EXE)
+FLISP_EXECUTABLE_release := $(FLISPDIR)/flisp$(BUILD_EXE)
 else
 FLISPDIR := $(BUILDDIR)/flisp
+FLISP_EXECUTABLE_debug := $(FLISPDIR)/flisp-debug$(EXE)
+FLISP_EXECUTABLE_release := $(FLISPDIR)/flisp$(EXE)
 endif
-FLISP_EXECUTABLE_debug := $(FLISPDIR)/flisp-debug$(BUILD_EXE)
-FLISP_EXECUTABLE_release := $(FLISPDIR)/flisp$(BUILD_EXE)
 ifeq ($(OS),WINNT)
 FLISP_EXECUTABLE := $(FLISP_EXECUTABLE_release)
 else
@@ -161,6 +171,13 @@ $(BUILDDIR):
 
 LLVM_CONFIG_ABSOLUTE := $(shell which $(LLVM_CONFIG))
 
+# Generate the DTrace header file, while also renaming the macros from
+# JULIA_ to JL_PROBE to clearly delineate them.
+$(BUILDDIR)/%.h.gen : $(SRCDIR)/%.d
+	@$(call PRINT_DTRACE, $(DTRACE) -h -s $< -o $@)
+	sed 's/JULIA_/JL_PROBE_/' $@ > $@.tmp
+	mv $@.tmp $@
+
 # source file rules
 $(BUILDDIR)/%.o: $(SRCDIR)/%.c $(HEADERS) | $(BUILDDIR)
 	@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(SHIPFLAGS) $(DISABLE_ASSERTIONS) -c $< -o $@)
@@ -170,6 +187,10 @@ $(BUILDDIR)/%.o: $(SRCDIR)/%.cpp $(SRCDIR)/llvm-version.h $(HEADERS) $(LLVM_CONF
 	@$(call PRINT_CC, $(CXX) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(SHIPFLAGS) $(CXX_DISABLE_ASSERTION) -c $< -o $@)
 $(BUILDDIR)/%.dbg.obj: $(SRCDIR)/%.cpp $(SRCDIR)/llvm-version.h $(HEADERS) $(LLVM_CONFIG_ABSOLUTE) | $(BUILDDIR)
 	@$(call PRINT_CC, $(CXX) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(DEBUGFLAGS) -c $< -o $@)
+$(BUILDDIR)/%.o : $(SRCDIR)/%.d
+	@$(call PRINT_DTRACE, $(DTRACE) -G -s $< -o $@)
+$(BUILDDIR)/%.dbg.obj : $(SRCDIR)/%.d
+	@$(call PRINT_DTRACE, $(DTRACE) -G -s $< -o $@)
 
 # public header rules
 $(eval $(call dir_target,$(build_includedir)/julia))
@@ -209,7 +230,7 @@ endif
 	$(INSTALL_NAME_CMD)libccalltest.$(SHLIB_EXT) $@
 
 $(build_shlibdir)/libllvmcalltest.$(SHLIB_EXT): $(SRCDIR)/llvmcalltest.cpp $(LLVM_CONFIG_ABSOLUTE)
-	@$(call PRINT_CC, $(CXX) $(LLVM_CXXFLAGS) $(JCXXFLAGS) $(JCPPFLAGS) $(DEBUGFLAGS) -O3 $< $(fPIC) -shared -o $@ $(JLDFLAGS) -L$(build_shlibdir) -L$(build_libdir) $(NO_WHOLE_ARCHIVE) $(LLVMLINK))
+	@$(call PRINT_CC, $(CXX) $(LLVM_CXXFLAGS) $(JCXXFLAGS) $(JCPPFLAGS) $(DEBUGFLAGS) -O3 $< $(fPIC) -shared -o $@ $(JLDFLAGS) -L$(build_shlibdir) -L$(build_libdir) $(NO_WHOLE_ARCHIVE) $(LLVMLINK)) -lpthread
 
 julia_flisp.boot.inc.phony: $(BUILDDIR)/julia_flisp.boot.inc
 
@@ -240,7 +261,7 @@ $(BUILDDIR)/interpreter.o $(BUILDDIR)/interpreter.dbg.obj: $(SRCDIR)/builtin_pro
 $(BUILDDIR)/jitlayers.o $(BUILDDIR)/jitlayers.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/codegen_shared.h
 $(BUILDDIR)/jltypes.o $(BUILDDIR)/jltypes.dbg.obj: $(SRCDIR)/builtin_proto.h
 $(build_shlibdir)/libllvmcalltest.$(SHLIB_EXT): $(SRCDIR)/codegen_shared.h $(BUILDDIR)/julia_version.h
-$(BUILDDIR)/llvm-alloc-opt.o $(BUILDDIR)/llvm-alloc-opt.dbg.obj: $(SRCDIR)/codegen_shared.h
+$(BUILDDIR)/llvm-alloc-opt.o $(BUILDDIR)/llvm-alloc-opt.dbg.obj: $(SRCDIR)/codegen_shared.h $(SRCDIR)/llvm-pass-helpers.h
 $(BUILDDIR)/llvm-final-gc-lowering.o $(BUILDDIR)/llvm-final-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h
 $(BUILDDIR)/llvm-gc-invariant-verifier.o $(BUILDDIR)/llvm-gc-invariant-verifier.dbg.obj: $(SRCDIR)/codegen_shared.h
 $(BUILDDIR)/llvm-late-gc-lowering.o $(BUILDDIR)/llvm-late-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h
@@ -257,10 +278,10 @@ $(addprefix $(BUILDDIR)/,threading.o threading.dbg.obj gc.o gc.dbg.obj init.c in
 $(addprefix $(BUILDDIR)/,APInt-C.o APInt-C.dbg.obj runtime_intrinsics.o runtime_intrinsics.dbg.obj): $(SRCDIR)/APInt-C.h
 
 # archive library file rules
-$(BUILDDIR)/support/libsupport.a: $(addprefix $(SRCDIR)/support/,*.h *.c *.S) $(SRCDIR)/support/*.c
+$(BUILDDIR)/support/libsupport.a: $(addprefix $(SRCDIR)/support/,*.h *.c *.S *.inc) $(SRCDIR)/support/*.c
 	$(MAKE) -C $(SRCDIR)/support BUILDDIR='$(abspath $(BUILDDIR)/support)'
 
-$(BUILDDIR)/support/libsupport-debug.a: $(addprefix $(SRCDIR)/support/,*.h *.c *.S) $(SRCDIR)/support/*.c
+$(BUILDDIR)/support/libsupport-debug.a: $(addprefix $(SRCDIR)/support/,*.h *.c *.S *.inc) $(SRCDIR)/support/*.c
 	$(MAKE) -C $(SRCDIR)/support debug BUILDDIR='$(abspath $(BUILDDIR)/support)'
 
 $(FLISP_EXECUTABLE_release): $(BUILDDIR)/flisp/libflisp.a
@@ -356,9 +377,9 @@ endif
 clangsa: $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT)
 
 clang-sa-%: $(SRCDIR)/%.c $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) | analyzegc-deps-check
-	@$(call PRINT_ANALYZE, $(build_depsbindir)/clang --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text -Xclang -load -Xclang $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) $(CLANGSA_FLAGS) $(JCPPFLAGS) $(JCFLAGS) $(DEBUGFLAGS)  -Xclang -analyzer-checker=core$(COMMA)julia.GCChecker --analyzer-no-default-checks -fcolor-diagnostics -Werror -x c $<)
+	@$(call PRINT_ANALYZE, $(build_bindir)/clang --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text -Xclang -load -Xclang $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) $(CLANGSA_FLAGS) $(JCPPFLAGS) $(JCFLAGS) $(DEBUGFLAGS)  -Xclang -analyzer-checker=core$(COMMA)julia.GCChecker --analyzer-no-default-checks -fcolor-diagnostics -Werror -x c $<)
 clang-sa-%: $(SRCDIR)/%.cpp $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) | analyzegc-deps-check
-	@$(call PRINT_ANALYZE, $(build_depsbindir)/clang --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text -Xclang -load -Xclang $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) $(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(DEBUGFLAGS) -Xclang -analyzer-checker=core$(COMMA)julia.GCChecker --analyzer-no-default-checks -fcolor-diagnostics -Werror -x c++ $<)
+	@$(call PRINT_ANALYZE, $(build_bindir)/clang --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text -Xclang -load -Xclang $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) $(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(DEBUGFLAGS) -Xclang -analyzer-checker=core$(COMMA)julia.GCChecker --analyzer-no-default-checks -fcolor-diagnostics -Werror -x c++ $<)
 
 # Add C files as a target of `analyzegc`
 analyzegc: $(addprefix clang-sa-,$(RUNTIME_SRCS))
diff --git a/src/abi_aarch64.cpp b/src/abi_aarch64.cpp
index ce94cc66f06417..3e6b995f07b1ef 100644
--- a/src/abi_aarch64.cpp
+++ b/src/abi_aarch64.cpp
@@ -13,33 +13,26 @@
 
 struct ABI_AArch64Layout : AbiLayout {
 
-Type *get_llvm_vectype(jl_datatype_t *dt) const
+Type *get_llvm_vectype(jl_datatype_t *dt, LLVMContext &ctx) const
 {
     // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
-    // `!dt->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields > 0`
+    // `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields > 0`
     if (dt->layout == NULL || jl_is_layout_opaque(dt->layout))
         return nullptr;
     size_t nfields = dt->layout->nfields;
     assert(nfields > 0);
     if (nfields < 2)
         return nullptr;
-#if JL_LLVM_VERSION >= 110000
-    static Type *T_vec64 = FixedVectorType::get(T_int32, 2);
-    static Type *T_vec128 = FixedVectorType::get(T_int32, 4);
-#else
-    static Type *T_vec64 = VectorType::get(T_int32, 2);
-    static Type *T_vec128 = VectorType::get(T_int32, 4);
-#endif
     Type *lltype;
     // Short vector should be either 8 bytes or 16 bytes.
     // Note that there are only two distinct fundamental types for
     // short vectors so we normalize them to <2 x i32> and <4 x i32>
     switch (jl_datatype_size(dt)) {
     case 8:
-        lltype = T_vec64;
+        lltype = FixedVectorType::get(Type::getInt32Ty(ctx), 2);
         break;
     case 16:
-        lltype = T_vec128;
+        lltype = FixedVectorType::get(Type::getInt32Ty(ctx), 4);
         break;
     default:
         return nullptr;
@@ -64,24 +57,24 @@ Type *get_llvm_vectype(jl_datatype_t *dt) const
 }
 
 #define jl_is_floattype(v)   jl_subtype(v,(jl_value_t*)jl_floatingpoint_type)
-Type *get_llvm_fptype(jl_datatype_t *dt) const
+Type *get_llvm_fptype(jl_datatype_t *dt, LLVMContext &ctx) const
 {
     // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
-    // `!dt->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields == 0`
+    // `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields == 0`
     Type *lltype;
     // Check size first since it's cheaper.
     switch (jl_datatype_size(dt)) {
     case 2:
-        lltype = T_float16;
+        lltype = Type::getHalfTy(ctx);
         break;
     case 4:
-        lltype = T_float32;
+        lltype = Type::getFloatTy(ctx);
         break;
     case 8:
-        lltype = T_float64;
+        lltype = Type::getDoubleTy(ctx);
         break;
     case 16:
-        lltype = T_float128;
+        lltype = Type::getFP128Ty(ctx);
         break;
     default:
         return nullptr;
@@ -90,12 +83,12 @@ Type *get_llvm_fptype(jl_datatype_t *dt) const
             lltype : nullptr);
 }
 
-Type *get_llvm_fp_or_vectype(jl_datatype_t *dt) const
+Type *get_llvm_fp_or_vectype(jl_datatype_t *dt, LLVMContext &ctx) const
 {
     // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
-    if (dt->mutabl || dt->layout->npointers || dt->layout->haspadding)
+    if (dt->name->mutabl || dt->layout->npointers || dt->layout->haspadding)
         return nullptr;
-    return dt->layout->nfields ? get_llvm_vectype(dt) : get_llvm_fptype(dt);
+    return dt->layout->nfields ? get_llvm_vectype(dt, ctx) : get_llvm_fptype(dt, ctx);
 }
 
 struct ElementType {
@@ -110,7 +103,7 @@ struct ElementType {
 // Data Types of the members that compose the type are the same.
 // Note that it is the fundamental types that are important and not the member
 // types.
-bool isHFAorHVA(jl_datatype_t *dt, size_t dsz, size_t &nele, ElementType &ele) const
+bool isHFAorHVA(jl_datatype_t *dt, size_t dsz, size_t &nele, ElementType &ele, LLVMContext &ctx) const
 {
     // Assume:
     //     dt is a pointerfree type, (all members are isbits)
@@ -138,7 +131,7 @@ bool isHFAorHVA(jl_datatype_t *dt, size_t dsz, size_t &nele, ElementType &ele) c
             dt = (jl_datatype_t*)jl_field_type(dt, i);
             continue;
         }
-        if (Type *vectype = get_llvm_vectype(dt)) {
+        if (Type *vectype = get_llvm_vectype(dt, ctx)) {
             if ((ele.sz && dsz != ele.sz) || (ele.type && ele.type != vectype))
                 return false;
             ele.type = vectype;
@@ -154,7 +147,7 @@ bool isHFAorHVA(jl_datatype_t *dt, size_t dsz, size_t &nele, ElementType &ele) c
             jl_datatype_t *fieldtype = (jl_datatype_t*)jl_field_type(dt, i);
             // Check element count.
             // This needs to be done after the zero size member check
-            if (nele > 3 || !isHFAorHVA(fieldtype, fieldsz, nele, ele)) {
+            if (nele > 3 || !isHFAorHVA(fieldtype, fieldsz, nele, ele, ctx)) {
                 return false;
             }
         }
@@ -163,7 +156,7 @@ bool isHFAorHVA(jl_datatype_t *dt, size_t dsz, size_t &nele, ElementType &ele) c
     // For bitstypes
     if (ele.sz && dsz != ele.sz)
         return false;
-    Type *new_type = get_llvm_fptype(dt);
+    Type *new_type = get_llvm_fptype(dt, ctx);
     if (new_type && (!ele.type || ele.type == new_type)) {
         ele.type = new_type;
         ele.sz = dsz;
@@ -173,7 +166,7 @@ bool isHFAorHVA(jl_datatype_t *dt, size_t dsz, size_t &nele, ElementType &ele) c
     return false;
 }
 
-Type *isHFAorHVA(jl_datatype_t *dt, size_t &nele) const
+Type *isHFAorHVA(jl_datatype_t *dt, size_t &nele, LLVMContext &ctx) const
 {
     // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
 
@@ -189,18 +182,18 @@ Type *isHFAorHVA(jl_datatype_t *dt, size_t &nele) const
         return NULL;
     nele = 0;
     ElementType eltype;
-    if (isHFAorHVA(dt, dsz, nele, eltype))
+    if (isHFAorHVA(dt, dsz, nele, eltype, ctx))
         return eltype.type;
     return NULL;
 }
 
-bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab) override
+bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab, LLVMContext &ctx) override
 {
     // B.2
     //   If the argument type is an HFA or an HVA, then the argument is used
     //   unmodified.
     size_t size;
-    if (isHFAorHVA(dt, size))
+    if (isHFAorHVA(dt, size, ctx))
         return false;
     // B.3
     //   If the argument type is a Composite Type that is larger than 16 bytes,
@@ -227,7 +220,7 @@ bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab) override
 //
 // All the out parameters should be default to `false`.
 Type *classify_arg(jl_datatype_t *dt, bool *fpreg, bool *onstack,
-                   size_t *rewrite_len) const
+                   size_t *rewrite_len, LLVMContext &ctx) const
 {
     // Based on section 5.4 C of the Procedure Call Standard
     // C.1
@@ -236,7 +229,7 @@ Type *classify_arg(jl_datatype_t *dt, bool *fpreg, bool *onstack,
     //   the argument is allocated to the least significant bits of register
     //   v[NSRN]. The NSRN is incremented by one. The argument has now been
     //   allocated.
-    if (get_llvm_fp_or_vectype(dt)) {
+    if (get_llvm_fp_or_vectype(dt, ctx)) {
         *fpreg = true;
         return NULL;
     }
@@ -248,7 +241,7 @@ Type *classify_arg(jl_datatype_t *dt, bool *fpreg, bool *onstack,
     //   Floating-point Registers (with one register per member of the HFA
     //   or HVA). The NSRN is incremented by the number of registers used.
     //   The argument has now been allocated.
-    if (Type *eltype = isHFAorHVA(dt, *rewrite_len)) {
+    if (Type *eltype = isHFAorHVA(dt, *rewrite_len, ctx)) {
         assert(*rewrite_len > 0 && *rewrite_len <= 4);
         // HFA and HVA have <= 4 members
         *fpreg = true;
@@ -327,7 +320,7 @@ Type *classify_arg(jl_datatype_t *dt, bool *fpreg, bool *onstack,
     assert(jl_datatype_size(dt) <= 16); // Should be pass by reference otherwise
     *rewrite_len = (jl_datatype_size(dt) + 7) >> 3;
     // Rewrite to [n x Int64] where n is the **size in dword**
-    return jl_datatype_size(dt) ? T_int64 : NULL;
+    return jl_datatype_size(dt) ? Type::getInt64Ty(ctx) : NULL;
 
     // C.11
     //   The NGRN is set to 8.
@@ -351,7 +344,7 @@ Type *classify_arg(jl_datatype_t *dt, bool *fpreg, bool *onstack,
     // <handled by C.10 above>
 }
 
-bool use_sret(jl_datatype_t *dt) override
+bool use_sret(jl_datatype_t *dt, LLVMContext &ctx) override
 {
     // Section 5.5
     // If the type, T, of the result of a function is such that
@@ -365,18 +358,18 @@ bool use_sret(jl_datatype_t *dt) override
     bool fpreg = false;
     bool onstack = false;
     size_t rewrite_len = 0;
-    classify_arg(dt, &fpreg, &onstack, &rewrite_len);
+    classify_arg(dt, &fpreg, &onstack, &rewrite_len, ctx);
     return onstack;
 }
 
-Type *preferred_llvm_type(jl_datatype_t *dt, bool isret) const override
+Type *preferred_llvm_type(jl_datatype_t *dt, bool isret, LLVMContext &ctx) const override
 {
-    if (Type *fptype = get_llvm_fp_or_vectype(dt))
+    if (Type *fptype = get_llvm_fp_or_vectype(dt, ctx))
         return fptype;
     bool fpreg = false;
     bool onstack = false;
     size_t rewrite_len = 0;
-    if (Type *rewrite_ty = classify_arg(dt, &fpreg, &onstack, &rewrite_len))
+    if (Type *rewrite_ty = classify_arg(dt, &fpreg, &onstack, &rewrite_len, ctx))
         return ArrayType::get(rewrite_ty, rewrite_len);
     return NULL;
 }
diff --git a/src/abi_arm.cpp b/src/abi_arm.cpp
index 1a5d3d06513689..032943abd45f03 100644
--- a/src/abi_arm.cpp
+++ b/src/abi_arm.cpp
@@ -23,29 +23,29 @@
 
 struct ABI_ARMLayout : AbiLayout {
 
-bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab) override
+bool needPassByRef(jl_datatype_t *dt, AttrBuilder &abi, LLVMContext &ctx) override
 {
     return false;
 }
 
 #define jl_is_floattype(v)   jl_subtype(v,(jl_value_t*)jl_floatingpoint_type)
 
-Type *get_llvm_fptype(jl_datatype_t *dt) const
+Type *get_llvm_fptype(jl_datatype_t *dt, LLVMContext &ctx) const
 {
     // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
-    if (dt->mutabl || jl_datatype_nfields(dt) != 0)
+    if (dt->name->mutabl || jl_datatype_nfields(dt) != 0)
         return NULL;
     Type *lltype;
     // Check size first since it's cheaper.
     switch (jl_datatype_size(dt)) {
     case 2:
-        lltype = T_float16;
+        lltype = Type::getHalfTy(ctx);
         break;
     case 4:
-        lltype = T_float32;
+        lltype = Type::getFloatTy(ctx);
         break;
     case 8:
-        lltype = T_float64;
+        lltype = Type::getDoubleTy(ctx);
         break;
     default:
         return NULL;
@@ -58,10 +58,10 @@ Type *get_llvm_fptype(jl_datatype_t *dt) const
 // fundamental type.
 //
 // Returns the corresponding LLVM type.
-Type *isLegalHAType(jl_datatype_t *dt) const
+Type *isLegalHAType(jl_datatype_t *dt, LLVMContext &ctx) const
 {
     // single- or double-precision floating-point type
-    if (Type *fp = get_llvm_fptype(dt))
+    if (Type *fp = get_llvm_fptype(dt, ctx))
         return fp;
 
     // NOT SUPPORTED: 64- or 128-bit containerized vectors
@@ -74,7 +74,7 @@ Type *isLegalHAType(jl_datatype_t *dt) const
 //
 // Legality of the HA is determined by a nonzero return value.
 // In case of a non-legal HA, the value of 'base' is undefined.
-size_t isLegalHA(jl_datatype_t *dt, Type *&base) const
+size_t isLegalHA(jl_datatype_t *dt, Type *&base, LLVMContext &ctx) const
 {
     // Homogeneous aggregates are only used for VFP registers,
     // so use that definition of legality (section 6.1.2.1)
@@ -92,10 +92,10 @@ size_t isLegalHA(jl_datatype_t *dt, Type *&base) const
         for (size_t i = 0; i < parent_members; ++i) {
             jl_datatype_t *fdt = (jl_datatype_t*)jl_field_type(dt,i);
 
-            Type *T = isLegalHAType(fdt);
+            Type *T = isLegalHAType(fdt, ctx);
             if (T)
                 total_members++;
-            else if (size_t field_members = isLegalHA(fdt, T))
+            else if (size_t field_members = isLegalHA(fdt, T, ctx))
                 // recursive application (expanding nested composite types)
                 total_members += field_members;
             else
@@ -120,7 +120,7 @@ size_t isLegalHA(jl_datatype_t *dt, Type *&base) const
 // Determine if an argument can be passed through a coprocessor register.
 //
 // All the out parameters should be default to `false`.
-void classify_cprc(jl_datatype_t *dt, bool *vfp) const
+void classify_cprc(jl_datatype_t *dt, bool *vfp, LLVMContext &ctx) const
 {
     // Based on section 6.1 of the Procedure Call Standard
 
@@ -128,7 +128,7 @@ void classify_cprc(jl_datatype_t *dt, bool *vfp) const
     // - A half-precision floating-point type.
     // - A single-precision floating-point type.
     // - A double-precision floating-point type.
-    if (get_llvm_fptype(dt)) {
+    if (get_llvm_fptype(dt, ctx)) {
         *vfp = true;
         return;
     }
@@ -137,14 +137,14 @@ void classify_cprc(jl_datatype_t *dt, bool *vfp) const
 
     // - A Homogeneous Aggregate
     Type *base = NULL;
-    if (isLegalHA(dt, base)) {
+    if (isLegalHA(dt, base, ctx)) {
         *vfp = true;
         return;
     }
 }
 
-void classify_return_arg(jl_datatype_t *dt, bool *reg,
-                         bool *onstack, bool *need_rewrite) const
+void classify_return_arg(jl_datatype_t *dt, bool *reg, bool *onstack,
+                         bool *need_rewrite, LLVMContext &ctx) const
 {
     // Based on section 5.4 of the Procedure Call Standard
 
@@ -152,7 +152,7 @@ void classify_return_arg(jl_datatype_t *dt, bool *reg,
     //   Any result whose type would satisfy the conditions for a VFP CPRC is
     //   returned in the appropriate number of consecutive VFP registers
     //   starting with the lowest numbered register (s0, d0, q0).
-    classify_cprc(dt, reg);
+    classify_cprc(dt, reg, ctx);
     if (*reg)
         return;
 
@@ -196,12 +196,12 @@ void classify_return_arg(jl_datatype_t *dt, bool *reg,
         *onstack = true;
 }
 
-bool use_sret(jl_datatype_t *dt) override
+bool use_sret(jl_datatype_t *dt, LLVMContext &ctx) override
 {
     bool reg = false;
     bool onstack = false;
     bool need_rewrite = false;
-    classify_return_arg(dt, &reg, &onstack, &need_rewrite);
+    classify_return_arg(dt, &reg, &onstack, &need_rewrite, ctx);
 
     return onstack;
 }
@@ -218,7 +218,7 @@ bool use_sret(jl_datatype_t *dt) override
 //
 // All the out parameters should be default to `false`.
 void classify_arg(jl_datatype_t *dt, bool *reg,
-                  bool *onstack, bool *need_rewrite) const
+                  bool *onstack, bool *need_rewrite, LLVMContext &ctx) const
 {
     // Based on section 5.5 of the Procedure Call Standard
 
@@ -226,7 +226,7 @@ void classify_arg(jl_datatype_t *dt, bool *reg,
     //   If the argument is a CPRC and there are sufficient unallocated
     //   co-processor registers of the appropriate class, the argument is
     //   allocated to co-processor registers.
-    classify_cprc(dt, reg);
+    classify_cprc(dt, reg, ctx);
     if (*reg)
         return;
 
@@ -239,18 +239,18 @@ void classify_arg(jl_datatype_t *dt, bool *reg,
     *need_rewrite = true;
 }
 
-Type *preferred_llvm_type(jl_datatype_t *dt, bool isret) const override
+Type *preferred_llvm_type(jl_datatype_t *dt, bool isret, LLVMContext &ctx) const override
 {
-    if (Type *fptype = get_llvm_fptype(dt))
+    if (Type *fptype = get_llvm_fptype(dt, ctx))
         return fptype;
 
     bool reg = false;
     bool onstack = false;
     bool need_rewrite = false;
     if (isret)
-        classify_return_arg(dt, &reg, &onstack, &need_rewrite);
+        classify_return_arg(dt, &reg, &onstack, &need_rewrite, ctx);
     else
-        classify_arg(dt, &reg, &onstack, &need_rewrite);
+        classify_arg(dt, &reg, &onstack, &need_rewrite, ctx);
 
     if (!need_rewrite)
         return NULL;
@@ -276,7 +276,7 @@ Type *preferred_llvm_type(jl_datatype_t *dt, bool isret) const override
     if (align > 8)
         align = 8;
 
-    Type *T = Type::getIntNTy(jl_LLVMContext, align*8);
+    Type *T = Type::getIntNTy(ctx, align*8);
     return ArrayType::get(T, (jl_datatype_size(dt) + align - 1) / align);
 }
 
diff --git a/src/abi_llvm.cpp b/src/abi_llvm.cpp
index 1ab30da1b2f75b..f21edeadee03ad 100644
--- a/src/abi_llvm.cpp
+++ b/src/abi_llvm.cpp
@@ -40,17 +40,17 @@
 
 struct ABI_LLVMLayout : AbiLayout {
 
-bool use_sret(jl_datatype_t *ty) override
+bool use_sret(jl_datatype_t *ty, LLVMContext &ctx) override
 {
     return false;
 }
 
-bool needPassByRef(jl_datatype_t *ty, AttrBuilder &ab) override
+bool needPassByRef(jl_datatype_t *ty, AttrBuilder &ab, LLVMContext &ctx) override
 {
     return false;
 }
 
-Type *preferred_llvm_type(jl_datatype_t *ty, bool isret) const override
+Type *preferred_llvm_type(jl_datatype_t *ty, bool isret, LLVMContext &ctx) const override
 {
     return NULL;
 }
diff --git a/src/abi_ppc64le.cpp b/src/abi_ppc64le.cpp
index dd6f927d9c3014..da1d8484a0823d 100644
--- a/src/abi_ppc64le.cpp
+++ b/src/abi_ppc64le.cpp
@@ -92,7 +92,7 @@ unsigned isHFA(jl_datatype_t *ty, jl_datatype_t **ty0, bool *hva) const
     return n;
 }
 
-bool use_sret(jl_datatype_t *dt) override
+bool use_sret(jl_datatype_t *dt, LLVMContext &ctx) override
 {
     jl_datatype_t *ty0 = NULL;
     bool hva = false;
@@ -101,7 +101,7 @@ bool use_sret(jl_datatype_t *dt) override
     return false;
 }
 
-bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab) override
+bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab, LLVMContext &ctx) override
 {
     jl_datatype_t *ty0 = NULL;
     bool hva = false;
@@ -112,7 +112,7 @@ bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab) override
     return false;
 }
 
-Type *preferred_llvm_type(jl_datatype_t *dt, bool isret) const override
+Type *preferred_llvm_type(jl_datatype_t *dt, bool isret, LLVMContext &ctx) const override
 {
     // Arguments are either scalar or passed by value
     size_t size = jl_datatype_size(dt);
@@ -134,11 +134,7 @@ Type *preferred_llvm_type(jl_datatype_t *dt, bool isret) const override
             jl_datatype_t *vecty = (jl_datatype_t*)jl_field_type(ty0, 0);
             assert(jl_is_datatype(vecty) && vecty->name == jl_vecelement_typename);
             Type *ety = bitstype_to_llvm(jl_tparam0(vecty));
-#if JL_LLVM_VERSION >= 110000
             Type *vty = FixedVectorType::get(ety, jl_datatype_nfields(ty0));
-#else
-            Type *vty = VectorType::get(ety, jl_datatype_nfields(ty0));
-#endif
             return ArrayType::get(vty, hfa);
         }
     }
@@ -146,14 +142,15 @@ Type *preferred_llvm_type(jl_datatype_t *dt, bool isret) const override
     // the bitsize of the integer gives the desired alignment
     if (size > 8) {
         if (jl_datatype_align(dt) <= 8) {
+            Type  *T_int64 = Type::getInt64Ty(ctx);
             return ArrayType::get(T_int64, (size + 7) / 8);
         }
         else {
-            Type *T_int128 = Type::getIntNTy(jl_LLVMContext, 128);
+            Type *T_int128 = Type::getIntNTy(ctx, 128);
             return ArrayType::get(T_int128, (size + 15) / 16);
         }
     }
-    return Type::getIntNTy(jl_LLVMContext, size * 8);
+    return Type::getIntNTy(ctx, size * 8);
 }
 
 };
diff --git a/src/abi_win32.cpp b/src/abi_win32.cpp
index af16a0310b1248..a25fcaec9b82a8 100644
--- a/src/abi_win32.cpp
+++ b/src/abi_win32.cpp
@@ -39,7 +39,7 @@
 
 struct ABI_Win32Layout : AbiLayout {
 
-bool use_sret(jl_datatype_t *dt) override
+bool use_sret(jl_datatype_t *dt, LLVMContext &ctx) override
 {
     // Use sret if the size of the argument is not one of 1, 2, 4, 8 bytes
     // This covers the special case of ComplexF32
@@ -49,7 +49,7 @@ bool use_sret(jl_datatype_t *dt) override
     return true;
 }
 
-bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab) override
+bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab, LLVMContext &ctx) override
 {
     // Use pass by reference for all structs
     if (dt->layout->nfields > 0) {
@@ -59,13 +59,13 @@ bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab) override
     return false;
 }
 
-Type *preferred_llvm_type(jl_datatype_t *dt, bool isret) const override
+Type *preferred_llvm_type(jl_datatype_t *dt, bool isret, LLVMContext &ctx) const override
 {
     // Arguments are either scalar or passed by value
     // rewrite integer sized (non-sret) struct to the corresponding integer
     if (!dt->layout->nfields)
         return NULL;
-    return Type::getIntNTy(jl_LLVMContext, jl_datatype_nbits(dt));
+    return Type::getIntNTy(ctx, jl_datatype_nbits(dt));
 }
 
 };
diff --git a/src/abi_win64.cpp b/src/abi_win64.cpp
index 16e46a9703f6ad..6f6d407cfc10dc 100644
--- a/src/abi_win64.cpp
+++ b/src/abi_win64.cpp
@@ -47,7 +47,7 @@ struct ABI_Win64Layout : AbiLayout {
 int nargs;
 ABI_Win64Layout() : nargs(0) { }
 
-bool use_sret(jl_datatype_t *dt) override
+bool use_sret(jl_datatype_t *dt, LLVMContext &ctx) override
 {
     size_t size = jl_datatype_size(dt);
     if (win64_reg_size(size) || is_native_simd_type(dt))
@@ -56,7 +56,7 @@ bool use_sret(jl_datatype_t *dt) override
     return true;
 }
 
-bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab) override
+bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab, LLVMContext &ctx) override
 {
     nargs++;
     size_t size = jl_datatype_size(dt);
@@ -67,11 +67,11 @@ bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab) override
     return true;
 }
 
-Type *preferred_llvm_type(jl_datatype_t *dt, bool isret) const override
+Type *preferred_llvm_type(jl_datatype_t *dt, bool isret, LLVMContext &ctx) const override
 {
     size_t size = jl_datatype_size(dt);
     if (size > 0 && win64_reg_size(size) && !jl_is_primitivetype(dt))
-        return Type::getIntNTy(jl_LLVMContext, jl_datatype_nbits(dt));
+        return Type::getIntNTy(ctx, jl_datatype_nbits(dt));
     return NULL;
 }
 
diff --git a/src/abi_x86.cpp b/src/abi_x86.cpp
index 7a65de028e083e..c68e657695f3c3 100644
--- a/src/abi_x86.cpp
+++ b/src/abi_x86.cpp
@@ -57,7 +57,7 @@ inline bool is_complex128(jl_datatype_t *dt) const
     return is_complex_type(dt) && jl_tparam0(dt) == (jl_value_t*)jl_float64_type;
 }
 
-bool use_sret(jl_datatype_t *dt) override
+bool use_sret(jl_datatype_t *dt, LLVMContext &ctx) override
 {
     size_t size = jl_datatype_size(dt);
     if (size == 0)
@@ -67,7 +67,7 @@ bool use_sret(jl_datatype_t *dt) override
     return true;
 }
 
-bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab) override
+bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab, LLVMContext &ctx) override
 {
     size_t size = jl_datatype_size(dt);
     if (is_complex64(dt) || is_complex128(dt) || (jl_is_primitivetype(dt) && size <= 8))
@@ -76,7 +76,7 @@ bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab) override
     return true;
 }
 
-Type *preferred_llvm_type(jl_datatype_t *dt, bool isret) const override
+Type *preferred_llvm_type(jl_datatype_t *dt, bool isret, LLVMContext &ctx) const override
 {
     if (!isret)
         return NULL;
diff --git a/src/abi_x86_64.cpp b/src/abi_x86_64.cpp
index ac28af3011ecd3..898e98dfcc262e 100644
--- a/src/abi_x86_64.cpp
+++ b/src/abi_x86_64.cpp
@@ -168,7 +168,7 @@ Classification classify(jl_datatype_t *dt) const
     return cl;
 }
 
-bool use_sret(jl_datatype_t *dt) override
+bool use_sret(jl_datatype_t *dt, LLVMContext &ctx) override
 {
     int sret = classify(dt).isMemory;
     if (sret) {
@@ -178,7 +178,7 @@ bool use_sret(jl_datatype_t *dt) override
     return sret;
 }
 
-bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab) override
+bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab, LLVMContext &ctx) override
 {
     Classification cl = classify(dt);
     if (cl.isMemory) {
@@ -210,7 +210,7 @@ bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab) override
 
 // Called on behalf of ccall to determine preferred LLVM representation
 // for an argument or return value.
-Type *preferred_llvm_type(jl_datatype_t *dt, bool isret) const override
+Type *preferred_llvm_type(jl_datatype_t *dt, bool isret, LLVMContext &ctx) const override
 {
     (void) isret;
     // no need to rewrite these types (they are returned as pointers anyways)
@@ -230,15 +230,15 @@ Type *preferred_llvm_type(jl_datatype_t *dt, bool isret) const override
     switch (cl.classes[0]) {
         case Integer:
             if (size >= 8)
-                types[0] = T_int64;
+                types[0] = Type::getInt64Ty(ctx);
             else
-                types[0] = Type::getIntNTy(jl_LLVMContext, nbits);
+                types[0] = Type::getIntNTy(ctx, nbits);
             break;
         case Sse:
             if (size <= 4)
-                types[0] = T_float32;
+                types[0] = Type::getFloatTy(ctx);
             else
-                types[0] = T_float64;
+                types[0] = Type::getDoubleTy(ctx);
             break;
         default:
             assert(0 && "Unexpected cl.classes[0]");
@@ -248,14 +248,14 @@ Type *preferred_llvm_type(jl_datatype_t *dt, bool isret) const override
             return types[0];
         case Integer:
             assert(size > 8);
-            types[1] = Type::getIntNTy(jl_LLVMContext, (nbits-64));
-            return StructType::get(jl_LLVMContext,ArrayRef<Type*>(&types[0],2));
+            types[1] = Type::getIntNTy(ctx, (nbits-64));
+            return StructType::get(ctx,ArrayRef<Type*>(&types[0],2));
         case Sse:
             if (size <= 12)
-                types[1] = T_float32;
+                types[1] = Type::getFloatTy(ctx);
             else
-                types[1] = T_float64;
-            return StructType::get(jl_LLVMContext,ArrayRef<Type*>(&types[0],2));
+                types[1] = Type::getDoubleTy(ctx);
+            return StructType::get(ctx,ArrayRef<Type*>(&types[0],2));
         default:
             assert(0 && "Unexpected cl.classes[0]");
     }
diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp
index 5dbdf7000f5e78..cfe3c562c7a597 100644
--- a/src/aotcompile.cpp
+++ b/src/aotcompile.cpp
@@ -5,11 +5,11 @@
 
 // target support
 #include <llvm/ADT/Triple.h>
+#include <llvm/Analysis/TargetLibraryInfo.h>
+#include <llvm/Analysis/TargetTransformInfo.h>
+#include <llvm/IR/DataLayout.h>
 #include <llvm/Support/TargetRegistry.h>
 #include <llvm/Target/TargetMachine.h>
-#include <llvm/IR/DataLayout.h>
-#include <llvm/Analysis/TargetTransformInfo.h>
-#include <llvm/Analysis/TargetLibraryInfo.h>
 
 // analysis passes
 #include <llvm/Analysis/Passes.h>
@@ -26,6 +26,7 @@
 #include <llvm/Transforms/IPO/AlwaysInliner.h>
 #include <llvm/Transforms/InstCombine/InstCombine.h>
 #include <llvm/Transforms/Scalar/InstSimplifyPass.h>
+#include <llvm/Transforms/Utils/SimplifyCFGOptions.h>
 #if defined(USE_POLLY)
 #include <polly/RegisterPasses.h>
 #include <polly/LinkAllPasses.h>
@@ -35,21 +36,11 @@
 #endif
 #endif
 
-// for outputting assembly
+// for outputting code
 #include <llvm/Bitcode/BitcodeWriter.h>
 #include <llvm/Bitcode/BitcodeWriterPass.h>
 #include "llvm/Object/ArchiveWriter.h"
 #include <llvm/IR/IRPrintingPasses.h>
-#include <llvm/CodeGen/AsmPrinter.h>
-#include <llvm/CodeGen/MachineModuleInfo.h>
-#include <llvm/CodeGen/TargetPassConfig.h>
-#include <llvm/MC/MCAsmInfo.h>
-#include <llvm/MC/MCStreamer.h>
-#include <llvm/MC/MCAsmBackend.h>
-#include <llvm/MC/MCCodeEmitter.h>
-#if JL_LLVM_VERSION >= 100000
-#include <llvm/Support/CodeGen.h>
-#endif
 
 #include <llvm/IR/LegacyPassManagers.h>
 #include <llvm/Transforms/Utils/Cloning.h>
@@ -62,12 +53,6 @@ namespace llvm {
     extern Pass *createLowerSimdLoopPass();
 }
 
-#if JL_LLVM_VERSION < 100000
-static const TargetMachine::CodeGenFileType CGFT_ObjectFile = TargetMachine::CGFT_ObjectFile;
-static const TargetMachine::CodeGenFileType CGFT_AssemblyFile = TargetMachine::CGFT_AssemblyFile;
-#endif
-
-
 #include "julia.h"
 #include "julia_internal.h"
 #include "jitlayers.h"
@@ -293,8 +278,8 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p
     JL_GC_PUSH1(&src);
     JL_LOCK(&codegen_lock);
     uint64_t compiler_start_time = 0;
-    int tid = jl_threadid();
-    if (jl_measure_compile_time[tid])
+    uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
+    if (measure_compile_time_enabled)
         compiler_start_time = jl_hrtime();
 
     CompilationPolicy policy = (CompilationPolicy) _policy;
@@ -422,8 +407,8 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p
     }
 
     data->M = std::move(clone);
-    if (jl_measure_compile_time[tid])
-        jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
+    if (measure_compile_time_enabled)
+        jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
     if (policy == CompilationPolicy::ImagingMode)
         imaging_mode = 0;
     JL_UNLOCK(&codegen_lock); // Might GC
@@ -501,10 +486,10 @@ void jl_dump_native(void *native_code,
     addTargetPasses(&PM, TM.get());
 
     // set up optimization passes
-    SmallVector<char, 128> bc_Buffer;
-    SmallVector<char, 128> obj_Buffer;
-    SmallVector<char, 128> asm_Buffer;
-    SmallVector<char, 128> unopt_bc_Buffer;
+    SmallVector<char, 0> bc_Buffer;
+    SmallVector<char, 0> obj_Buffer;
+    SmallVector<char, 0> asm_Buffer;
+    SmallVector<char, 0> unopt_bc_Buffer;
     raw_svector_ostream bc_OS(bc_Buffer);
     raw_svector_ostream obj_OS(obj_Buffer);
     raw_svector_ostream asm_OS(asm_Buffer);
@@ -624,11 +609,20 @@ void addMachinePasses(legacy::PassManagerBase *PM, TargetMachine *TM)
 }
 
 
+
 // this defines the set of optimization passes defined for Julia at various optimization levels.
 // it assumes that the TLI and TTI wrapper passes have already been added.
 void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
                            bool lower_intrinsics, bool dump_native)
 {
+    // Note: LLVM 12 disabled the hoisting of common instructions
+    //       before loop vectorization (https://reviews.llvm.org/D84108).
+    //
+    // TODO: Common-instruction hoisting/sinking enables AllocOpt
+    //       to merge allocations and sometimes eliminate them,
+    //       since AllocOpt does not handle PHI nodes.
+    //       Enable this instruction hoisting because of this and the Union benchmarks.
+    auto simplifyCFGOptions = SimplifyCFGOptions().hoistCommonInsts(true);
 #ifdef JL_DEBUG_BUILD
     PM->add(createGCInvariantVerifierPass(true));
     PM->add(createVerifierPass());
@@ -636,7 +630,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
 
     PM->add(createConstantMergePass());
     if (opt_level < 2) {
-        PM->add(createCFGSimplificationPass());
+        PM->add(createCFGSimplificationPass(simplifyCFGOptions));
         if (opt_level == 1) {
             PM->add(createSROAPass());
             PM->add(createInstructionCombiningPass());
@@ -662,13 +656,13 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
         PM->add(createLowerSimdLoopPass()); // Annotate loop marked with "loopinfo" as LLVM parallel loop
         if (dump_native)
             PM->add(createMultiVersioningPass());
-#if defined(JL_ASAN_ENABLED)
+#if defined(_COMPILER_ASAN_ENABLED_)
         PM->add(createAddressSanitizerFunctionPass());
 #endif
-#if defined(JL_MSAN_ENABLED)
+#if defined(_COMPILER_MSAN_ENABLED_)
         PM->add(createMemorySanitizerPass(true));
 #endif
-#if defined(JL_TSAN_ENABLED)
+#if defined(_COMPILER_TSAN_ENABLED_)
         PM->add(createThreadSanitizerLegacyPassPass());
 #endif
         return;
@@ -680,7 +674,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
         PM->add(createBasicAAWrapperPass());
     }
 
-    PM->add(createCFGSimplificationPass());
+    PM->add(createCFGSimplificationPass(simplifyCFGOptions));
     PM->add(createDeadCodeEliminationPass());
     PM->add(createSROAPass());
 
@@ -694,12 +688,13 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
     PM->add(createAllocOptPass());
     // consider AggressiveInstCombinePass at optlevel > 2
     PM->add(createInstructionCombiningPass());
-    PM->add(createCFGSimplificationPass());
+    PM->add(createCFGSimplificationPass(simplifyCFGOptions));
     if (dump_native)
         PM->add(createMultiVersioningPass());
     PM->add(createSROAPass());
     PM->add(createInstSimplifyLegacyPass());
     PM->add(createJumpThreadingPass());
+    PM->add(createCorrelatedValuePropagationPass());
 
     PM->add(createReassociatePass());
 
@@ -751,6 +746,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
     // loops over Union-typed arrays to vectorize.
     PM->add(createInstructionCombiningPass());
     PM->add(createJumpThreadingPass());
+    PM->add(createCorrelatedValuePropagationPass());
     PM->add(createDeadStoreEliminationPass());
 
     // More dead allocation (store) deletion before loop optimization
@@ -759,12 +755,21 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
     // see if all of the constant folding has exposed more loops
     // to simplification and deletion
     // this helps significantly with cleaning up iteration
-    PM->add(createCFGSimplificationPass());
+    PM->add(createCFGSimplificationPass()); // See note above, don't hoist instructions before LV
     PM->add(createLoopDeletionPass());
     PM->add(createInstructionCombiningPass());
     PM->add(createLoopVectorizePass());
     PM->add(createLoopLoadEliminationPass());
-    PM->add(createCFGSimplificationPass());
+    // Cleanup after LV pass
+    PM->add(createInstructionCombiningPass());
+    PM->add(createCFGSimplificationPass( // Aggressive CFG simplification
+        SimplifyCFGOptions()
+            .forwardSwitchCondToPhi(true)
+            .convertSwitchToLookupTable(true)
+            .needCanonicalLoops(false)
+            .hoistCommonInsts(true)
+            // .sinkCommonInsts(true) // FIXME: Causes assertion in llvm-late-lowering
+    ));
     PM->add(createSLPVectorizerPass());
     // might need this after LLVM 11:
     //PM->add(createVectorCombinePass());
@@ -800,13 +805,13 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
     }
     PM->add(createCombineMulAddPass());
     PM->add(createDivRemPairsPass());
-#if defined(JL_ASAN_ENABLED)
+#if defined(_COMPILER_ASAN_ENABLED_)
     PM->add(createAddressSanitizerFunctionPass());
 #endif
-#if defined(JL_MSAN_ENABLED)
+#if defined(_COMPILER_MSAN_ENABLED_)
     PM->add(createMemorySanitizerPass(true));
 #endif
-#if defined(JL_TSAN_ENABLED)
+#if defined(_COMPILER_TSAN_ENABLED_)
     PM->add(createThreadSanitizerLegacyPassPass());
 #endif
 }
@@ -851,7 +856,7 @@ void jl_add_optimization_passes(LLVMPassManagerRef PM, int opt_level, int lower_
 // --- native code info, and dump function to IR and ASM ---
 // Get pointer to llvm::Function instance, compiling if necessary
 // for use in reflection from Julia.
-// this is paired with jl_dump_function_ir, jl_dump_method_asm, jl_dump_llvm_asm in particular ways:
+// this is paired with jl_dump_function_ir, jl_dump_function_asm, jl_dump_method_asm in particular ways:
 // misuse will leak memory or cause read-after-free
 extern "C" JL_DLLEXPORT
 void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper, char optimize, const jl_cgparams_t params)
@@ -903,8 +908,8 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper,
         jl_llvm_functions_t decls;
         JL_LOCK(&codegen_lock);
         uint64_t compiler_start_time = 0;
-        int tid = jl_threadid();
-        if (jl_measure_compile_time[tid])
+        uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
+        if (measure_compile_time_enabled)
             compiler_start_time = jl_hrtime();
         std::tie(m, decls) = jl_emit_code(mi, src, jlrettype, output);
 
@@ -929,8 +934,8 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper,
             m.release(); // the return object `llvmf` will be the owning pointer
         }
         JL_GC_POP();
-        if (jl_measure_compile_time[tid])
-            jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
+        if (measure_compile_time_enabled)
+            jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
         JL_UNLOCK(&codegen_lock); // Might GC
         if (F)
             return F;
@@ -939,87 +944,3 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper,
     const char *mname = name_from_method_instance(mi);
     jl_errorf("unable to compile source for function %s", mname);
 }
-
-/// addPassesToX helper drives creation and initialization of TargetPassConfig.
-static MCContext *
-addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM) {
-    TargetPassConfig *PassConfig = TM->createPassConfig(PM);
-    PassConfig->setDisableVerify(false);
-    PM.add(PassConfig);
-#if JL_LLVM_VERSION >= 100000
-    MachineModuleInfoWrapperPass *MMIWP =
-        new MachineModuleInfoWrapperPass(TM);
-    PM.add(MMIWP);
-#else
-    MachineModuleInfo *MMI = new MachineModuleInfo(TM);
-    PM.add(MMI);
-#endif
-    if (PassConfig->addISelPasses())
-        return NULL;
-    PassConfig->addMachinePasses();
-    PassConfig->setInitialized();
-#if JL_LLVM_VERSION >= 100000
-    return &MMIWP->getMMI().getContext();
-#else
-    return &MMI->getContext();
-#endif
-}
-
-void jl_strip_llvm_debug(Module *m);
-
-
-// get a native assembly for llvm::Function
-// TODO: implement debuginfo handling
-extern "C" JL_DLLEXPORT
-jl_value_t *jl_dump_llvm_asm(void *F, const char* asm_variant, const char *debuginfo)
-{
-    // precise printing via IR assembler
-    SmallVector<char, 4096> ObjBufferSV;
-    { // scope block
-        Function *f = (Function*)F;
-        llvm::raw_svector_ostream asmfile(ObjBufferSV);
-        assert(!f->isDeclaration());
-        std::unique_ptr<Module> m(f->getParent());
-        for (auto &f2 : m->functions()) {
-            if (f != &f2 && !f->isDeclaration())
-                f2.deleteBody();
-        }
-        jl_strip_llvm_debug(m.get());
-        legacy::PassManager PM;
-        LLVMTargetMachine *TM = static_cast<LLVMTargetMachine*>(jl_TargetMachine);
-        MCContext *Context = addPassesToGenerateCode(TM, PM);
-        if (Context) {
-            const MCSubtargetInfo &STI = *TM->getMCSubtargetInfo();
-            const MCAsmInfo &MAI = *TM->getMCAsmInfo();
-            const MCRegisterInfo &MRI = *TM->getMCRegisterInfo();
-            const MCInstrInfo &MII = *TM->getMCInstrInfo();
-            unsigned OutputAsmDialect = MAI.getAssemblerDialect();
-            if (!strcmp(asm_variant, "att"))
-                OutputAsmDialect = 0;
-            if (!strcmp(asm_variant, "intel"))
-                OutputAsmDialect = 1;
-            MCInstPrinter *InstPrinter = TM->getTarget().createMCInstPrinter(
-                TM->getTargetTriple(), OutputAsmDialect, MAI, MII, MRI);
-             std::unique_ptr<MCAsmBackend> MAB(TM->getTarget().createMCAsmBackend(
-                STI, MRI, TM->Options.MCOptions));
-            std::unique_ptr<MCCodeEmitter> MCE;
-#if JL_LLVM_VERSION >= 100000
-            auto FOut = std::make_unique<formatted_raw_ostream>(asmfile);
-#else
-            auto FOut = llvm::make_unique<formatted_raw_ostream>(asmfile);
-#endif
-            std::unique_ptr<MCStreamer> S(TM->getTarget().createAsmStreamer(
-                *Context, std::move(FOut), true,
-                true, InstPrinter,
-                std::move(MCE), std::move(MAB),
-                false));
-            std::unique_ptr<AsmPrinter> Printer(
-                TM->getTarget().createAsmPrinter(*TM, std::move(S)));
-            if (Printer) {
-                PM.add(Printer.release());
-                PM.run(*m);
-            }
-        }
-    }
-    return jl_pchar_to_string(ObjBufferSV.data(), ObjBufferSV.size());
-}
diff --git a/src/array.c b/src/array.c
index 20c6cf77068805..43376d8ba222be 100644
--- a/src/array.c
+++ b/src/array.c
@@ -20,27 +20,31 @@ extern "C" {
 
 #define JL_ARRAY_ALIGN(jl_value, nbytes) LLT_ALIGN(jl_value, nbytes)
 
-// this is a version of memcpy that preserves atomic memory ordering
-// which makes it safe to use for objects that can contain memory references
-// without risk of creating pointers out of thin air
-// TODO: replace with LLVM's llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32
-//       aka `__llvm_memmove_element_unordered_atomic_8` (for 64 bit)
-static void memmove_refs(void **dstp, void *const *srcp, size_t n) JL_NOTSAFEPOINT
+static inline void arrayassign_safe(int hasptr, jl_value_t *parent, char *dst, const jl_value_t *src, size_t nb) JL_NOTSAFEPOINT
 {
-    size_t i;
-    if (dstp < srcp || dstp > srcp + n) {
-        for (i = 0; i < n; i++) {
-            jl_atomic_store_relaxed(dstp + i, jl_atomic_load_relaxed(srcp + i));
-        }
+    // array can assume more alignment than a field would normally have
+    assert(nb >= jl_datatype_size(jl_typeof(src))); // nb might move some undefined bits, but we should be okay with that
+    if (hasptr) {
+        size_t nptr = nb / sizeof(void*);
+        memmove_refs((void**)dst, (void**)src, nptr);
+        jl_gc_multi_wb(parent, src);
     }
     else {
-        for (i = 0; i < n; i++) {
-            jl_atomic_store_relaxed(dstp + n - i - 1, jl_atomic_load_relaxed(srcp + n - i - 1));
+        switch (nb) {
+        case  0: break;
+        case  1: *(uint8_t*)dst  = *(uint8_t*)src;  break;
+        case  2: *(uint16_t*)dst = *(uint16_t*)src; break;
+        case  4: *(uint32_t*)dst = *(uint32_t*)src; break;
+        case  8: *(uint64_t*)dst = *(uint64_t*)src; break;
+        case 16:
+            memcpy(jl_assume_aligned(dst, 16), jl_assume_aligned(src, 16), 16);
+            break;
+        default: memcpy(dst, src, nb);
         }
     }
 }
 
-static void memmove_safe(int hasptr, char *dst, const char *src, size_t nb) JL_NOTSAFEPOINT
+static inline void memmove_safe(int hasptr, char *dst, const char *src, size_t nb) JL_NOTSAFEPOINT
 {
     if (hasptr)
         memmove_refs((void**)dst, (void**)src, nb / sizeof(void*));
@@ -70,14 +74,12 @@ typedef __uint128_t wideint_t;
 typedef uint64_t wideint_t;
 #endif
 
-size_t jl_arr_xtralloc_limit = 0;
-
 #define MAXINTVAL (((size_t)-1)>>1)
 
 static jl_array_t *_new_array_(jl_value_t *atype, uint32_t ndims, size_t *dims,
                                int8_t isunboxed, int8_t hasptr, int8_t isunion, int8_t zeroinit, int elsz)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     size_t i, tot, nel=1;
     void *data;
     jl_array_t *a;
@@ -112,27 +114,30 @@ static jl_array_t *_new_array_(jl_value_t *atype, uint32_t ndims, size_t *dims,
     }
 
     int ndimwords = jl_array_ndimwords(ndims);
-    int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT);
+    int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t);
     if (tot <= ARRAY_INLINE_NBYTES) {
-        if (isunboxed && elsz >= 4)
-            tsz = JL_ARRAY_ALIGN(tsz, JL_SMALL_BYTE_ALIGNMENT); // align data area
+        // align data area
+        if (tot >= ARRAY_CACHE_ALIGN_THRESHOLD)
+            tsz = JL_ARRAY_ALIGN(tsz, JL_CACHE_BYTE_ALIGNMENT);
+        else if (isunboxed && elsz >= 4)
+            tsz = JL_ARRAY_ALIGN(tsz, JL_SMALL_BYTE_ALIGNMENT);
         size_t doffs = tsz;
         tsz += tot;
-        tsz = JL_ARRAY_ALIGN(tsz, JL_SMALL_BYTE_ALIGNMENT); // align whole object
-        a = (jl_array_t*)jl_gc_alloc(ptls, tsz, atype);
+        // jl_array_t is large enough that objects will always be aligned 16
+        a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype);
+        assert(((size_t)a & 15) == 0);
         // No allocation or safepoint allowed after this
         a->flags.how = 0;
         data = (char*)a + doffs;
     }
     else {
-        tsz = JL_ARRAY_ALIGN(tsz, JL_CACHE_BYTE_ALIGNMENT); // align whole object
         data = jl_gc_managed_malloc(tot);
         // Allocate the Array **after** allocating the data
         // to make sure the array is still young
-        a = (jl_array_t*)jl_gc_alloc(ptls, tsz, atype);
+        a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype);
         // No allocation or safepoint allowed after this
         a->flags.how = 2;
-        jl_gc_track_malloced_array(ptls, a);
+        jl_gc_track_malloced_array(ct->ptls, a);
     }
     a->flags.pooled = tsz <= GC_MAX_SZCLASS;
 
@@ -213,7 +218,7 @@ static inline int is_ntuple_long(jl_value_t *v)
 JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data,
                                           jl_value_t *_dims)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     jl_array_t *a;
     size_t ndims = jl_nfields(_dims);
     assert(is_ntuple_long(_dims));
@@ -221,8 +226,8 @@ JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data,
     assert(jl_types_equal(jl_tparam0(jl_typeof(data)), jl_tparam0(atype)));
 
     int ndimwords = jl_array_ndimwords(ndims);
-    int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords * sizeof(size_t) + sizeof(void*), JL_SMALL_BYTE_ALIGNMENT);
-    a = (jl_array_t*)jl_gc_alloc(ptls, tsz, atype);
+    int tsz = sizeof(jl_array_t) + ndimwords * sizeof(size_t) + sizeof(void*);
+    a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype);
     // No allocation or safepoint allowed after this
     a->flags.pooled = tsz <= GC_MAX_SZCLASS;
     a->flags.ndims = ndims;
@@ -298,12 +303,12 @@ JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data,
 
 JL_DLLEXPORT jl_array_t *jl_string_to_array(jl_value_t *str)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     jl_array_t *a;
 
     int ndimwords = jl_array_ndimwords(1);
-    int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t) + sizeof(void*), JL_SMALL_BYTE_ALIGNMENT);
-    a = (jl_array_t*)jl_gc_alloc(ptls, tsz, jl_array_uint8_type);
+    int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t) + sizeof(void*);
+    a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, jl_array_uint8_type);
     a->flags.pooled = tsz <= GC_MAX_SZCLASS;
     a->flags.ndims = 1;
     a->offset = 0;
@@ -327,7 +332,7 @@ JL_DLLEXPORT jl_array_t *jl_string_to_array(jl_value_t *str)
 JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data,
                                             size_t nel, int own_buffer)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     jl_array_t *a;
     jl_value_t *eltype = jl_tparam0(atype);
 
@@ -349,8 +354,8 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data,
                       "unsafe_wrap: pointer %p is not properly aligned to %u bytes", data, align);
 
     int ndimwords = jl_array_ndimwords(1);
-    int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT);
-    a = (jl_array_t*)jl_gc_alloc(ptls, tsz, atype);
+    int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t);
+    a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype);
     // No allocation or safepoint allowed after this
     a->flags.pooled = tsz <= GC_MAX_SZCLASS;
     a->data = data;
@@ -365,7 +370,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data,
     a->flags.isaligned = 0;  // TODO: allow passing memalign'd buffers
     if (own_buffer) {
         a->flags.how = 2;
-        jl_gc_track_malloced_array(ptls, a);
+        jl_gc_track_malloced_array(ct->ptls, a);
         jl_gc_count_allocd(nel*elsz + (elsz == 1 ? 1 : 0));
     }
     else {
@@ -381,7 +386,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data,
 JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data,
                                          jl_value_t *_dims, int own_buffer)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     size_t nel = 1;
     jl_array_t *a;
     size_t ndims = jl_nfields(_dims);
@@ -416,8 +421,8 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data,
                       "unsafe_wrap: pointer %p is not properly aligned to %u bytes", data, align);
 
     int ndimwords = jl_array_ndimwords(ndims);
-    int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT);
-    a = (jl_array_t*)jl_gc_alloc(ptls, tsz, atype);
+    int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t);
+    a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype);
     // No allocation or safepoint allowed after this
     a->flags.pooled = tsz <= GC_MAX_SZCLASS;
     a->data = data;
@@ -433,7 +438,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data,
     a->flags.isaligned = 0;
     if (own_buffer) {
         a->flags.how = 2;
-        jl_gc_track_malloced_array(ptls, a);
+        jl_gc_track_malloced_array(ct->ptls, a);
         jl_gc_count_allocd(nel*elsz + (elsz == 1 ? 1 : 0));
     }
     else {
@@ -512,30 +517,39 @@ JL_DLLEXPORT jl_value_t *jl_array_to_string(jl_array_t *a)
     return jl_pchar_to_string((const char*)jl_array_data(a), len);
 }
 
-JL_DLLEXPORT jl_value_t *jl_pchar_to_string(const char *str, size_t len)
+JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len)
 {
+    if (len == 0) // every empty string is the shared jl_an_empty_string; nothing is allocated
+        return jl_an_empty_string;
     size_t sz = sizeof(size_t) + len + 1; // add space for trailing \nul protector and size
     if (sz < len) // overflow
         jl_throw(jl_memory_exception);
-    if (len == 0)
-        return jl_an_empty_string;
-    jl_value_t *s = jl_gc_alloc_(jl_get_ptls_states(), sz, jl_string_type); // force inlining
+    jl_task_t *ct = jl_current_task;
+    jl_value_t *s;
+    jl_ptls_t ptls = ct->ptls;
+    const size_t allocsz = sz + sizeof(jl_taggedvalue_t); // object size plus one jl_taggedvalue_t
+    if (sz <= GC_MAX_SZCLASS) { // small string: allocate from the matching pool in ptls->heap.norm_pools
+        int pool_id = jl_gc_szclass_align8(allocsz);
+        jl_gc_pool_t *p = &ptls->heap.norm_pools[pool_id];
+        int osize = jl_gc_sizeclasses[pool_id];
+        s = jl_gc_pool_alloc(ptls, (char*)p - (char*)ptls, osize); // the pool is passed as its byte offset inside ptls
+    }
+    else {
+        if (allocsz < sz) // overflow in adding offs, size was "negative"
+            jl_throw(jl_memory_exception);
+        s = jl_gc_big_alloc(ptls, allocsz);
+    }
+    jl_set_typeof(s, jl_string_type); // neither allocator call above is given a type, so tag the object here
     *(size_t*)s = len;
-    memcpy((char*)s + sizeof(size_t), str, len);
-    ((char*)s + sizeof(size_t))[len] = 0;
+    jl_string_data(s)[len] = 0; // only the trailing NUL is written; the len bytes before it are not initialized here
     return s;
 }
 
-JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len)
+JL_DLLEXPORT jl_value_t *jl_pchar_to_string(const char *str, size_t len)
 {
-    size_t sz = sizeof(size_t) + len + 1; // add space for trailing \nul protector and size
-    if (sz < len) // overflow
-        jl_throw(jl_memory_exception);
-    if (len == 0)
-        return jl_an_empty_string;
-    jl_value_t *s = jl_gc_alloc_(jl_get_ptls_states(), sz, jl_string_type); // force inlining
-    *(size_t*)s = len;
-    ((char*)s + sizeof(size_t))[len] = 0;
+    jl_value_t *s = jl_alloc_string(len); // allocation, length field and trailing NUL are handled by jl_alloc_string
+    if (len > 0) // for len == 0 jl_alloc_string returned the shared jl_an_empty_string, so there is nothing to copy
+        memcpy(jl_string_data(s), str, len);
     return s;
 }
 
@@ -594,7 +608,10 @@ JL_DLLEXPORT jl_value_t *jl_arrayref(jl_array_t *a, size_t i)
         if (jl_is_datatype_singleton((jl_datatype_t*)eltype))
             return ((jl_datatype_t*)eltype)->instance;
     }
-    return undefref_check((jl_datatype_t*)eltype, jl_new_bits(eltype, &((char*)a->data)[i * a->elsize]));
+    jl_value_t *r = undefref_check((jl_datatype_t*)eltype, jl_new_bits(eltype, &((char*)a->data)[i * a->elsize]));
+    if (__unlikely(r == NULL))
+        jl_throw(jl_undefref_exception);
+    return r;
 }
 
 JL_DLLEXPORT int jl_array_isassigned(jl_array_t *a, size_t i)
@@ -622,6 +639,7 @@ JL_DLLEXPORT void jl_arrayset(jl_array_t *a JL_ROOTING_ARGUMENT, jl_value_t *rhs
         JL_GC_POP();
     }
     if (!a->flags.ptrarray) {
+        int hasptr;
         if (jl_is_uniontype(eltype)) {
             uint8_t *psel = &((uint8_t*)jl_array_typetagdata(a))[i];
             unsigned nth = 0;
@@ -630,15 +648,12 @@ JL_DLLEXPORT void jl_arrayset(jl_array_t *a JL_ROOTING_ARGUMENT, jl_value_t *rhs
             *psel = nth;
             if (jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(rhs)))
                 return;
-        }
-        if (a->flags.hasptr) {
-            memmove_refs((void**)&((char*)a->data)[i * a->elsize], (void**)rhs, a->elsize / sizeof(void*));
+            hasptr = 0;
         }
         else {
-            jl_assign_bits(&((char*)a->data)[i * a->elsize], rhs);
+            hasptr = a->flags.hasptr;
         }
-        if (a->flags.hasptr)
-            jl_gc_multi_wb(jl_array_owner(a), rhs);
+        arrayassign_safe(hasptr, jl_array_owner(a), &((char*)a->data)[i * a->elsize], rhs, a->elsize);
     }
     else {
         jl_atomic_store_relaxed(((jl_value_t**)a->data) + i, rhs);
@@ -672,7 +687,7 @@ JL_DLLEXPORT void jl_arrayunset(jl_array_t *a, size_t i)
 // the **beginning** of the new buffer.
 static int NOINLINE array_resize_buffer(jl_array_t *a, size_t newlen)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     assert(!a->flags.isshared || a->flags.how == 3);
     size_t elsz = a->elsize;
     size_t nbytes = newlen * elsz;
@@ -714,12 +729,12 @@ static int NOINLINE array_resize_buffer(jl_array_t *a, size_t newlen)
         newbuf = 1;
         if (nbytes >= MALLOC_THRESH) {
             a->data = jl_gc_managed_malloc(nbytes);
-            jl_gc_track_malloced_array(ptls, a);
+            jl_gc_track_malloced_array(ct->ptls, a);
             a->flags.how = 2;
             a->flags.isaligned = 1;
         }
         else {
-            a->data = jl_gc_alloc_buf(ptls, nbytes);
+            a->data = jl_gc_alloc_buf(ct->ptls, nbytes);
             a->flags.how = 1;
             jl_gc_wb_buf(a, a->data, nbytes);
         }
@@ -756,16 +771,23 @@ static void NOINLINE array_try_unshare(jl_array_t *a)
     }
 }
 
-static size_t limit_overallocation(jl_array_t *a, size_t alen, size_t newlen, size_t inc)
+size_t overallocation(size_t maxsize)
 {
-    // Limit overallocation to jl_arr_xtralloc_limit
-    size_t es = a->elsize;
-    size_t xtra_elems_mem = (newlen - a->offset - alen - inc) * es;
-    if (xtra_elems_mem > jl_arr_xtralloc_limit) {
-        // prune down
-        return alen + inc + a->offset + (jl_arr_xtralloc_limit / es);
-    }
-    return newlen;
+    if (maxsize < 8) // also keeps 0 away from __builtin_clz/__builtin_clzll below, whose result is undefined for 0
+        return 8;
+    // compute maxsize = maxsize + 4*maxsize^(7/8) + maxsize/8, approximating
+    // maxsize^(7/8) by 2^(exp2*7/8) (integer division), exp2 being the bit length of maxsize
+    // for small n, we grow faster than O(n)
+    // for large n, we grow at O(n/8), so as we reach O(memory) for
+    // memory>>1MB we end up adding about 10% of memory each time
+    int exp2 = sizeof(maxsize) * 8 -
+#ifdef _P64
+        __builtin_clzll(maxsize);
+#else
+        __builtin_clz(maxsize);
+#endif
+    maxsize += ((size_t)1 << (exp2 * 7 / 8)) * 4 + maxsize / 8;
+    return maxsize;
 }
 
 STATIC_INLINE void jl_array_grow_at_beg(jl_array_t *a, size_t idx, size_t inc,
@@ -813,10 +835,12 @@ STATIC_INLINE void jl_array_grow_at_beg(jl_array_t *a, size_t idx, size_t inc,
         size_t nb1 = idx * elsz;
         if (inc > (a->maxsize - n) / 2 - (a->maxsize - n) / 20) {
             // not enough room for requested growth from end of array
-            size_t newlen = a->maxsize == 0 ? inc * 2 : a->maxsize * 2;
+            size_t newlen = inc * 2;
             while (n + 2 * inc > newlen - a->offset)
                 newlen *= 2;
-            newlen = limit_overallocation(a, n, newlen, 2 * inc);
+            size_t newmaxsize = overallocation(a->maxsize);
+            if (newlen < newmaxsize)
+                newlen = newmaxsize;
             size_t newoffset = (newlen - newnrows) / 2;
             if (!array_resize_buffer(a, newlen)) {
                 data = (char*)a->data + oldoffsnb;
@@ -901,12 +925,11 @@ STATIC_INLINE void jl_array_grow_at_end(jl_array_t *a, size_t idx,
     if (__unlikely(reqmaxsize > a->maxsize)) {
         size_t nb1 = idx * elsz;
         size_t nbinc = inc * elsz;
-        // if the requested size is more than 2x current maxsize, grow exactly
-        // otherwise double the maxsize
-        size_t newmaxsize = reqmaxsize >= a->maxsize * 2
-                          ? (reqmaxsize < 4 ? 4 : reqmaxsize)
-                          : a->maxsize * 2;
-        newmaxsize = limit_overallocation(a, n, newmaxsize, inc);
+        // grow either by our computed overallocation factor or exactly the requested size,
+        // whichever is larger
+        size_t newmaxsize = overallocation(a->maxsize);
+        if (newmaxsize < reqmaxsize)
+            newmaxsize = reqmaxsize;
         size_t oldmaxsize = a->maxsize;
         int newbuf = array_resize_buffer(a, newmaxsize);
         char *newdata = (char*)a->data + a->offset * elsz;
@@ -1008,8 +1031,9 @@ STATIC_INLINE void jl_array_shrink(jl_array_t *a, size_t dec)
             typetagdata = (char*)malloc_s(a->nrows);
             memcpy(typetagdata, jl_array_typetagdata(a), a->nrows);
         }
+        jl_task_t *ct = jl_current_task;
         char *originaldata = (char*) a->data - a->offset * a->elsize;
-        char *newdata = (char*)jl_gc_alloc_buf(jl_get_ptls_states(), newbytes);
+        char *newdata = (char*)jl_gc_alloc_buf(ct->ptls, newbytes);
         jl_gc_wb_buf(a, newdata, newbytes);
         a->maxsize -= dec;
         if (isbitsunion) {
@@ -1095,7 +1119,7 @@ STATIC_INLINE void jl_array_del_at_beg(jl_array_t *a, size_t idx, size_t dec,
         // Move the rest of the data if the offset changed
         if (newoffs != offset) {
             memmove_safe(a->flags.hasptr, newdata + nb1, olddata + nb1 + nbdec, nbtotal - nb1);
-            if (isbitsunion) memmove(newtypetagdata + idx, typetagdata + idx + dec, n - idx);
+            if (isbitsunion) memmove(newtypetagdata + idx, typetagdata + idx + dec, a->nrows - idx);
         }
         a->data = newdata;
     }
diff --git a/src/ast.c b/src/ast.c
index cb96ec51976d3d..c33bf56d913792 100644
--- a/src/ast.c
+++ b/src/ast.c
@@ -28,6 +28,7 @@ extern "C" {
 
 // head symbols for each expression type
 jl_sym_t *call_sym;    jl_sym_t *invoke_sym;
+jl_sym_t *invoke_modify_sym;
 jl_sym_t *empty_sym;   jl_sym_t *top_sym;
 jl_sym_t *module_sym;  jl_sym_t *slot_sym;
 jl_sym_t *export_sym;  jl_sym_t *import_sym;
@@ -42,7 +43,7 @@ jl_sym_t *enter_sym;   jl_sym_t *leave_sym;
 jl_sym_t *pop_exception_sym;
 jl_sym_t *exc_sym;     jl_sym_t *error_sym;
 jl_sym_t *new_sym;     jl_sym_t *using_sym;
-jl_sym_t *splatnew_sym;
+jl_sym_t *splatnew_sym; jl_sym_t *block_sym;
 jl_sym_t *new_opaque_closure_sym;
 jl_sym_t *opaque_closure_method_sym;
 jl_sym_t *const_sym;   jl_sym_t *thunk_sym;
@@ -69,6 +70,16 @@ jl_sym_t *optlevel_sym; jl_sym_t *thismodule_sym;
 jl_sym_t *atom_sym; jl_sym_t *statement_sym; jl_sym_t *all_sym;
 jl_sym_t *compile_sym; jl_sym_t *infer_sym;
 
+jl_sym_t *atomic_sym;
+jl_sym_t *not_atomic_sym;
+jl_sym_t *unordered_sym;
+jl_sym_t *monotonic_sym;
+jl_sym_t *acquire_sym;
+jl_sym_t *release_sym;
+jl_sym_t *acquire_release_sym;
+jl_sym_t *sequentially_consistent_sym;
+
+
 static uint8_t flisp_system_image[] = {
 #include <julia_flisp.boot.inc>
 };
@@ -135,7 +146,7 @@ struct macroctx_stack {
 
 static jl_value_t *scm_to_julia(fl_context_t *fl_ctx, value_t e, jl_module_t *mod);
 static value_t julia_to_scm(fl_context_t *fl_ctx, jl_value_t *v);
-static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, struct macroctx_stack *macroctx, int onelevel, size_t world);
+static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, struct macroctx_stack *macroctx, int onelevel, size_t world, int throw_load_error);
 
 static value_t fl_defined_julia_global(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
 {
@@ -267,7 +278,7 @@ static jl_ast_context_list_t *jl_ast_ctx_freed = NULL;
 
 static jl_ast_context_t *jl_ast_ctx_enter(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     JL_SIGATOMIC_BEGIN();
     JL_LOCK_NOGC(&flisp_lock);
     jl_ast_context_list_t *node;
@@ -275,7 +286,7 @@ static jl_ast_context_t *jl_ast_ctx_enter(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOI
     // First check if the current task is using one of the contexts
     for (node = jl_ast_ctx_using;node;(node = node->next)) {
         ctx = jl_ast_context_list_item(node);
-        if (ctx->task == ptls->current_task) {
+        if (ctx->task == ct) {
             ctx->ref++;
             JL_UNLOCK_NOGC(&flisp_lock);
             return ctx;
@@ -287,7 +298,7 @@ static jl_ast_context_t *jl_ast_ctx_enter(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOI
         jl_ast_context_list_insert(&jl_ast_ctx_using, node);
         ctx = jl_ast_context_list_item(node);
         ctx->ref = 1;
-        ctx->task = ptls->current_task;
+        ctx->task = ct;
         ctx->module = NULL;
         JL_UNLOCK_NOGC(&flisp_lock);
         return ctx;
@@ -295,7 +306,7 @@ static jl_ast_context_t *jl_ast_ctx_enter(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOI
     // Construct a new one if we can't find any
     ctx = (jl_ast_context_t*)calloc(1, sizeof(jl_ast_context_t));
     ctx->ref = 1;
-    ctx->task = ptls->current_task;
+    ctx->task = ct;
     node = &ctx->list;
     jl_ast_context_list_insert(&jl_ast_ctx_using, node);
     JL_UNLOCK_NOGC(&flisp_lock);
@@ -318,11 +329,11 @@ static void jl_ast_ctx_leave(jl_ast_context_t *ctx)
 
 void jl_init_flisp(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     if (jl_ast_ctx_using || jl_ast_ctx_freed)
         return;
     jl_ast_main_ctx.ref = 1;
-    jl_ast_main_ctx.task = ptls->current_task;
+    jl_ast_main_ctx.task = ct;
     jl_ast_context_list_insert(&jl_ast_ctx_using, &jl_ast_main_ctx.list);
     jl_init_ast_ctx(&jl_ast_main_ctx);
     // To match the one in jl_ast_ctx_leave
@@ -335,6 +346,7 @@ void jl_init_common_symbols(void)
     empty_sym = jl_symbol("");
     call_sym = jl_symbol("call");
     invoke_sym = jl_symbol("invoke");
+    invoke_modify_sym = jl_symbol("invoke_modify");
     foreigncall_sym = jl_symbol("foreigncall");
     cfunction_sym = jl_symbol("cfunction");
     quote_sym = jl_symbol("quote");
@@ -407,9 +419,18 @@ void jl_init_common_symbols(void)
     aliasscope_sym = jl_symbol("aliasscope");
     popaliasscope_sym = jl_symbol("popaliasscope");
     thismodule_sym = jl_symbol("thismodule");
+    block_sym = jl_symbol("block");
     atom_sym = jl_symbol("atom");
     statement_sym = jl_symbol("statement");
     all_sym = jl_symbol("all");
+    atomic_sym = jl_symbol("atomic");
+    not_atomic_sym = jl_symbol("not_atomic");
+    unordered_sym = jl_symbol("unordered");
+    monotonic_sym = jl_symbol("monotonic");
+    acquire_sym = jl_symbol("acquire");
+    release_sym = jl_symbol("release");
+    acquire_release_sym = jl_symbol("acquire_release");
+    sequentially_consistent_sym = jl_symbol("sequentially_consistent");
 }
 
 JL_DLLEXPORT void jl_lisp_prompt(void)
@@ -675,8 +696,6 @@ static value_t julia_to_scm(fl_context_t *fl_ctx, jl_value_t *v)
 
 static void array_to_list(fl_context_t *fl_ctx, jl_array_t *a, value_t *pv, int check_valid)
 {
-    if (jl_array_len(a) > 650000)
-        lerror(fl_ctx, symbol(fl_ctx, "error"), "expression too large");
     value_t temp;
     for(long i=jl_array_len(a)-1; i >= 0; i--) {
         *pv = fl_cons(fl_ctx, fl_ctx->NIL, *pv);
@@ -761,6 +780,8 @@ static value_t julia_to_scm_(fl_context_t *fl_ctx, jl_value_t *v, int check_vali
         jl_expr_t *ex = (jl_expr_t*)v;
         value_t args = fl_ctx->NIL;
         fl_gc_handle(fl_ctx, &args);
+        if (jl_expr_nargs(ex) > 520000 && ex->head != block_sym)
+            lerror(fl_ctx, symbol(fl_ctx, "error"), "expression too large");
         array_to_list(fl_ctx, ex->args, &args, check_valid);
         value_t hd = julia_to_scm_(fl_ctx, (jl_value_t*)ex->head, check_valid);
         if (ex->head == lambda_sym && jl_expr_nargs(ex)>0 && jl_is_array(jl_exprarg(ex,0))) {
@@ -915,6 +936,8 @@ JL_DLLEXPORT jl_value_t *jl_copy_ast(jl_value_t *expr)
                 jl_array_ptr_ref(new_code, i)
             ));
         }
+        new_ci->code = new_code;
+        jl_gc_wb(new_ci, new_code);
         new_ci->slotnames = jl_array_copy(new_ci->slotnames);
         jl_gc_wb(new_ci, new_ci->slotnames);
         new_ci->slotflags = jl_array_copy(new_ci->slotflags);
@@ -1031,9 +1054,9 @@ int jl_has_meta(jl_array_t *body, jl_sym_t *sym) JL_NOTSAFEPOINT
     return 0;
 }
 
-static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule, jl_module_t **ctx, size_t world)
+static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule, jl_module_t **ctx, size_t world, int throw_load_error)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     JL_TIMING(MACRO_INVOCATION);
     size_t nargs = jl_array_len(args) + 1;
     JL_NARGSV("macrocall", 3); // macro name, location, and module
@@ -1051,8 +1074,8 @@ static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule
     for (i = 3; i < nargs; i++)
         margs[i] = jl_array_ptr_ref(args, i - 1);
 
-    size_t last_age = ptls->world_age;
-    ptls->world_age = world < jl_world_counter ? world : jl_world_counter;
+    size_t last_age = ct->world_age;
+    ct->world_age = world < jl_world_counter ? world : jl_world_counter;
     jl_value_t *result;
     JL_TRY {
         margs[0] = jl_toplevel_eval(*ctx, margs[0]);
@@ -1066,7 +1089,7 @@ static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule
         result = jl_invoke(margs[0], &margs[1], nargs - 1, mfunc);
     }
     JL_CATCH {
-        if (jl_loaderror_type == NULL) {
+        if ((jl_loaderror_type == NULL) || !throw_load_error) {
             jl_rethrow();
         }
         else {
@@ -1081,12 +1104,12 @@ static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule
                                            jl_current_exception()));
         }
     }
-    ptls->world_age = last_age;
+    ct->world_age = last_age;
     JL_GC_POP();
     return result;
 }
 
-static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, struct macroctx_stack *macroctx, int onelevel, size_t world)
+static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, struct macroctx_stack *macroctx, int onelevel, size_t world, int throw_load_error)
 {
     if (!expr || !jl_is_expr(expr))
         return expr;
@@ -1100,7 +1123,7 @@ static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, str
     if (e->head == quote_sym && jl_expr_nargs(e) == 1) {
         expr = jl_call_scm_on_ast("julia-bq-macro", jl_exprarg(e, 0), inmodule);
         JL_GC_PUSH1(&expr);
-        expr = jl_expand_macros(expr, inmodule, macroctx, onelevel, world);
+        expr = jl_expand_macros(expr, inmodule, macroctx, onelevel, world, throw_load_error);
         JL_GC_POP();
         return expr;
     }
@@ -1110,7 +1133,7 @@ static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, str
         JL_TYPECHK(hygienic-scope, module, (jl_value_t*)newctx.m);
         newctx.parent = macroctx;
         jl_value_t *a = jl_exprarg(e, 0);
-        jl_value_t *a2 = jl_expand_macros(a, inmodule, &newctx, onelevel, world);
+        jl_value_t *a2 = jl_expand_macros(a, inmodule, &newctx, onelevel, world, throw_load_error);
         if (a != a2)
             jl_array_ptr_set(e->args, 0, a2);
         return expr;
@@ -1119,7 +1142,7 @@ static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, str
         struct macroctx_stack newctx;
         newctx.m = macroctx ? macroctx->m : inmodule;
         newctx.parent = macroctx;
-        jl_value_t *result = jl_invoke_julia_macro(e->args, inmodule, &newctx.m, world);
+        jl_value_t *result = jl_invoke_julia_macro(e->args, inmodule, &newctx.m, world, throw_load_error);
         jl_value_t *wrap = NULL;
         JL_GC_PUSH3(&result, &wrap, &newctx.m);
         // copy and wrap the result in `(hygienic-scope ,result ,newctx)
@@ -1129,7 +1152,7 @@ static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, str
             wrap = (jl_value_t*)jl_exprn(hygienicscope_sym, 2);
         result = jl_copy_ast(result);
         if (!onelevel)
-            result = jl_expand_macros(result, inmodule, wrap ? &newctx : macroctx, onelevel, world);
+            result = jl_expand_macros(result, inmodule, wrap ? &newctx : macroctx, onelevel, world, throw_load_error);
         if (wrap) {
             jl_exprargset(wrap, 0, result);
             jl_exprargset(wrap, 1, newctx.m);
@@ -1151,7 +1174,7 @@ static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, str
         for (j = 2; j < nm; j++) {
             jl_exprargset(mc2, j+1, jl_exprarg(mc, j));
         }
-        jl_value_t *ret = jl_expand_macros((jl_value_t*)mc2, inmodule, macroctx, onelevel, world);
+        jl_value_t *ret = jl_expand_macros((jl_value_t*)mc2, inmodule, macroctx, onelevel, world, throw_load_error);
         JL_GC_POP();
         return ret;
     }
@@ -1162,7 +1185,7 @@ static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, str
     size_t i;
     for (i = 0; i < jl_array_len(e->args); i++) {
         jl_value_t *a = jl_array_ptr_ref(e->args, i);
-        jl_value_t *a2 = jl_expand_macros(a, inmodule, macroctx, onelevel, world);
+        jl_value_t *a2 = jl_expand_macros(a, inmodule, macroctx, onelevel, world, throw_load_error);
         if (a != a2)
             jl_array_ptr_set(e->args, i, a2);
     }
@@ -1174,7 +1197,7 @@ JL_DLLEXPORT jl_value_t *jl_macroexpand(jl_value_t *expr, jl_module_t *inmodule)
     JL_TIMING(LOWERING);
     JL_GC_PUSH1(&expr);
     expr = jl_copy_ast(expr);
-    expr = jl_expand_macros(expr, inmodule, NULL, 0, jl_world_counter);
+    expr = jl_expand_macros(expr, inmodule, NULL, 0, jl_world_counter, 0);
     expr = jl_call_scm_on_ast("jl-expand-macroscope", expr, inmodule);
     JL_GC_POP();
     return expr;
@@ -1185,7 +1208,7 @@ JL_DLLEXPORT jl_value_t *jl_macroexpand1(jl_value_t *expr, jl_module_t *inmodule
     JL_TIMING(LOWERING);
     JL_GC_PUSH1(&expr);
     expr = jl_copy_ast(expr);
-    expr = jl_expand_macros(expr, inmodule, NULL, 1, jl_world_counter);
+    expr = jl_expand_macros(expr, inmodule, NULL, 1, jl_world_counter, 0);
     expr = jl_call_scm_on_ast("jl-expand-macroscope", expr, inmodule);
     JL_GC_POP();
     return expr;
@@ -1211,7 +1234,7 @@ JL_DLLEXPORT jl_value_t *jl_expand_in_world(jl_value_t *expr, jl_module_t *inmod
     JL_TIMING(LOWERING);
     JL_GC_PUSH1(&expr);
     expr = jl_copy_ast(expr);
-    expr = jl_expand_macros(expr, inmodule, NULL, 0, world);
+    expr = jl_expand_macros(expr, inmodule, NULL, 0, world, 1);
     expr = jl_call_scm_on_ast_and_loc("jl-expand-to-thunk", expr, inmodule, file, line);
     JL_GC_POP();
     return expr;
@@ -1224,7 +1247,7 @@ JL_DLLEXPORT jl_value_t *jl_expand_with_loc_warn(jl_value_t *expr, jl_module_t *
     JL_TIMING(LOWERING);
     JL_GC_PUSH1(&expr);
     expr = jl_copy_ast(expr);
-    expr = jl_expand_macros(expr, inmodule, NULL, 0, ~(size_t)0);
+    expr = jl_expand_macros(expr, inmodule, NULL, 0, ~(size_t)0, 1);
     jl_ast_context_t *ctx = jl_ast_ctx_enter();
     fl_context_t *fl_ctx = &ctx->fl;
     JL_AST_PRESERVE_PUSH(ctx, old_roots, inmodule);
@@ -1245,7 +1268,7 @@ JL_DLLEXPORT jl_value_t *jl_expand_stmt_with_loc(jl_value_t *expr, jl_module_t *
     JL_TIMING(LOWERING);
     JL_GC_PUSH1(&expr);
     expr = jl_copy_ast(expr);
-    expr = jl_expand_macros(expr, inmodule, NULL, 0, ~(size_t)0);
+    expr = jl_expand_macros(expr, inmodule, NULL, 0, ~(size_t)0, 1);
     expr = jl_call_scm_on_ast_and_loc("jl-expand-to-thunk-stmt", expr, inmodule, file, line);
     JL_GC_POP();
     return expr;
@@ -1284,11 +1307,11 @@ JL_DLLEXPORT jl_value_t *jl_parse(const char *text, size_t text_len, jl_value_t
     args[2] = filename;
     args[3] = jl_box_ulong(offset);
     args[4] = options;
-    jl_ptls_t ptls = jl_get_ptls_states();
-    size_t last_age = ptls->world_age;
-    ptls->world_age = jl_world_counter;
+    jl_task_t *ct = jl_current_task;
+    size_t last_age = ct->world_age;
+    ct->world_age = jl_world_counter;
     jl_value_t *result = jl_apply(args, 5);
-    ptls->world_age = last_age;
+    ct->world_age = last_age;
     args[0] = result; // root during error checks below
     JL_TYPECHK(parse, simplevector, result);
     if (jl_svec_len(result) != 2)
diff --git a/src/ast.scm b/src/ast.scm
index 6ed530718e3dbe..e5148a507a4fdb 100644
--- a/src/ast.scm
+++ b/src/ast.scm
@@ -61,6 +61,12 @@
         (else
          (string e))))
 
+(define (deparse-semicolons n)
+  ; return a string of n ";" characters ("" whenever n <= 0)
+  (if (< 0 n)
+      (string (deparse-semicolons (- n 1)) ";")
+      ""))
+
 (define (deparse e (ilvl 0))
   (cond ((or (symbol? e) (number? e)) (string e))
         ((string? e) (print-to-string e))
@@ -134,7 +140,14 @@
            ((hcat)        (string #\[ (deparse-arglist (cdr e) " ") #\]))
            ((typed_hcat)  (string (deparse (cadr e))
                                   (deparse (cons 'hcat (cddr e)))))
+           ((ncat)        (string #\[ (deparse-arglist (cddr e) (string (deparse-semicolons (cadr e)) " "))
+                                      (if (= (length (cddr e)) 1)
+                                          (deparse-semicolons (cadr e))
+                                          "") #\]))
+           ((typed_ncat)  (string (deparse (cadr e))
+                                  (deparse (cons 'ncat (cddr e)))))
            ((row)        (deparse-arglist (cdr e) " "))
+           ((nrow)       (deparse-arglist (cddr e) (string (deparse-semicolons (cadr e)) " ")))
            ((braces)     (string #\{ (deparse-arglist (cdr e) ", ") #\}))
            ((bracescat)  (string #\{ (deparse-arglist (cdr e) "; ") #\}))
            ((string)
@@ -276,7 +289,7 @@
 ;; predicates and accessors
 
 (define (quoted? e)
-  (memq (car e) '(quote top core globalref outerref line break inert meta inbounds loopinfo)))
+  (memq (car e) '(quote top core globalref outerref line break inert meta inbounds inline noinline loopinfo)))
 (define (quotify e) `',e)
 (define (unquote e)
   (if (and (pair? e) (memq (car e) '(quote inert)))
@@ -306,7 +319,7 @@
          (bad-formal-argument v))
         (else
          (case (car v)
-           ((... kw)
+           ((...)
 	    (arg-name (cadr v)) ;; to check for errors
 	    (decl-var (cadr v)))
            ((|::|)
@@ -317,6 +330,8 @@
             (if (nospecialize-meta? v #t)
                 (arg-name (caddr v))
                 (bad-formal-argument v)))
+           ((kw)
+            (arg-name (cadr v)))
            (else (bad-formal-argument v))))))
 
 (define (arg-type v)
@@ -336,6 +351,8 @@
             (if (nospecialize-meta? v #t)
                 (arg-type (caddr v))
                 (bad-formal-argument v)))
+           ((kw)
+            (arg-type (cadr v)))
            (else (bad-formal-argument v))))))
 
 ;; convert a lambda list into a list of just symbols
@@ -350,6 +367,12 @@
 (define (decl? e)
   (and (pair? e) (eq? (car e) '|::|)))
 
+(define (symdecl? e)  ; true for a bare symbol or a `::` declaration form
+  (or (symbol? e) (decl? e)))
+
+(define (eventually-decl? e)  ; a `::` declaration, or an (atomic x) form whose x is a symbol or declaration
+  (or (decl? e) (and (pair? e) (eq? (car e) 'atomic) (symdecl? (cadr e)))))
+
 (define (make-decl n t) `(|::| ,n ,t))
 
 (define (ssavalue? e)
diff --git a/src/atomics.h b/src/atomics.h
index d3aa9d8ba8b3be..0fa5d6c193513a 100644
--- a/src/atomics.h
+++ b/src/atomics.h
@@ -20,6 +20,19 @@
 #endif
 #include <signal.h>
 
+enum jl_memory_order {
+    jl_memory_order_unspecified = -2, // caller did not name an ordering
+    jl_memory_order_invalid = -1, // what jl_get_atomic_order returns when no ordering matches
+    jl_memory_order_notatomic = 0,
+    jl_memory_order_unordered, // from here on each enumerator is one greater than the previous
+    jl_memory_order_monotonic,
+    jl_memory_order_consume,
+    jl_memory_order_acquire,
+    jl_memory_order_release,
+    jl_memory_order_acq_rel, // numeric order matters: builtins compare orderings with >= and >
+    jl_memory_order_seq_cst
+};
+
 /**
  * Thread synchronization primitives:
  *
@@ -30,23 +43,18 @@
  * synchronization in order to lower the mutator overhead as much as possible.
  *
  * We use the compiler intrinsics to implement a similar API to the c11/c++11
- * one instead of using it directly because,
- *
- *     1. We support GCC 4.7 and GCC add support for c11 atomics in 4.9.
- *        Luckily, the __atomic intrinsics were added in GCC 4.7.
- *     2. (most importantly) we need interoperability between code written
- *        in different languages.
- *        The current c++ standard (c++14) does not allow using c11 atomic
- *        functions or types and there's currently no guarantee that the two
- *        types are compatible (although most of them probably are).
- *        We also need to access these atomic variables from the LLVM JIT code
- *        which is very hard unless the layout of the object is fully
- *        specified.
+ * one instead of using it directly because we need interoperability between
+ * code written in different languages. The current c++ standard (c++14) does
+ * not allow using c11 atomic functions or types and there's currently no
+ * guarantee that the two types are compatible (although most of them probably
+ * are). We also need to access these atomic variables from the LLVM JIT code
+ * which is very hard unless the layout of the object is fully specified.
  */
-#if defined(__GNUC__)
-#  define jl_fence() __atomic_thread_fence(__ATOMIC_SEQ_CST)
-#  define jl_fence_release() __atomic_thread_fence(__ATOMIC_RELEASE)
-#  define jl_signal_fence() __atomic_signal_fence(__ATOMIC_SEQ_CST)
+#define jl_fence() __atomic_thread_fence(__ATOMIC_SEQ_CST)
+#define jl_fence_release() __atomic_thread_fence(__ATOMIC_RELEASE)
+#define jl_signal_fence() __atomic_signal_fence(__ATOMIC_SEQ_CST)
+
+
 #  define jl_atomic_fetch_add_relaxed(obj, arg)         \
     __atomic_fetch_add(obj, arg, __ATOMIC_RELAXED)
 #  define jl_atomic_fetch_add(obj, arg)                 \
@@ -61,22 +69,20 @@
     __atomic_fetch_or(obj, arg, __ATOMIC_RELAXED)
 #  define jl_atomic_fetch_or(obj, arg)                  \
     __atomic_fetch_or(obj, arg, __ATOMIC_SEQ_CST)
-// Returns the original value of `obj`
-// Use the legacy __sync builtins for now, this can also be written using
-// the __atomic builtins or c11 atomics with GNU extension or c11 _Generic
-#  define jl_atomic_compare_exchange(obj, expected, desired)    \
-    __sync_val_compare_and_swap(obj, expected, desired)
-#  define jl_atomic_bool_compare_exchange(obj, expected, desired)          \
-    __sync_bool_compare_and_swap(obj, expected, desired)
+#  define jl_atomic_cmpswap(obj, expected, desired)    \
+    __atomic_compare_exchange_n(obj, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
+#  define jl_atomic_cmpswap_relaxed(obj, expected, desired)    \
+    __atomic_compare_exchange_n(obj, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED)
+// TODO: Maybe add jl_atomic_cmpswap_weak for spin lock
 #  define jl_atomic_exchange(obj, desired)              \
     __atomic_exchange_n(obj, desired, __ATOMIC_SEQ_CST)
 #  define jl_atomic_exchange_relaxed(obj, desired)      \
     __atomic_exchange_n(obj, desired, __ATOMIC_RELAXED)
-// TODO: Maybe add jl_atomic_compare_exchange_weak for spin lock
 #  define jl_atomic_store(obj, val)                     \
     __atomic_store_n(obj, val, __ATOMIC_SEQ_CST)
 #  define jl_atomic_store_relaxed(obj, val)             \
     __atomic_store_n(obj, val, __ATOMIC_RELAXED)
+
 #  if defined(__clang__) || defined(__ICC) || defined(__INTEL_COMPILER) || \
     !(defined(_CPU_X86_) || defined(_CPU_X86_64_))
 // ICC and Clang doesn't have this bug...
@@ -86,6 +92,7 @@
 // Workaround a GCC bug when using store with release order by using the
 // stronger version instead.
 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67458
+// fixed in https://gcc.gnu.org/git/?p=gcc.git&a=commit;h=d8c40eff56f69877b33c697ded756d50fde90c27
 #    define jl_atomic_store_release(obj, val) do {      \
         jl_signal_fence();                              \
         __atomic_store_n(obj, val, __ATOMIC_RELEASE);   \
@@ -95,7 +102,7 @@
     __atomic_load_n(obj, __ATOMIC_SEQ_CST)
 #  define jl_atomic_load_acquire(obj)           \
     __atomic_load_n(obj, __ATOMIC_ACQUIRE)
-#ifdef JL_TSAN_ENABLED
+#ifdef _COMPILER_TSAN_ENABLED_
 // For the sake of tsan, call these loads consume ordering since they will act
 // as such on the processors we support while normally, the compiler would
 // upgrade this to acquire ordering, which is strong (and slower) than we want.
@@ -105,237 +112,50 @@
 #  define jl_atomic_load_relaxed(obj)           \
     __atomic_load_n(obj, __ATOMIC_RELAXED)
 #endif
-#elif defined(_COMPILER_MICROSOFT_)
-// TODO: these only define compiler barriers, and aren't correct outside of x86
-#  define jl_fence() _ReadWriteBarrier()
-#  define jl_fence_release() _WriteBarrier()
-#  define jl_signal_fence() _ReadWriteBarrier()
-
-// add
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 1, T>::type
-jl_atomic_fetch_add(T *obj, T2 arg)
-{
-    return (T)_InterlockedExchangeAdd8((volatile char*)obj, (char)arg);
-}
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 2, T>::type
-jl_atomic_fetch_add(T *obj, T2 arg)
-{
-    return (T)_InterlockedExchangeAdd16((volatile short*)obj, (short)arg);
-}
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 4, T>::type
-jl_atomic_fetch_add(T *obj, T2 arg)
-{
-    return (T)_InterlockedExchangeAdd((volatile LONG*)obj, (LONG)arg);
-}
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 8, T>::type
-jl_atomic_fetch_add(T *obj, T2 arg)
-{
-    return (T)_InterlockedExchangeAdd64((volatile __int64*)obj, (__int64)arg);
-}
-#define jl_atomic_fetch_add_relaxed(obj, arg) jl_atomic_fetch_add(obj, arg)
-
-// and
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 1, T>::type
-jl_atomic_fetch_and(T *obj, T2 arg)
-{
-    return (T)_InterlockedAnd8((volatile char*)obj, (char)arg);
-}
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 2, T>::type
-jl_atomic_fetch_and(T *obj, T2 arg)
-{
-    return (T)_InterlockedAnd16((volatile short*)obj, (short)arg);
-}
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 4, T>::type
-jl_atomic_fetch_and(T *obj, T2 arg)
-{
-    return (T)_InterlockedAnd((volatile LONG*)obj, (LONG)arg);
-}
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 8, T>::type
-jl_atomic_fetch_and(T *obj, T2 arg)
-{
-    return (T)_InterlockedAnd64((volatile __int64*)obj, (__int64)arg);
-}
-#define jl_atomic_fetch_and_relaxed(obj, arg) jl_atomic_fetch_and(obj, arg)
-
-// or
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 1, T>::type
-jl_atomic_fetch_or(T *obj, T2 arg)
-{
-    return (T)_InterlockedOr8((volatile char*)obj, (char)arg);
-}
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 2, T>::type
-jl_atomic_fetch_or(T *obj, T2 arg)
-{
-    return (T)_InterlockedOr16((volatile short*)obj, (short)arg);
-}
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 4, T>::type
-jl_atomic_fetch_or(T *obj, T2 arg)
-{
-    return (T)_InterlockedOr((volatile LONG*)obj, (LONG)arg);
-}
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 8, T>::type
-jl_atomic_fetch_or(T *obj, T2 arg)
-{
-    return (T)_InterlockedOr64((volatile __int64*)obj, (__int64)arg);
-}
-#define jl_atomic_fetch_or_relaxed(obj, arg) jl_atomic_fetch_or(obj, arg)
-
-// Returns the original value of `obj`
-template<typename T, typename T2, typename T3>
-static inline typename std::enable_if<sizeof(T) == 1, T>::type
-jl_atomic_compare_exchange(volatile T *obj, T2 expected, T3 desired)
-{
-    return (T)_InterlockedCompareExchange8((volatile char*)obj,
-                                           (char)desired, (char)expected);
-}
-template<typename T, typename T2, typename T3>
-static inline typename std::enable_if<sizeof(T) == 2, T>::type
-jl_atomic_compare_exchange(volatile T *obj, T2 expected, T3 desired)
-{
-    return (T)_InterlockedCompareExchange16((volatile short*)obj,
-                                            (short)desired, (short)expected);
-}
-template<typename T, typename T2, typename T3>
-static inline typename std::enable_if<sizeof(T) == 4, T>::type
-jl_atomic_compare_exchange(volatile T *obj, T2 expected, T3 desired)
-{
-    return (T)_InterlockedCompareExchange((volatile LONG*)obj,
-                                          (LONG)desired, (LONG)expected);
-}
-template<typename T, typename T2, typename T3>
-static inline typename std::enable_if<sizeof(T) == 8, T>::type
-jl_atomic_compare_exchange(volatile T *obj, T2 expected, T3 desired)
-{
-    return (T)_InterlockedCompareExchange64((volatile __int64*)obj,
-                                            (__int64)desired, (__int64)expected);
-}
-// TODO: jl_atomic_bool_compare_exchange
-// atomic exchange
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 1, T>::type
-jl_atomic_exchange(volatile T *obj, T2 val)
-{
-    return _InterlockedExchange8((volatile char*)obj, (char)val);
-}
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 2, T>::type
-jl_atomic_exchange(volatile T *obj, T2 val)
-{
-    return _InterlockedExchange16((volatile short*)obj, (short)val);
-}
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 4, T>::type
-jl_atomic_exchange(volatile T *obj, T2 val)
-{
-    return _InterlockedExchange((volatile LONG*)obj, (LONG)val);
-}
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 8, T>::type
-jl_atomic_exchange(volatile T *obj, T2 val)
-{
-    return _InterlockedExchange64((volatile __int64*)obj, (__int64)val);
-}
-#define jl_atomic_exchange_relaxed(obj, val) jl_atomic_exchange(obj, val)
-// atomic stores
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 1>::type
-jl_atomic_store(volatile T *obj, T2 val)
-{
-    _InterlockedExchange8((volatile char*)obj, (char)val);
-}
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 2>::type
-jl_atomic_store(volatile T *obj, T2 val)
-{
-    _InterlockedExchange16((volatile short*)obj, (short)val);
-}
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 4>::type
-jl_atomic_store(volatile T *obj, T2 val)
-{
-    _InterlockedExchange((volatile LONG*)obj, (LONG)val);
-}
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 8>::type
-jl_atomic_store(volatile T *obj, T2 val)
-{
-    _InterlockedExchange64((volatile __int64*)obj, (__int64)val);
-}
-template<typename T, typename T2>
-static inline void jl_atomic_store_release(volatile T *obj, T2 val)
-{
-    jl_signal_fence();
-    *obj = (T)val;
-}
-template<typename T, typename T2>
-static inline void jl_atomic_store_relaxed(volatile T *obj, T2 val)
-{
-    *obj = (T)val;
-}
-// atomic loads
-template<typename T>
-static inline T jl_atomic_load(volatile T *obj)
-{
-    // Trick to generate cheaper instructions compare to `_InterlockedOr`
-    // Note that we don't care whether the exchange succeeded or not...
-    return jl_atomic_compare_exchange(obj, T(0), T(0));
-}
-template<typename T>
-static inline T jl_atomic_load_acquire(volatile T *obj)
-{
-    T val = *obj;
-    jl_signal_fence();
-    return val;
-}
-#else
-#  error "No atomic operations supported."
-#endif
 
 #ifdef __clang_analyzer__
 // for the purposes of the analyzer, we can turn these into non-atomic expressions with similar properties
+// (for the sake of the analyzer, we don't care if it is an exact match for behavior)
 
 #undef jl_atomic_exchange
 #undef jl_atomic_exchange_relaxed
 #define jl_atomic_exchange(obj, desired) \
     (__extension__({ \
-            __typeof__((obj)) p = (obj); \
-            __typeof__(*p) temp = *p; \
-            *p = desired; \
-            temp; \
+            __typeof__((obj)) p__analyzer__ = (obj); \
+            __typeof__(*p__analyzer__) temp__analyzer__ = *p__analyzer__; \
+            *p__analyzer__ = (desired); \
+            temp__analyzer__; \
         }))
 #define jl_atomic_exchange_relaxed jl_atomic_exchange
 
-#undef jl_atomic_compare_exchange
-#define jl_atomic_compare_exchange(obj, expected, desired) ((expected), jl_atomic_exchange((obj), (desired)))
-
-#undef jl_atomic_bool_compare_exchange
-#define jl_atomic_bool_compare_exchange(obj, expected, desired) ((expected) == jl_atomic_exchange((obj), (desired)))
+#undef jl_atomic_cmpswap
+#undef jl_atomic_cmpswap_relaxed // model below yields 1 on success, 0 on failure (after copying the observed value into *expected)
+#define jl_atomic_cmpswap(obj, expected, desired) \
+    (__extension__({ \
+            __typeof__((obj)) p__analyzer__ = (obj); \
+            __typeof__(*p__analyzer__) temp__analyzer__ = *p__analyzer__; \
+            __typeof__((expected)) x__analyzer__ = (expected); \
+            int eq__analyzer__ = (temp__analyzer__ == *x__analyzer__); \
+            if (eq__analyzer__) \
+                *p__analyzer__ = (desired); \
+            else *x__analyzer__ = temp__analyzer__; \
+            eq__analyzer__; \
+        }))
+#define jl_atomic_cmpswap_relaxed jl_atomic_cmpswap
 
 #undef jl_atomic_store
 #undef jl_atomic_store_release
 #undef jl_atomic_store_relaxed
 #define jl_atomic_store(obj, val)         (*(obj) = (val))
-#define jl_atomic_store_release(obj, val) (*(obj) = (val))
-#define jl_atomic_store_relaxed(obj, val) (*(obj) = (val))
+#define jl_atomic_store_release jl_atomic_store
+#define jl_atomic_store_relaxed jl_atomic_store
 
 #undef jl_atomic_load
 #undef jl_atomic_load_acquire
 #undef jl_atomic_load_relaxed
 #define jl_atomic_load(obj)         (*(obj))
-#define jl_atomic_load_acquire(obj) (*(obj))
-#define jl_atomic_load_relaxed(obj) (*(obj))
+#define jl_atomic_load_acquire jl_atomic_load
+#define jl_atomic_load_relaxed jl_atomic_load
 
 #endif
 
diff --git a/src/builtin_proto.h b/src/builtin_proto.h
index c4d6166a5c1947..49d3cd7fe87e13 100644
--- a/src/builtin_proto.h
+++ b/src/builtin_proto.h
@@ -19,22 +19,38 @@ extern "C" {
     extern jl_value_t *jl_builtin_##name
 #endif
 
-DECLARE_BUILTIN(throw);      DECLARE_BUILTIN(is);
-DECLARE_BUILTIN(typeof);     DECLARE_BUILTIN(sizeof);
-DECLARE_BUILTIN(issubtype);  DECLARE_BUILTIN(isa);
+DECLARE_BUILTIN(applicable);
+DECLARE_BUILTIN(_apply_iterate);
 DECLARE_BUILTIN(_apply_pure);
-DECLARE_BUILTIN(_call_latest); DECLARE_BUILTIN(_apply_iterate);
+DECLARE_BUILTIN(apply_type);
+DECLARE_BUILTIN(arrayref);
+DECLARE_BUILTIN(arrayset);
+DECLARE_BUILTIN(arraysize);
 DECLARE_BUILTIN(_call_in_world);
-DECLARE_BUILTIN(isdefined);  DECLARE_BUILTIN(nfields);
-DECLARE_BUILTIN(tuple);      DECLARE_BUILTIN(svec);
-DECLARE_BUILTIN(getfield);   DECLARE_BUILTIN(setfield);
-DECLARE_BUILTIN(fieldtype);  DECLARE_BUILTIN(arrayref);
+DECLARE_BUILTIN(_call_latest);
+DECLARE_BUILTIN(replacefield);
 DECLARE_BUILTIN(const_arrayref);
-DECLARE_BUILTIN(arrayset);   DECLARE_BUILTIN(arraysize);
-DECLARE_BUILTIN(apply_type); DECLARE_BUILTIN(applicable);
-DECLARE_BUILTIN(invoke);     DECLARE_BUILTIN(_expr);
-DECLARE_BUILTIN(typeassert); DECLARE_BUILTIN(ifelse);
-DECLARE_BUILTIN(_typevar);   DECLARE_BUILTIN(_typebody);
+DECLARE_BUILTIN(_expr);
+DECLARE_BUILTIN(fieldtype);
+DECLARE_BUILTIN(getfield);
+DECLARE_BUILTIN(ifelse);
+DECLARE_BUILTIN(invoke);
+DECLARE_BUILTIN(is);
+DECLARE_BUILTIN(isa);
+DECLARE_BUILTIN(isdefined);
+DECLARE_BUILTIN(issubtype);
+DECLARE_BUILTIN(modifyfield);
+DECLARE_BUILTIN(nfields);
+DECLARE_BUILTIN(setfield);
+DECLARE_BUILTIN(sizeof);
+DECLARE_BUILTIN(svec);
+DECLARE_BUILTIN(swapfield);
+DECLARE_BUILTIN(throw);
+DECLARE_BUILTIN(tuple);
+DECLARE_BUILTIN(typeassert);
+DECLARE_BUILTIN(_typebody);
+DECLARE_BUILTIN(typeof);
+DECLARE_BUILTIN(_typevar);
 
 JL_CALLABLE(jl_f_invoke_kwsorter);
 JL_CALLABLE(jl_f__structtype);
diff --git a/src/builtins.c b/src/builtins.c
index c6abe4b8e602d5..32afff52e0b5f4 100644
--- a/src/builtins.c
+++ b/src/builtins.c
@@ -32,7 +32,7 @@ extern "C" {
 
 // egal and object_id ---------------------------------------------------------
 
-static int bits_equal(void *a, void *b, int sz) JL_NOTSAFEPOINT
+static int bits_equal(const void *a, const void *b, int sz) JL_NOTSAFEPOINT
 {
     switch (sz) {
     case 1:  return *(int8_t*)a == *(int8_t*)b;
@@ -76,7 +76,7 @@ static int NOINLINE compare_svec(jl_svec_t *a, jl_svec_t *b) JL_NOTSAFEPOINT
 }
 
 // See comment above for an explanation of NOINLINE.
-static int NOINLINE compare_fields(jl_value_t *a, jl_value_t *b, jl_datatype_t *dt) JL_NOTSAFEPOINT
+static int NOINLINE compare_fields(const jl_value_t *a, const jl_value_t *b, jl_datatype_t *dt) JL_NOTSAFEPOINT
 {
     size_t f, nf = jl_datatype_nfields(dt);
     for (f = 0; f < nf; f++) {
@@ -126,7 +126,7 @@ static int NOINLINE compare_fields(jl_value_t *a, jl_value_t *b, jl_datatype_t *
     return 1;
 }
 
-static int egal_types(jl_value_t *a, jl_value_t *b, jl_typeenv_t *env, int tvar_names) JL_NOTSAFEPOINT
+static int egal_types(const jl_value_t *a, const jl_value_t *b, jl_typeenv_t *env, int tvar_names) JL_NOTSAFEPOINT
 {
     if (a == b)
         return 1;
@@ -183,7 +183,7 @@ static int egal_types(jl_value_t *a, jl_value_t *b, jl_typeenv_t *env, int tvar_
     }
     if (dt == jl_symbol_type)
         return 0;
-    assert(!dt->mutabl);
+    assert(!dt->name->mutabl);
     return jl_egal__bits(a, b, dt);
 }
 
@@ -192,13 +192,19 @@ JL_DLLEXPORT int jl_types_egal(jl_value_t *a, jl_value_t *b)
     return egal_types(a, b, NULL, 0);
 }
 
-JL_DLLEXPORT int (jl_egal)(jl_value_t *a JL_MAYBE_UNROOTED, jl_value_t *b JL_MAYBE_UNROOTED) JL_NOTSAFEPOINT
+JL_DLLEXPORT int (jl_egal)(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED) JL_NOTSAFEPOINT // NOTE(review): name is parenthesized, presumably so a function-like jl_egal macro is not expanded here — confirm
 {
     // warning: a,b may NOT have been gc-rooted by the caller
     return jl_egal(a, b);
 }
 
-int jl_egal__special(jl_value_t *a JL_MAYBE_UNROOTED, jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT
+JL_DLLEXPORT int jl_egal__unboxed(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT
+{
+    // exported entry point that forwards to jl_egal__unboxed_; warning: a,b may NOT have been gc-rooted by the caller
+    return jl_egal__unboxed_(a, b, dt);
+}
+
+int jl_egal__special(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT
 {
     if (dt == jl_simplevector_type)
         return compare_svec((jl_svec_t*)a, (jl_svec_t*)b);
@@ -221,7 +227,7 @@ int jl_egal__special(jl_value_t *a JL_MAYBE_UNROOTED, jl_value_t *b JL_MAYBE_UNR
     return 0;
 }
 
-int jl_egal__bits(jl_value_t *a JL_MAYBE_UNROOTED, jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT
+int jl_egal__bits(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT
 {
     size_t sz = jl_datatype_size(dt);
     if (sz == 0)
@@ -324,7 +330,7 @@ static uintptr_t type_object_id_(jl_value_t *v, jl_varidx_t *env) JL_NOTSAFEPOIN
     }
     if (tv == jl_symbol_type)
         return ((jl_sym_t*)v)->hash;
-    assert(!tv->mutabl);
+    assert(!tv->name->mutabl);
     return immut_id_(tv, v, tv->hash);
 }
 
@@ -358,7 +364,7 @@ static uintptr_t immut_id_(jl_datatype_t *dt, jl_value_t *v, uintptr_t h) JL_NOT
                 uint8_t sel = ((uint8_t*)vo)[jl_field_size(dt, f) - 1];
                 fieldtype = (jl_datatype_t*)jl_nth_union_component((jl_value_t*)fieldtype, sel);
             }
-            assert(jl_is_datatype(fieldtype) && !fieldtype->abstract && !fieldtype->mutabl);
+            assert(jl_is_datatype(fieldtype) && !fieldtype->name->abstract && !fieldtype->name->mutabl);
             int32_t first_ptr = fieldtype->layout->first_ptr;
             if (first_ptr >= 0 && ((jl_value_t**)vo)[first_ptr] == NULL) {
                 // If the field is a inline immutable that can be can be undef
@@ -391,7 +397,7 @@ static uintptr_t NOINLINE jl_object_id__cold(jl_datatype_t *dt, jl_value_t *v) J
         return memhash32_seed(jl_string_data(v), jl_string_len(v), 0xedc3b677);
 #endif
     }
-    if (dt->mutabl)
+    if (dt->name->mutabl)
         return inthash((uintptr_t)v);
     return immut_id_(dt, v, dt->hash);
 }
@@ -451,7 +457,7 @@ JL_CALLABLE(jl_f_sizeof)
     if (jl_is_datatype(x)) {
         jl_datatype_t *dx = (jl_datatype_t*)x;
         if (dx->layout == NULL) {
-            if (dx->abstract)
+            if (dx->name->abstract)
                 jl_errorf("Abstract type %s does not have a definite size.", jl_symbol_name(dx->name->name));
             else
                 jl_errorf("Argument is an incomplete %s type and does not have a definite size.", jl_symbol_name(dx->name->name));
@@ -473,7 +479,7 @@ JL_CALLABLE(jl_f_sizeof)
         return jl_box_long((1+jl_svec_len(x))*sizeof(void*));
     jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(x);
     assert(jl_is_datatype(dt));
-    assert(!dt->abstract);
+    assert(!dt->name->abstract);
     return jl_box_long(jl_datatype_size(dt));
 }
 
@@ -718,24 +724,24 @@ JL_CALLABLE(jl_f__apply_iterate)
 // this is like `_apply`, but with quasi-exact checks to make sure it is pure
 JL_CALLABLE(jl_f__apply_pure)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    int last_in = ptls->in_pure_callback;
+    jl_task_t *ct = jl_current_task;
+    int last_in = ct->ptls->in_pure_callback;
     jl_value_t *ret = NULL;
     JL_TRY {
-        ptls->in_pure_callback = 1;
+        ct->ptls->in_pure_callback = 1;
         // because this function was declared pure,
         // we should be allowed to run it in any world
         // so we run it in the newest world;
         // because, why not :)
         // and `promote` works better this way
-        size_t last_age = ptls->world_age;
-        ptls->world_age = jl_world_counter;
+        size_t last_age = ct->world_age;
+        ct->world_age = jl_world_counter;
         ret = do_apply(args, nargs, NULL);
-        ptls->world_age = last_age;
-        ptls->in_pure_callback = last_in;
+        ct->world_age = last_age;
+        ct->ptls->in_pure_callback = last_in;
     }
     JL_CATCH {
-        ptls->in_pure_callback = last_in;
+        ct->ptls->in_pure_callback = last_in;
         jl_rethrow();
     }
     return ret;
@@ -744,12 +750,12 @@ JL_CALLABLE(jl_f__apply_pure)
 // this is like a regular call, but always runs in the newest world
 JL_CALLABLE(jl_f__call_latest)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    size_t last_age = ptls->world_age;
-    if (!ptls->in_pure_callback)
-        ptls->world_age = jl_world_counter;
+    jl_task_t *ct = jl_current_task;
+    size_t last_age = ct->world_age;
+    if (!ct->ptls->in_pure_callback)
+        ct->world_age = jl_world_counter;
     jl_value_t *ret = jl_apply(args, nargs);
-    ptls->world_age = last_age;
+    ct->world_age = last_age;
     return ret;
 }
 
@@ -758,15 +764,15 @@ JL_CALLABLE(jl_f__call_latest)
 JL_CALLABLE(jl_f__call_in_world)
 {
     JL_NARGSV(_apply_in_world, 2);
-    jl_ptls_t ptls = jl_get_ptls_states();
-    size_t last_age = ptls->world_age;
+    jl_task_t *ct = jl_current_task;
+    size_t last_age = ct->world_age; // saved so it can be put back once the call returns
     JL_TYPECHK(_apply_in_world, ulong, args[0]);
     size_t world = jl_unbox_ulong(args[0]);
     world = world <= jl_world_counter ? world : jl_world_counter;
-    if (!ptls->in_pure_callback)
-        ptls->world_age = world;
+    if (!ct->ptls->in_pure_callback) // inside a pure callback the world age is left unchanged
+        ct->world_age = world; // requested world, already clamped to jl_world_counter above
     jl_value_t *ret = jl_apply(&args[1], nargs - 1);
-    ptls->world_age = last_age;
+    ct->world_age = last_age;
     return ret;
 }
 
@@ -781,10 +787,10 @@ JL_CALLABLE(jl_f_tuple)
     JL_GC_PROMISE_ROOTED(tt); // it is a concrete type
     if (tt->instance != NULL)
         return tt->instance;
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_value_t *jv = jl_gc_alloc(ptls, jl_datatype_size(tt), tt);
+    jl_task_t *ct = jl_current_task;
+    jl_value_t *jv = jl_gc_alloc(ct->ptls, jl_datatype_size(tt), tt);
     for (i = 0; i < nargs; i++)
-        set_nth_field(tt, (void*)jv, i, args[i]);
+        set_nth_field(tt, jv, i, args[i], 0);
     return jv;
 }
 
@@ -802,64 +808,183 @@ JL_CALLABLE(jl_f_svec)
 
 // struct operations ------------------------------------------------------------
 
+enum jl_memory_order jl_get_atomic_order(jl_sym_t *order, char loading, char storing)
+{
+    // Translate an ordering symbol into its jl_memory_order value.
+    // `loading` / `storing` are nonzero when the access reads / writes.
+    // The tests are tried top to bottom and the first one that holds decides
+    // the result. A symbol that matches none of them, or whose ordering does
+    // not fit this kind of access (e.g. acquire without a load), yields
+    // jl_memory_order_invalid.
+    return
+        (order == not_atomic_sym) ? jl_memory_order_notatomic :
+        (order == unordered_sym && (loading ^ storing)) ? jl_memory_order_unordered :
+        (order == monotonic_sym && (loading || storing)) ? jl_memory_order_monotonic :
+        (order == acquire_sym && loading) ? jl_memory_order_acquire :
+        (order == release_sym && storing) ? jl_memory_order_release :
+        (order == acquire_release_sym && loading && storing) ? jl_memory_order_acq_rel :
+        (order == sequentially_consistent_sym) ? jl_memory_order_seq_cst :
+        jl_memory_order_invalid;
+}
+
+enum jl_memory_order jl_get_atomic_order_checked(jl_sym_t *order, char loading, char storing)
+{
+    enum jl_memory_order checked = jl_get_atomic_order(order, loading, storing);
+    if (!(checked >= 0)) // a negative result means the symbol could not be mapped to an ordering
+        jl_atomic_error("invalid atomic ordering");
+    return checked;
+}
+
+static inline size_t get_checked_fieldindex(const char *name, jl_datatype_t *st, jl_value_t *v, jl_value_t *arg, int mutabl)
+{
+    // Turn `arg` into a 0-based field index of `st`: an integer is taken as a
+    // 1-based position and bounds-checked, anything else must be a symbol and
+    // is looked up by name. `name` is the builtin's name used in error
+    // messages; `v` is the object reported by the bounds error. When `mutabl`
+    // is nonzero, modules and immutable struct types are rejected first.
+    if (mutabl && st == jl_module_type)
+        jl_error("cannot assign variables in other modules");
+    if (mutabl && !st->name->mutabl)
+        jl_errorf("%s: immutable struct of type %s cannot be changed", name, jl_symbol_name(st->name->name));
+    if (!jl_is_long(arg)) {
+        JL_TYPECHKS(name, symbol, arg);
+        return jl_field_index(st, (jl_sym_t*)arg, 1);
+    }
+    size_t pos = jl_unbox_long(arg) - 1;
+    if (pos >= jl_datatype_nfields(st))
+        jl_bounds_error(v, arg);
+    return pos;
+}
+
 JL_CALLABLE(jl_f_getfield)
 {
-    if (nargs == 3) {
-        JL_TYPECHK(getfield, bool, args[2]);
-        nargs -= 1;
+    enum jl_memory_order order = jl_memory_order_unspecified; // stays unspecified unless an ordering symbol is passed
+    JL_NARGS(getfield, 2, 4);
+    if (nargs == 4) { // getfield(v, f, order::Symbol, boundscheck::Bool)
+        JL_TYPECHK(getfield, symbol, args[2]);
+        JL_TYPECHK(getfield, bool, args[3]); // args[3] is the last argument when nargs == 4
+        order = jl_get_atomic_order_checked((jl_sym_t*)args[2], 1, 0);
+    }
+    else if (nargs == 3) { // third argument is either a Bool or an ordering Symbol
+        if (!jl_is_bool(args[2])) {
+            JL_TYPECHK(getfield, symbol, args[2]);
+            order = jl_get_atomic_order_checked((jl_sym_t*)args[2], 1, 0);
+        }
     }
-    JL_NARGS(getfield, 2, 2);
     jl_value_t *v = args[0];
-    jl_value_t *vt = (jl_value_t*)jl_typeof(v);
+    jl_value_t *vt = jl_typeof(v);
     if (vt == (jl_value_t*)jl_module_type) {
         JL_TYPECHK(getfield, symbol, args[1]);
-        return jl_eval_global_var((jl_module_t*)v, (jl_sym_t*)args[1]);
-    }
-    if (!jl_is_datatype(vt))
-        jl_type_error("getfield", (jl_value_t*)jl_datatype_type, v);
-    jl_datatype_t *st = (jl_datatype_t*)vt;
-    size_t idx;
-    if (jl_is_long(args[1])) {
-        idx = jl_unbox_long(args[1])-1;
-        if (idx >= jl_datatype_nfields(st))
-            jl_bounds_error(args[0], args[1]);
+        v = jl_eval_global_var((jl_module_t*)v, (jl_sym_t*)args[1]); // is seq_cst already
     }
     else {
-        JL_TYPECHK(getfield, symbol, args[1]);
-        jl_sym_t *fld = (jl_sym_t*)args[1];
-        idx = jl_field_index(st, fld, 1);
+        jl_datatype_t *st = (jl_datatype_t*)vt;
+        size_t idx = get_checked_fieldindex("getfield", st, v, args[1], 0);
+        int isatomic = jl_field_isatomic(st, idx);
+        if (!isatomic && order != jl_memory_order_notatomic && order != jl_memory_order_unspecified)
+            jl_atomic_error("getfield: non-atomic field cannot be accessed atomically");
+        if (isatomic && order == jl_memory_order_notatomic)
+            jl_atomic_error("getfield: atomic field cannot be accessed non-atomically");
+        v = jl_get_nth_field_checked(v, idx);
+        if (order >= jl_memory_order_acq_rel || order == jl_memory_order_acquire)
+            jl_fence(); // `v` already had at least consume ordering
     }
-    return jl_get_nth_field_checked(v, idx);
+    return v;
 }
 
 JL_CALLABLE(jl_f_setfield)
 {
-    JL_NARGS(setfield!, 3, 3);
+    enum jl_memory_order order = jl_memory_order_notatomic; // default when no ordering argument is given
+    JL_NARGS(setfield!, 3, 4);
+    if (nargs == 4) {
+        JL_TYPECHK(setfield!, symbol, args[3]); // type errors are reported under this builtin's own name
+        order = jl_get_atomic_order_checked((jl_sym_t*)args[3], 0, 1);
+    }
     jl_value_t *v = args[0];
     jl_datatype_t *st = (jl_datatype_t*)jl_typeof(v);
-    assert(jl_is_datatype(st));
-    if (st == jl_module_type)
-        jl_error("cannot assign variables in other modules");
-    if (!st->mutabl)
-        jl_errorf("setfield! immutable struct of type %s cannot be changed", jl_symbol_name(st->name->name));
-    size_t idx;
-    if (jl_is_long(args[1])) {
-        idx = jl_unbox_long(args[1]) - 1;
-        if (idx >= jl_datatype_nfields(st))
-            jl_bounds_error(args[0], args[1]);
-    }
-    else {
-        JL_TYPECHK(setfield!, symbol, args[1]);
-        idx = jl_field_index(st, (jl_sym_t*)args[1], 1);
-    }
+    size_t idx = get_checked_fieldindex("setfield!", st, v, args[1], 1);
+    int isatomic = !!jl_field_isatomic(st, idx);
+    if (isatomic == (order == jl_memory_order_notatomic)) // the field's atomicity and the requested access must agree
+        jl_atomic_error(isatomic ? "setfield!: atomic field cannot be written non-atomically"
+                                 : "setfield!: non-atomic field cannot be written atomically");
     jl_value_t *ft = jl_field_type_concrete(st, idx);
-    if (!jl_isa(args[2], ft)) {
+    if (!jl_isa(args[2], ft))
         jl_type_error("setfield!", ft, args[2]);
-    }
-    set_nth_field(st, (void*)v, idx, args[2]);
+    if (order >= jl_memory_order_acq_rel || order == jl_memory_order_release)
+        jl_fence(); // `st->[idx]` will have at least relaxed ordering
+    set_nth_field(st, v, idx, args[2], isatomic);
     return args[2];
 }
 
+JL_CALLABLE(jl_f_swapfield)
+{
+    JL_NARGS(swapfield!, 3, 4);
+    enum jl_memory_order mo = jl_memory_order_notatomic; // used when no ordering argument is passed
+    if (nargs == 4) {
+        JL_TYPECHK(swapfield!, symbol, args[3]);
+        mo = jl_get_atomic_order_checked((jl_sym_t*)args[3], 1, 1);
+    }
+    jl_value_t *obj = args[0];
+    jl_datatype_t *ty = (jl_datatype_t*)jl_typeof(obj);
+    size_t fieldidx = get_checked_fieldindex("swapfield!", ty, obj, args[1], 1);
+    int atomic_field = !!jl_field_isatomic(ty, fieldidx);
+    if (atomic_field && mo == jl_memory_order_notatomic)
+        jl_atomic_error("swapfield!: atomic field cannot be written non-atomically");
+    if (!atomic_field && mo != jl_memory_order_notatomic)
+        jl_atomic_error("swapfield!: non-atomic field cannot be written atomically");
+    return swap_nth_field(ty, obj, fieldidx, args[2], atomic_field); // always seq_cst, if isatomic needed at all
+}
+
+JL_CALLABLE(jl_f_modifyfield)
+{
+    JL_NARGS(modifyfield!, 4, 5);
+    enum jl_memory_order mo = jl_memory_order_notatomic; // used when no ordering argument is passed
+    if (nargs == 5) {
+        JL_TYPECHK(modifyfield!, symbol, args[4]);
+        mo = jl_get_atomic_order_checked((jl_sym_t*)args[4], 1, 1);
+    }
+    jl_value_t *obj = args[0];
+    jl_datatype_t *ty = (jl_datatype_t*)jl_typeof(obj);
+    size_t fieldidx = get_checked_fieldindex("modifyfield!", ty, obj, args[1], 1);
+    int atomic_field = !!jl_field_isatomic(ty, fieldidx);
+    if (atomic_field && mo == jl_memory_order_notatomic)
+        jl_atomic_error("modifyfield!: atomic field cannot be written non-atomically");
+    if (!atomic_field && mo != jl_memory_order_notatomic)
+        jl_atomic_error("modifyfield!: non-atomic field cannot be written atomically");
+    return modify_nth_field(ty, obj, fieldidx, args[2], args[3], atomic_field); // always seq_cst, if isatomic needed at all
+}
+
+JL_CALLABLE(jl_f_replacefield)
+{
+    JL_NARGS(replacefield!, 4, 6);
+    enum jl_memory_order on_success = jl_memory_order_notatomic;
+    if (nargs >= 5) {
+        JL_TYPECHK(replacefield!, symbol, args[4]);
+        on_success = jl_get_atomic_order_checked((jl_sym_t*)args[4], 1, 1);
+    }
+    enum jl_memory_order on_failure = on_success; // failure ordering defaults to the success ordering
+    if (nargs == 6) {
+        JL_TYPECHK(replacefield!, symbol, args[5]);
+        on_failure = jl_get_atomic_order_checked((jl_sym_t*)args[5], 1, 0);
+    }
+    if (on_success < on_failure) // the failure ordering may not exceed the success ordering (enum order)
+        jl_atomic_error("invalid atomic ordering"); // TODO: filter more invalid ordering combinations?
+    jl_value_t *obj = args[0];
+    jl_datatype_t *ty = (jl_datatype_t*)jl_typeof(obj);
+    size_t fieldidx = get_checked_fieldindex("replacefield!", ty, obj, args[1], 1);
+    int atomic_field = !!jl_field_isatomic(ty, fieldidx);
+    if (atomic_field && on_success == jl_memory_order_notatomic)
+        jl_atomic_error("replacefield!: atomic field cannot be written non-atomically");
+    if (!atomic_field && on_success != jl_memory_order_notatomic)
+        jl_atomic_error("replacefield!: non-atomic field cannot be written atomically");
+    if (atomic_field && on_failure == jl_memory_order_notatomic)
+        jl_atomic_error("replacefield!: atomic field cannot be accessed non-atomically");
+    if (!atomic_field && on_failure != jl_memory_order_notatomic)
+        jl_atomic_error("replacefield!: non-atomic field cannot be accessed atomically");
+    return replace_nth_field(ty, obj, fieldidx, args[2], args[3], atomic_field); // always seq_cst, if isatomic needed at all
+}
+
+
 static jl_value_t *get_fieldtype(jl_value_t *t, jl_value_t *f, int dothrow)
 {
     if (jl_is_unionall(t)) {
@@ -939,11 +1064,10 @@ static jl_value_t *get_fieldtype(jl_value_t *t, jl_value_t *f, int dothrow)
 
 JL_CALLABLE(jl_f_fieldtype)
 {
+    JL_NARGS(fieldtype, 2, 3);
     if (nargs == 3) {
         JL_TYPECHK(fieldtype, bool, args[2]);
-        nargs -= 1;
     }
-    JL_NARGS(fieldtype, 2, 2);
     return get_fieldtype(args[0], args[1], 1);
 }
 
@@ -958,29 +1082,53 @@ JL_CALLABLE(jl_f_isdefined)
 {
     jl_module_t *m = NULL;
     jl_sym_t *s = NULL;
-    JL_NARGS(isdefined, 2, 2);
-    if (!jl_is_module(args[0])) {
-        jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(args[0]);
-        assert(jl_is_datatype(vt));
-        size_t idx;
-        if (jl_is_long(args[1])) {
-            idx = jl_unbox_long(args[1]) - 1;
-            if (idx >= jl_datatype_nfields(vt))
-                return jl_false;
+    JL_NARGS(isdefined, 2, 3);
+    enum jl_memory_order order = jl_memory_order_unspecified;
+    if (nargs == 3) {
+        JL_TYPECHK(isdefined, symbol, args[2]);
+        order = jl_get_atomic_order_checked((jl_sym_t*)args[2], 1, 0);
+    }
+    if (jl_is_module(args[0])) {
+        JL_TYPECHK(isdefined, symbol, args[1]);
+        m = (jl_module_t*)args[0];
+        s = (jl_sym_t*)args[1];
+        return jl_boundp(m, s) ? jl_true : jl_false; // is seq_cst already
+    }
+    jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(args[0]);
+    assert(jl_is_datatype(vt));
+    size_t idx;
+    if (jl_is_long(args[1])) {
+        idx = jl_unbox_long(args[1]) - 1;
+        if (idx >= jl_datatype_nfields(vt)) {
+            if (order != jl_memory_order_unspecified)
+                jl_atomic_error("isdefined: atomic ordering cannot be specified for nonexistent field");
+            return jl_false;
         }
-        else {
-            JL_TYPECHK(isdefined, symbol, args[1]);
-            idx = jl_field_index(vt, (jl_sym_t*)args[1], 0);
-            if ((int)idx == -1)
-                return jl_false;
+    }
+    else {
+        JL_TYPECHK(isdefined, symbol, args[1]);
+        idx = jl_field_index(vt, (jl_sym_t*)args[1], 0);
+        if ((int)idx == -1) {
+            if (order != jl_memory_order_unspecified)
+                jl_atomic_error("isdefined: atomic ordering cannot be specified for nonexistent field");
+            return jl_false;
         }
-        return jl_field_isdefined(args[0], idx) ? jl_true : jl_false;
     }
-    JL_TYPECHK(isdefined, module, args[0]);
-    JL_TYPECHK(isdefined, symbol, args[1]);
-    m = (jl_module_t*)args[0];
-    s = (jl_sym_t*)args[1];
-    return jl_boundp(m, s) ? jl_true : jl_false;
+    int isatomic = jl_field_isatomic(vt, idx);
+    if (!isatomic && order != jl_memory_order_notatomic && order != jl_memory_order_unspecified)
+        jl_atomic_error("isdefined: non-atomic field cannot be accessed atomically");
+    if (isatomic && order == jl_memory_order_notatomic)
+        jl_atomic_error("isdefined: atomic field cannot be accessed non-atomically");
+    int v = jl_field_isdefined(args[0], idx);
+    if (v == 2) {
+        if (order > jl_memory_order_notatomic)
+            jl_fence(); // isbits case has no ordering already
+    }
+    else {
+        if (order >= jl_memory_order_acq_rel || order == jl_memory_order_acquire)
+            jl_fence(); // `v` already gave at least consume ordering
+    }
+    return v ? jl_true : jl_false;
 }
 
 
@@ -1061,7 +1209,7 @@ JL_CALLABLE(jl_f_apply_type)
 JL_CALLABLE(jl_f_applicable)
 {
     JL_NARGSV(applicable, 1);
-    size_t world = jl_get_ptls_states()->world_age;
+    size_t world = jl_current_task->world_age;
     return jl_method_lookup(args, nargs, world) != NULL ? jl_true : jl_false;
 }
 
@@ -1127,10 +1275,10 @@ JL_CALLABLE(jl_f_invoke_kwsorter)
 
 jl_expr_t *jl_exprn(jl_sym_t *head, size_t n)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     jl_array_t *ar = jl_alloc_vec_any(n);
     JL_GC_PUSH1(&ar);
-    jl_expr_t *ex = (jl_expr_t*)jl_gc_alloc(ptls, sizeof(jl_expr_t),
+    jl_expr_t *ex = (jl_expr_t*)jl_gc_alloc(ct->ptls, sizeof(jl_expr_t),
                                             jl_expr_type);
     ex->head = head;
     ex->args = ar;
@@ -1140,14 +1288,14 @@ jl_expr_t *jl_exprn(jl_sym_t *head, size_t n)
 
 JL_CALLABLE(jl_f__expr)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     JL_NARGSV(Expr, 1);
     JL_TYPECHK(Expr, symbol, args[0]);
     jl_array_t *ar = jl_alloc_vec_any(nargs-1);
     JL_GC_PUSH1(&ar);
     for(size_t i=0; i < nargs-1; i++)
         jl_array_ptr_set(ar, i, args[i+1]);
-    jl_expr_t *ex = (jl_expr_t*)jl_gc_alloc(ptls, sizeof(jl_expr_t),
+    jl_expr_t *ex = (jl_expr_t*)jl_gc_alloc(ct->ptls, sizeof(jl_expr_t),
                                             jl_expr_type);
     ex->head = (jl_sym_t*)args[0];
     ex->args = ar;
@@ -1162,8 +1310,8 @@ JL_DLLEXPORT jl_tvar_t *jl_new_typevar(jl_sym_t *name, jl_value_t *lb, jl_value_
         jl_type_error_rt("TypeVar", "lower bound", (jl_value_t *)jl_type_type, lb);
     if (ub != (jl_value_t *)jl_any_type && !jl_is_type(ub) && !jl_is_typevar(ub))
         jl_type_error_rt("TypeVar", "upper bound", (jl_value_t *)jl_type_type, ub);
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_tvar_t *tv = (jl_tvar_t *)jl_gc_alloc(ptls, sizeof(jl_tvar_t), jl_tvar_type);
+    jl_task_t *ct = jl_current_task;
+    jl_tvar_t *tv = (jl_tvar_t *)jl_gc_alloc(ct->ptls, sizeof(jl_tvar_t), jl_tvar_type);
     tv->name = name;
     tv->lb = lb;
     tv->ub = ub;
@@ -1247,18 +1395,20 @@ JL_CALLABLE(jl_f_arrayset)
 
 JL_CALLABLE(jl_f__structtype)
 {
-    JL_NARGS(_structtype, 6, 6);
+    JL_NARGS(_structtype, 7, 7);
     JL_TYPECHK(_structtype, module, args[0]);
     JL_TYPECHK(_structtype, symbol, args[1]);
     JL_TYPECHK(_structtype, simplevector, args[2]);
     JL_TYPECHK(_structtype, simplevector, args[3]);
-    JL_TYPECHK(_structtype, bool, args[4]);
-    JL_TYPECHK(_structtype, long, args[5]);
+    JL_TYPECHK(_structtype, simplevector, args[4]);
+    JL_TYPECHK(_structtype, bool, args[5]);
+    JL_TYPECHK(_structtype, long, args[6]);
     jl_value_t *fieldnames = args[3];
+    jl_value_t *fieldattrs = args[4];
     jl_datatype_t *dt = NULL;
     dt = jl_new_datatype((jl_sym_t*)args[1], (jl_module_t*)args[0], NULL, (jl_svec_t*)args[2],
-                         (jl_svec_t*)fieldnames, NULL,
-                         0, args[4]==jl_true ? 1 : 0, jl_unbox_long(args[5]));
+                         (jl_svec_t*)fieldnames, NULL, (jl_svec_t*)fieldattrs,
+                         0, args[5]==jl_true ? 1 : 0, jl_unbox_long(args[6]));
     return dt->name->wrapper;
 }
 
@@ -1337,6 +1487,48 @@ static int equiv_field_types(jl_value_t *old, jl_value_t *ft)
     return 1;
 }
 
+// Conservatively decide whether type `p` can refer back to the type named
+// `name`. A field that can reference its enclosing type has no statically
+// bounded inlining depth for some layouts, so such a field cannot be inlined.
+// Because of syntax-enforced restrictions, a field can only reach this type
+// by receiving it as a type parameter, so examining the parameters of the
+// dependent types is sufficient.
+// affects_layout is a hack introduced by #35275 to work around a problem
+// introduced by #34223: it tracks whether we might need to compute the layout
+// of the object before the field types are fully computed, while recursing
+// over the allocation of the parameters for the field types (of the concrete
+// subtypes).
+static int references_name(jl_value_t *p, jl_typename_t *name, int affects_layout) JL_NOTSAFEPOINT
+{
+    if (jl_is_uniontype(p)) {
+        jl_uniontype_t *u = (jl_uniontype_t*)p;
+        return references_name(u->a, name, affects_layout) ||
+               references_name(u->b, name, affects_layout);
+    }
+    if (jl_is_unionall(p)) {
+        jl_unionall_t *ua = (jl_unionall_t*)p;
+        return references_name((jl_value_t*)ua->var->lb, name, 0) ||
+               references_name((jl_value_t*)ua->var->ub, name, 0) ||
+               references_name(ua->body, name, affects_layout);
+    }
+    if (!jl_is_datatype(p))
+        return 0; // includes typevars: already checked by unionall, if applicable
+    jl_datatype_t *dp = (jl_datatype_t*)p;
+    if (affects_layout && dp->name == name)
+        return 1;
+    // The parameters are still checked for layout recursion, but they are only
+    // treated as affecting layout when the unwrapped wrapper of this type has no
+    // layout yet (i.e. not all copies of the type are known to share one layout).
+    int param_affects_layout = ((jl_datatype_t*)jl_unwrap_unionall(dp->name->wrapper))->layout == NULL;
+    size_t nparams = jl_nparams(p);
+    for (size_t k = 0; k < nparams; k++) {
+        if (references_name(jl_tparam(p, k), name, param_affects_layout))
+            return 1;
+    }
+    return 0;
+}
+
+
 JL_CALLABLE(jl_f__typebody)
 {
     JL_NARGS(_typebody!, 1, 2);
@@ -1361,6 +1553,22 @@ JL_CALLABLE(jl_f__typebody)
         else {
             dt->types = (jl_svec_t*)ft;
             jl_gc_wb(dt, ft);
+            // If a supertype can reference the same type, then we may not be
+            // able to compute the layout of the object before needing to
+            // publish it, so we must assume it cannot be inlined, if that
+            // check passes, then we also still need to check the fields too.
+            if (!dt->name->mutabl && (nf == 0 || !references_name((jl_value_t*)dt->super, dt->name, 1))) {
+                int mayinlinealloc = 1;
+                size_t i;
+                for (i = 0; i < nf; i++) {
+                    jl_value_t *fld = jl_svecref(ft, i);
+                    if (references_name(fld, dt->name, 1)) {
+                        mayinlinealloc = 0;
+                        break;
+                    }
+                }
+                dt->name->mayinlinealloc = mayinlinealloc;
+            }
         }
     }
 
@@ -1386,10 +1594,14 @@ static int equiv_type(jl_value_t *ta, jl_value_t *tb)
     jl_datatype_t *dtb = (jl_datatype_t*)jl_unwrap_unionall(tb);
     if (!(jl_typeof(dta) == jl_typeof(dtb) &&
           dta->name->name == dtb->name->name &&
-          dta->abstract == dtb->abstract &&
-          dta->mutabl == dtb->mutabl &&
+          dta->name->abstract == dtb->name->abstract &&
+          dta->name->mutabl == dtb->name->mutabl &&
+          dta->name->n_uninitialized == dtb->name->n_uninitialized &&
           (jl_svec_len(jl_field_names(dta)) != 0 || dta->size == dtb->size) &&
-          dta->ninitialized == dtb->ninitialized &&
+          (dta->name->atomicfields == NULL
+           ? dtb->name->atomicfields == NULL
+           : (dtb->name->atomicfields != NULL &&
+              memcmp(dta->name->atomicfields, dtb->name->atomicfields, (jl_svec_len(dta->name->names) + 31) / 32 * sizeof(uint32_t)) == 0)) &&
           jl_egal((jl_value_t*)jl_field_names(dta), (jl_value_t*)jl_field_names(dtb)) &&
           jl_nparams(dta) == jl_nparams(dtb)))
         return 0;
@@ -1440,7 +1652,6 @@ static unsigned intrinsic_nargs[num_intrinsics];
 
 JL_CALLABLE(jl_f_intrinsic_call)
 {
-    JL_NARGSV(intrinsic_call, 1);
     JL_TYPECHK(intrinsic_call, intrinsic, F);
     enum intrinsic f = (enum intrinsic)*(uint32_t*)jl_data_ptr(F);
     if (f == cglobal && nargs == 1)
@@ -1456,6 +1667,7 @@ JL_CALLABLE(jl_f_intrinsic_call)
         jl_value_t *(*call2)(jl_value_t*, jl_value_t*);
         jl_value_t *(*call3)(jl_value_t*, jl_value_t*, jl_value_t*);
         jl_value_t *(*call4)(jl_value_t*, jl_value_t*, jl_value_t*, jl_value_t*);
+        jl_value_t *(*call5)(jl_value_t*, jl_value_t*, jl_value_t*, jl_value_t*, jl_value_t*);
     } fptr;
     fptr.fptr = runtime_fp[f];
     switch (fargs) {
@@ -1467,6 +1679,8 @@ JL_CALLABLE(jl_f_intrinsic_call)
             return fptr.call3(args[0], args[1], args[2]);
         case 4:
             return fptr.call4(args[0], args[1], args[2], args[3]);
+        case 5:
+            return fptr.call5(args[0], args[1], args[2], args[3], args[4]);
         default:
             assert(0 && "unexpected number of arguments to an intrinsic function");
     }
@@ -1570,6 +1784,9 @@ void jl_init_primitives(void) JL_GC_DISABLED
     // field access
     jl_builtin_getfield = add_builtin_func("getfield",  jl_f_getfield);
     jl_builtin_setfield = add_builtin_func("setfield!",  jl_f_setfield);
+    jl_builtin_swapfield = add_builtin_func("swapfield!",  jl_f_swapfield);
+    jl_builtin_modifyfield = add_builtin_func("modifyfield!",  jl_f_modifyfield);
+    jl_builtin_replacefield = add_builtin_func("replacefield!",  jl_f_replacefield);
     jl_builtin_fieldtype = add_builtin_func("fieldtype", jl_f_fieldtype);
     jl_builtin_nfields = add_builtin_func("nfields", jl_f_nfields);
     jl_builtin_isdefined = add_builtin_func("isdefined", jl_f_isdefined);
diff --git a/src/ccall.cpp b/src/ccall.cpp
index 66ab84c264f3ab..d561e1b8d6dd61 100644
--- a/src/ccall.cpp
+++ b/src/ccall.cpp
@@ -1,9 +1,6 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
 // --- the ccall, cglobal, and llvm intrinsics ---
-#include "llvm/Support/Path.h" // for llvm::sys::path
-#include <llvm/Bitcode/BitcodeReader.h>
-#include <llvm/Linker/Linker.h>
 
 #ifdef _OS_WINDOWS_
 extern const char jl_crtdll_basename[];
@@ -290,9 +287,9 @@ static Value *emit_plt(
 class AbiLayout {
 public:
     virtual ~AbiLayout() {}
-    virtual bool use_sret(jl_datatype_t *ty) = 0;
-    virtual bool needPassByRef(jl_datatype_t *ty, AttrBuilder&) = 0;
-    virtual Type *preferred_llvm_type(jl_datatype_t *ty, bool isret) const = 0;
+    virtual bool use_sret(jl_datatype_t *ty, LLVMContext &ctx) = 0;
+    virtual bool needPassByRef(jl_datatype_t *ty, AttrBuilder&, LLVMContext &ctx) = 0;
+    virtual Type *preferred_llvm_type(jl_datatype_t *ty, bool isret, LLVMContext &ctx) const = 0;
 };
 
 // Determine if object of bitstype ty maps to a native x86 SIMD type (__m128, __m256, or __m512) in C
@@ -493,7 +490,7 @@ static Value *julia_to_native(
         assert(!byRef); // don't expect any ABI to pass pointers by pointer
         return boxed(ctx, jvinfo);
     }
-    assert(jl_is_datatype(jlto) && julia_struct_has_layout((jl_datatype_t*)jlto, jlto_env));
+    assert(jl_is_datatype(jlto) && jl_struct_try_layout((jl_datatype_t*)jlto));
 
     typeassert_input(ctx, jvinfo, jlto, jlto_env, argn);
     if (!byRef)
@@ -1008,14 +1005,14 @@ std::string generate_func_sig(const char *fname)
 
     if (type_is_ghost(lrt)) {
         prt = lrt = T_void;
-        abi->use_sret(jl_nothing_type);
+        abi->use_sret(jl_nothing_type, jl_LLVMContext);
     }
     else {
         if (!jl_is_datatype(rt) || ((jl_datatype_t*)rt)->layout == NULL || jl_is_layout_opaque(((jl_datatype_t*)rt)->layout) || jl_is_cpointer_type(rt) || retboxed) {
             prt = lrt; // passed as pointer
-            abi->use_sret(jl_voidpointer_type);
+            abi->use_sret(jl_voidpointer_type, jl_LLVMContext);
         }
-        else if (abi->use_sret((jl_datatype_t*)rt)) {
+        else if (abi->use_sret((jl_datatype_t*)rt, jl_LLVMContext)) {
             AttrBuilder retattrs = AttrBuilder();
 #if !defined(_OS_WINDOWS_) // llvm used to use the old mingw ABI, skipping this marking works around that difference
 #if JL_LLVM_VERSION < 120000
@@ -1031,7 +1028,7 @@ std::string generate_func_sig(const char *fname)
             prt = lrt;
         }
         else {
-            prt = abi->preferred_llvm_type((jl_datatype_t*)rt, true);
+            prt = abi->preferred_llvm_type((jl_datatype_t*)rt, true, jl_LLVMContext);
             if (prt == NULL)
                 prt = lrt;
         }
@@ -1065,7 +1062,7 @@ std::string generate_func_sig(const char *fname)
                 }
             }
 
-            t = _julia_struct_to_llvm(ctx, tti, unionall_env, &isboxed, llvmcall);
+            t = _julia_struct_to_llvm(ctx, tti, &isboxed, llvmcall);
             if (t == NULL || t == T_void) {
                 return make_errmsg(fname, i + 1, " doesn't correspond to a C type");
             }
@@ -1077,7 +1074,7 @@ std::string generate_func_sig(const char *fname)
         }
 
         // Whether or not LLVM wants us to emit a pointer to the data
-        bool byRef = abi->needPassByRef((jl_datatype_t*)tti, ab);
+        bool byRef = abi->needPassByRef((jl_datatype_t*)tti, ab, jl_LLVMContext);
 
         if (jl_is_cpointer_type(tti)) {
             pat = t;
@@ -1086,7 +1083,7 @@ std::string generate_func_sig(const char *fname)
             pat = PointerType::get(t, AddressSpace::Derived);
         }
         else {
-            pat = abi->preferred_llvm_type((jl_datatype_t*)tti, false);
+            pat = abi->preferred_llvm_type((jl_datatype_t*)tti, false, jl_LLVMContext);
             if (pat == NULL)
                 pat = t;
         }
@@ -1211,7 +1208,7 @@ static const std::string verify_ccall_sig(jl_value_t *&rt, jl_value_t *at,
         rt = (jl_value_t*)jl_any_type;
     }
 
-    lrt = _julia_struct_to_llvm(ctx, rt, unionall_env, &retboxed, llvmcall);
+    lrt = _julia_struct_to_llvm(ctx, rt, &retboxed, llvmcall);
     if (lrt == NULL)
         return "return type doesn't correspond to a C type";
 
@@ -1287,22 +1284,6 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
     };
 #define is_libjulia_func(name) _is_libjulia_func((uintptr_t)&(name), #name)
 
-    static jl_ptls_t (*ptls_getter)(void) = [] {
-    // directly accessing the address of an ifunc can cause compile-time linker issues
-    // on some configurations (e.g. AArch64 + -Bsymbolic-functions), so we guard the
-    // `&jl_get_ptls_states` within this `#ifdef` guard, and use a more roundabout
-    // method involving `jl_dlsym()` on Linux platforms instead.
-#ifdef _OS_LINUX_
-        jl_ptls_t (*p)(void);
-        void *handle = jl_dlopen(nullptr, 0);
-        jl_dlsym(handle, "jl_get_ptls_states", (void **)&p, 0);
-        jl_dlclose(handle);
-        return p;
-#else
-        return &jl_get_ptls_states;
-#endif
-    }();
-
     // emit arguments
     jl_cgval_t *argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nccallargs);
     for (size_t i = 0; i < nccallargs; i++) {
@@ -1405,7 +1386,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
             isboxed = false;
         }
         else {
-            largty = _julia_struct_to_llvm(&ctx.emission_context, tti, unionall, &isboxed, llvmcall);
+            largty = _julia_struct_to_llvm(&ctx.emission_context, tti, &isboxed, llvmcall);
         }
         if (isboxed) {
             ary = boxed(ctx, argv[0]);
@@ -1475,27 +1456,27 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         JL_GC_POP();
         ctx.builder.CreateCall(prepare_call(gcroot_flush_func));
         emit_signal_fence(ctx);
-        ctx.builder.CreateLoad(T_size, ctx.signalPage, true);
+        ctx.builder.CreateLoad(T_size, get_current_signal_page(ctx), true);
         emit_signal_fence(ctx);
         return ghostValue(jl_nothing_type);
     }
-    else if (_is_libjulia_func((uintptr_t)ptls_getter, "jl_get_ptls_states")) {
+    else if (is_libjulia_func("jl_get_ptls_states")) {
         assert(lrt == T_size);
         assert(!isVa && !llvmcall && nccallargs == 0);
         JL_GC_POP();
         return mark_or_box_ccall_result(ctx,
-            ctx.builder.CreatePtrToInt(ctx.ptlsStates, lrt),
+            ctx.builder.CreatePtrToInt(get_current_ptls(ctx), lrt),
             retboxed, rt, unionall, static_rt);
     }
     else if (is_libjulia_func(jl_threadid)) {
         assert(lrt == T_int16);
         assert(!isVa && !llvmcall && nccallargs == 0);
         JL_GC_POP();
-        Value *ptls_i16 = emit_bitcast(ctx, ctx.ptlsStates, T_pint16);
-        const int tid_offset = offsetof(jl_tls_states_t, tid);
-        Value *ptid = ctx.builder.CreateInBoundsGEP(ptls_i16, ConstantInt::get(T_size, tid_offset / 2));
+        Value *ptask_i16 = emit_bitcast(ctx, get_current_task(ctx), T_pint16);
+        const int tid_offset = offsetof(jl_task_t, tid);
+        Value *ptid = ctx.builder.CreateInBoundsGEP(ptask_i16, ConstantInt::get(T_size, tid_offset / sizeof(int16_t)));
         LoadInst *tid = ctx.builder.CreateAlignedLoad(ptid, Align(sizeof(int16_t)));
-        tbaa_decorate(tbaa_const, tid);
+        tbaa_decorate(tbaa_gcframe, tid);
         return mark_or_box_ccall_result(ctx, tid, retboxed, rt, unionall, static_rt);
     }
     else if (is_libjulia_func(jl_gc_disable_finalizers_internal)
@@ -1504,7 +1485,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
 #endif
              ) {
         JL_GC_POP();
-        Value *ptls_i32 = emit_bitcast(ctx, ctx.ptlsStates, T_pint32);
+        Value *ptls_i32 = emit_bitcast(ctx, get_current_ptls(ctx), T_pint32);
         const int finh_offset = offsetof(jl_tls_states_t, finalizers_inhibited);
         Value *pfinh = ctx.builder.CreateInBoundsGEP(ptls_i32, ConstantInt::get(T_size, finh_offset / 4));
         LoadInst *finh = ctx.builder.CreateAlignedLoad(pfinh, Align(sizeof(int32_t)));
@@ -1524,18 +1505,14 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         assert(lrt == T_prjlvalue);
         assert(!isVa && !llvmcall && nccallargs == 0);
         JL_GC_POP();
-        Value *ptls_pv = emit_bitcast(ctx, ctx.ptlsStates, T_pprjlvalue);
-        const int ct_offset = offsetof(jl_tls_states_t, current_task);
-        Value *pct = ctx.builder.CreateInBoundsGEP(ptls_pv, ConstantInt::get(T_size, ct_offset / sizeof(void*)));
-        LoadInst *ct = ctx.builder.CreateAlignedLoad(pct, Align(sizeof(void*)));
-        tbaa_decorate(tbaa_const, ct);
+        auto ct = track_pjlvalue(ctx, emit_bitcast(ctx, get_current_task(ctx), T_pjlvalue));
         return mark_or_box_ccall_result(ctx, ct, retboxed, rt, unionall, static_rt);
     }
     else if (is_libjulia_func(jl_set_next_task)) {
         assert(lrt == T_void);
         assert(!isVa && !llvmcall && nccallargs == 1);
         JL_GC_POP();
-        Value *ptls_pv = emit_bitcast(ctx, ctx.ptlsStates, T_ppjlvalue);
+        Value *ptls_pv = emit_bitcast(ctx, get_current_ptls(ctx), T_ppjlvalue);
         const int nt_offset = offsetof(jl_tls_states_t, next_task);
         Value *pnt = ctx.builder.CreateInBoundsGEP(ptls_pv, ConstantInt::get(T_size, nt_offset / sizeof(void*)));
         ctx.builder.CreateStore(emit_pointer_from_objref(ctx, boxed(ctx, argv[0])), pnt);
@@ -1576,7 +1553,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
                 checkBB, contBB);
         ctx.builder.SetInsertPoint(checkBB);
         ctx.builder.CreateLoad(
-                ctx.builder.CreateConstInBoundsGEP1_32(T_size, ctx.signalPage, -1),
+                ctx.builder.CreateConstInBoundsGEP1_32(T_size, get_current_signal_page(ctx), -1),
                 true);
         ctx.builder.CreateBr(contBB);
         ctx.f->getBasicBlockList().push_back(contBB);
@@ -1717,19 +1694,11 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
 
         ctx.builder.CreateMemCpy(
                 emit_inttoptr(ctx, destp, T_pint8),
-#if JL_LLVM_VERSION >= 100000
                 MaybeAlign(1),
-#else
-                1,
-#endif
                 emit_inttoptr(ctx,
                     emit_unbox(ctx, T_size, src, (jl_value_t*)jl_voidpointer_type),
                     T_pint8),
-#if JL_LLVM_VERSION >= 100000
                 MaybeAlign(0),
-#else
-                0,
-#endif
                 emit_unbox(ctx, T_size, n, (jl_value_t*)jl_ulong_type),
                 false);
         JL_GC_POP();
@@ -1739,7 +1708,16 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
     else if (is_libjulia_func(jl_object_id) && nccallargs == 1 &&
             rt == (jl_value_t*)jl_ulong_type) {
         jl_cgval_t val = argv[0];
-        if (!val.isboxed) {
+        if (val.typ == (jl_value_t*)jl_symbol_type) {
+            JL_GC_POP();
+            const int hash_offset = offsetof(jl_sym_t, hash);
+            Value *ph1 = emit_bitcast(ctx, decay_derived(ctx, boxed(ctx, val)), T_psize);
+            Value *ph2 = ctx.builder.CreateInBoundsGEP(ph1, ConstantInt::get(T_size, hash_offset / sizeof(size_t)));
+            LoadInst *hashval = ctx.builder.CreateAlignedLoad(ph2, Align(sizeof(size_t)));
+            tbaa_decorate(tbaa_const, hashval);
+            return mark_or_box_ccall_result(ctx, hashval, retboxed, rt, unionall, static_rt);
+        }
+        else if (!val.isboxed) {
             // If the value is not boxed, try to compute the object id without
             // reboxing it.
             auto T_pint8_derived = PointerType::get(T_int8, AddressSpace::Derived);
diff --git a/src/cgmemmgr.cpp b/src/cgmemmgr.cpp
index 8684178ab02a61..23d8b7437b8230 100644
--- a/src/cgmemmgr.cpp
+++ b/src/cgmemmgr.cpp
@@ -10,6 +10,7 @@
 #ifdef _OS_LINUX_
 #  include <sys/syscall.h>
 #  include <sys/utsname.h>
+#  include <sys/resource.h>
 #endif
 #ifndef _OS_WINDOWS_
 #  include <sys/mman.h>
@@ -22,6 +23,7 @@
 #endif
 #ifdef _OS_FREEBSD_
 #  include <sys/types.h>
+#  include <sys/resource.h>
 #endif
 #include "julia_assert.h"
 
@@ -206,10 +208,24 @@ static intptr_t get_anon_hdl(void)
 static size_t map_offset = 0;
 // Multiple of 128MB.
 // Hopefully no one will set a ulimit for this to be a problem...
-static constexpr size_t map_size_inc = 128 * 1024 * 1024;
+static constexpr size_t map_size_inc_default = 128 * 1024 * 1024;
 static size_t map_size = 0;
 static jl_mutex_t shared_map_lock;
 
+// Size increment for the shared map file: the default, capped by RLIMIT_FSIZE when a limit is set.
+static size_t get_map_size_inc()
+{
+    rlimit rl;
+    if (getrlimit(RLIMIT_FSIZE, &rl) == -1)
+        return map_size_inc_default; // limit could not be queried: keep the default
+    // a finite soft limit takes precedence; otherwise fall back to the hard limit
+    rlim_t limit = rl.rlim_cur != RLIM_INFINITY ? rl.rlim_cur : rl.rlim_max;
+    // compare as rlim_t: it can be wider than size_t, so narrowing first could truncate (even to 0)
+    if (limit != RLIM_INFINITY && limit < map_size_inc_default)
+        return (size_t)limit;
+    return map_size_inc_default;
+}
+
 static void *create_shared_map(size_t size, size_t id)
 {
     void *addr = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED,
@@ -224,7 +240,7 @@ static intptr_t init_shared_map()
     if (anon_hdl == -1)
         return -1;
     map_offset = 0;
-    map_size = map_size_inc;
+    map_size = get_map_size_inc();
     int ret = ftruncate(anon_hdl, map_size);
     if (ret != 0) {
         perror(__func__);
@@ -238,6 +254,7 @@ static void *alloc_shared_page(size_t size, size_t *id, bool exec)
     assert(size % jl_page_size == 0);
     size_t off = jl_atomic_fetch_add(&map_offset, size);
     *id = off;
+    size_t map_size_inc = get_map_size_inc();
     if (__unlikely(off + size > map_size)) {
         JL_LOCK_NOGC(&shared_map_lock);
         size_t old_size = map_size;
diff --git a/src/cgutils.cpp b/src/cgutils.cpp
index bb0e15ed7363cf..4e9917b6b0780a 100644
--- a/src/cgutils.cpp
+++ b/src/cgutils.cpp
@@ -53,6 +53,22 @@ static Value *mark_callee_rooted(jl_codectx_t &ctx, Value *V)
         PointerType::get(T_jlvalue, AddressSpace::CalleeRooted));
 }
 
+AtomicOrdering get_llvm_atomic_order(enum jl_memory_order order)
+{ // translate a Julia memory ordering into the matching LLVM AtomicOrdering
+    switch (order) {
+    case jl_memory_order_notatomic: return AtomicOrdering::NotAtomic;
+    case jl_memory_order_unordered: return AtomicOrdering::Unordered;
+    case jl_memory_order_monotonic: return AtomicOrdering::Monotonic;
+    case jl_memory_order_acquire:   return AtomicOrdering::Acquire;
+    case jl_memory_order_release:   return AtomicOrdering::Release;
+    case jl_memory_order_acq_rel:   return AtomicOrdering::AcquireRelease;
+    case jl_memory_order_seq_cst:   return AtomicOrdering::SequentiallyConsistent;
+    default: // unknown ordering: trap in debug builds, and abort even when asserts are compiled out
+        assert(0 && "invalid atomic ordering"); // a bare string literal is always true and would never fire
+        abort();
+    }
+}
+
 // --- language feature checks ---
 
 #define JL_FEAT_TEST(ctx, feature) ((ctx).params->feature)
@@ -124,6 +140,7 @@ static DIType *_julia_type_to_di(jl_codegen_params_t *ctx, jl_value_t *jt, DIBui
             DIType *di;
             if (jl_field_isptr(jdt, i))
                 di = jl_pvalue_dillvmt;
+            // TODO: elseif jl_islayout_inline
             else
                 di = _julia_type_to_di(ctx, el, dbuilder, false);
             Elements[i] = di;
@@ -217,7 +234,7 @@ static Value *julia_pgv(jl_codectx_t &ctx, const char *cname, void *addr)
                                 false, GlobalVariable::PrivateLinkage,
                                 NULL, localname);
     // LLVM passes sometimes strip metadata when moving load around
-    // since the load at the new location satisfy the same condition as the origional one.
+    // since the load at the new location satisfies the same condition as the original one.
     // Mark the global as constant to LLVM code using our own metadata
     // which is much less likely to be striped.
     gv->setMetadata("julia.constgv", MDNode::get(gv->getContext(), None));
@@ -304,7 +321,7 @@ static size_t dereferenceable_size(jl_value_t *jt)
         // Array has at least this much data
         return sizeof(jl_array_t);
     }
-    else if (jl_is_datatype(jt) && ((jl_datatype_t*)jt)->layout) {
+    else if (jl_is_datatype(jt) && jl_struct_try_layout((jl_datatype_t*)jt)) {
         return jl_datatype_size(jt);
     }
     return 0;
@@ -322,7 +339,7 @@ static unsigned julia_alignment(jl_value_t *jt)
         // and this is the guarantee we have for the GC bits
         return 16;
     }
-    assert(jl_is_datatype(jt) && ((jl_datatype_t*)jt)->layout);
+    assert(jl_is_datatype(jt) && jl_struct_try_layout((jl_datatype_t*)jt));
     unsigned alignment = jl_datatype_align(jt);
     if (alignment > JL_HEAP_ALIGNMENT)
         return JL_HEAP_ALIGNMENT;
@@ -475,7 +492,7 @@ static Value *emit_struct_gep(jl_codectx_t &ctx, Type *lty, Value *base, unsigne
     return ctx.builder.CreateConstInBoundsGEP2_32(lty, base, 0, idx);
 }
 
-static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, jl_value_t *jt, jl_unionall_t *ua, bool *isboxed, bool llvmcall=false);
+static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, jl_value_t *jt, bool *isboxed, bool llvmcall=false);
 
 static Type *_julia_type_to_llvm(jl_codegen_params_t *ctx, jl_value_t *jt, bool *isboxed)
 {
@@ -486,7 +503,7 @@ static Type *_julia_type_to_llvm(jl_codegen_params_t *ctx, jl_value_t *jt, bool
     if (jl_is_concrete_immutable(jt)) {
         if (jl_datatype_nbits(jt) == 0)
             return T_void;
-        Type *t = _julia_struct_to_llvm(ctx, jt, NULL, isboxed);
+        Type *t = _julia_struct_to_llvm(ctx, jt, isboxed);
         assert(t != NULL);
         return t;
     }
@@ -538,29 +555,10 @@ static Type *bitstype_to_llvm(jl_value_t *bt, bool llvmcall = false)
 }
 
 static bool jl_type_hasptr(jl_value_t* typ)
-{ // assumes that jl_stored_inline(typ) is true
+{ // assumes that jl_stored_inline(typ) is true (and therefore that layout is defined)
     return jl_is_datatype(typ) && ((jl_datatype_t*)typ)->layout->npointers > 0;
 }
 
-// compute whether all concrete subtypes of this type have the same layout
-// (which is conservatively approximated here by asking whether the types of any of the
-// fields depend on any of the parameters of the containing type)
-static bool julia_struct_has_layout(jl_datatype_t *dt, jl_unionall_t *ua)
-{
-    if (dt->layout)
-        return true;
-    if (ua) {
-        jl_svec_t *types = jl_get_fieldtypes(dt);
-        size_t i, ntypes = jl_svec_len(types);
-        for (i = 0; i < ntypes; i++) {
-            jl_value_t *ty = jl_svecref(types, i);
-            if (jl_has_typevar_from_unionall(ty, ua))
-                return false;
-        }
-    }
-    return true;
-}
-
 static unsigned jl_field_align(jl_datatype_t *dt, size_t i)
 {
     unsigned al = jl_field_offset(dt, i);
@@ -569,7 +567,7 @@ static unsigned jl_field_align(jl_datatype_t *dt, size_t i)
     return std::min({al, (unsigned)jl_datatype_align(dt), (unsigned)JL_HEAP_ALIGNMENT});
 }
 
-static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, jl_value_t *jt, jl_unionall_t *ua_env, bool *isboxed, bool llvmcall)
+static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, jl_value_t *jt, bool *isboxed, bool llvmcall)
 {
     // this function converts a Julia Type into the equivalent LLVM struct
     // use this where C-compatible (unboxed) structs are desired
@@ -584,7 +582,9 @@ static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, jl_value_t *jt, jl_
         bool isTuple = jl_is_tuple_type(jt);
         jl_svec_t *ftypes = jl_get_fieldtypes(jst);
         size_t i, ntypes = jl_svec_len(ftypes);
-        if (ntypes == 0 || (jst->layout && jl_datatype_nbits(jst) == 0))
+        if (!jl_struct_try_layout(jst))
+            return NULL; // caller should have checked jl_type_mappable_to_c already, but we'll be nice
+        if (ntypes == 0 || jl_datatype_nbits(jst) == 0)
             return T_void;
         Type *_struct_decl = NULL;
         // TODO: we should probably make a temporary root for `jst` somewhere
@@ -592,8 +592,6 @@ static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, jl_value_t *jt, jl_
         Type *&struct_decl = (ctx && !llvmcall ? ctx->llvmtypes[jst] : _struct_decl);
         if (struct_decl)
             return struct_decl;
-        if (!julia_struct_has_layout(jst, ua_env))
-            return NULL;
         std::vector<Type*> latypes(0);
         bool isarray = true;
         bool isvector = true;
@@ -605,17 +603,13 @@ static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, jl_value_t *jt, jl_
             if (jlasttype != NULL && ty != jlasttype)
                 isvector = false;
             jlasttype = ty;
-            size_t fsz = 0, al = 0;
-            bool isptr = !jl_islayout_inline(ty, &fsz, &al);
-            if (jst->layout) {
-                // NOTE: jl_field_isptr can disagree with jl_islayout_inline here if the
-                // struct decided this field must be a pointer due to a type circularity.
-                // Example from issue #40050: `struct B <: Ref{Tuple{B}}; end`
-                isptr = jl_field_isptr(jst, i);
-                assert((isptr ? sizeof(void*) : fsz + jl_is_uniontype(ty)) == jl_field_size(jst, i));
+            if (jl_field_isatomic(jst, i)) {
+                // TODO: eventually support this?
+                // though it's a bit unclear how the implicit load should be interpreted
+                return NULL;
             }
             Type *lty;
-            if (isptr) {
+            if (jl_field_isptr(jst, i)) {
                 lty = T_prjlvalue;
                 isvector = false;
             }
@@ -626,33 +620,34 @@ static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, jl_value_t *jt, jl_
                 // pick an Integer type size such that alignment will generally be correct,
                 // and always end with an Int8 (selector byte).
                 // We may need to insert padding first to get to the right offset
-                if (al > MAX_ALIGN) {
-                    Type *AlignmentType;
-#if JL_LLVM_VERSION >= 110000
-                    AlignmentType = ArrayType::get(FixedVectorType::get(T_int8, al), 0);
-#else
-                    AlignmentType = ArrayType::get(VectorType::get(T_int8, al), 0);
-#endif
-                    latypes.push_back(AlignmentType);
-                    al = MAX_ALIGN;
+                size_t fsz = 0, al = 0;
+                bool isptr = !jl_islayout_inline(ty, &fsz, &al);
+                assert(!isptr && fsz == jl_field_size(jst, i) - 1); (void)isptr;
+                if (fsz > 0) {
+                    if (al > MAX_ALIGN) {
+                        Type *AlignmentType;
+                        AlignmentType = ArrayType::get(FixedVectorType::get(T_int8, al), 0);
+                        latypes.push_back(AlignmentType);
+                        al = MAX_ALIGN;
+                    }
+                    Type *AlignmentType = IntegerType::get(jl_LLVMContext, 8 * al);
+                    unsigned NumATy = fsz / al;
+                    unsigned remainder = fsz % al;
+                    assert(al == 1 || NumATy > 0);
+                    while (NumATy--)
+                        latypes.push_back(AlignmentType);
+                    while (remainder--)
+                        latypes.push_back(T_int8);
                 }
-                assert(al <= jl_field_align(jst, i));
-                Type *AlignmentType = IntegerType::get(jl_LLVMContext, 8 * al);
-                unsigned NumATy = fsz / al;
-                unsigned remainder = fsz % al;
-                assert(al == 1 || NumATy > 0);
-                while (NumATy--)
-                    latypes.push_back(AlignmentType);
-                while (remainder--)
-                    latypes.push_back(T_int8);
                 latypes.push_back(T_int8);
                 isarray = false;
                 allghost = false;
                 continue;
             }
             else {
-                lty = _julia_struct_to_llvm(ctx, ty, NULL, &isptr, llvmcall);
-                assert(!isptr);
+                bool isptr;
+                lty = _julia_struct_to_llvm(ctx, ty, &isptr, llvmcall);
+                assert(lty && !isptr);
             }
             if (lasttype != NULL && lasttype != lty)
                 isarray = false;
@@ -672,11 +667,7 @@ static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, jl_value_t *jt, jl_
         }
         else if (isarray && !type_is_ghost(lasttype)) {
             if (isTuple && isvector && jl_special_vector_alignment(ntypes, jlasttype) != 0)
-#if JL_LLVM_VERSION >= 110000
                 struct_decl = FixedVectorType::get(lasttype, ntypes);
-#else
-                struct_decl = VectorType::get(lasttype, ntypes);
-#endif
             else if (isTuple || !llvmcall)
                 struct_decl = ArrayType::get(lasttype, ntypes);
             else
@@ -711,16 +702,9 @@ static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, jl_value_t *jt, jl_
     return T_prjlvalue;
 }
 
-static Type *julia_struct_to_llvm(jl_codectx_t &ctx, jl_value_t *jt, jl_unionall_t *ua, bool *isboxed)
-{
-    return _julia_struct_to_llvm(&ctx.emission_context, jt, ua, isboxed);
-}
-
-bool jl_type_mappable_to_c(jl_value_t *ty)
+static Type *julia_struct_to_llvm(jl_codectx_t &ctx, jl_value_t *jt, bool *isboxed)
 {
-    jl_codegen_params_t params;
-    bool toboxed;
-    return _julia_struct_to_llvm(&params, ty, NULL, &toboxed) != NULL;
+    return _julia_struct_to_llvm(&ctx.emission_context, jt, isboxed);
 }
 
 static bool is_datatype_all_pointers(jl_datatype_t *dt)
@@ -990,25 +974,17 @@ static Value *emit_sizeof(jl_codectx_t &ctx, const jl_cgval_t &p)
 
 static Value *emit_datatype_mutabl(jl_codectx_t &ctx, Value *dt)
 {
-    Value *Ptr = emit_bitcast(ctx, decay_derived(ctx, dt), T_pint8);
-    Value *Idx = ConstantInt::get(T_size, offsetof(jl_datatype_t, mutabl));
+    Value *Ptr = emit_bitcast(ctx, decay_derived(ctx, dt), T_ppint8);
+    Value *Idx = ConstantInt::get(T_size, offsetof(jl_datatype_t, name));
+    Value *Nam = tbaa_decorate(tbaa_const,
+            ctx.builder.CreateAlignedLoad(T_pint8, ctx.builder.CreateInBoundsGEP(T_pint8, Ptr, Idx), Align(sizeof(int8_t*))));
+    Value *Idx2 = ConstantInt::get(T_size, offsetof(jl_typename_t, n_uninitialized) + sizeof(((jl_typename_t*)nullptr)->n_uninitialized));
     Value *mutabl = tbaa_decorate(tbaa_const,
-            ctx.builder.CreateAlignedLoad(T_int8, ctx.builder.CreateInBoundsGEP(T_int8, Ptr, Idx), Align(1)));
+            ctx.builder.CreateAlignedLoad(T_int8, ctx.builder.CreateInBoundsGEP(T_int8, Nam, Idx2), Align(1)));
+    mutabl = ctx.builder.CreateLShr(mutabl, 1);
     return ctx.builder.CreateTrunc(mutabl, T_int1);
 }
 
-/* this is valid code, it's simply unused
-static Value *emit_datatype_abstract(jl_codectx_t &ctx, Value *dt)
-{
-    Value *Ptr = emit_bitcast(ctx, decay_derived(ctx, dt), T_pint8);
-    Value *Idx = ConstantInt::get(T_size, offsetof(jl_datatype_t, abstract));
-
-    Value *abstract = tbaa_decorate(tbaa_const,
-            ctx.builder.CreateAlignedLoad(T_int8, ctx.builder.CreateInBoundsGEP(T_int8, Ptr, Idx), Align(1)));
-    return ctx.builder.CreateTrunc(abstract, T_int1);
-}
-*/
-
 static Value *emit_datatype_isprimitivetype(jl_codectx_t &ctx, Value *dt)
 {
     Value *immut = ctx.builder.CreateNot(emit_datatype_mutabl(ctx, dt));
@@ -1028,27 +1004,32 @@ static Value *emit_datatype_name(jl_codectx_t &ctx, Value *dt)
 // the error is always thrown. This may cause non dominated use
 // of SSA value error in the verifier.
 
-static void just_emit_error(jl_codectx_t &ctx, const std::string &txt)
+static void just_emit_error(jl_codectx_t &ctx, Function *F, const std::string &txt)
 {
-    ctx.builder.CreateCall(prepare_call(jlerror_func), stringConstPtr(ctx.emission_context, ctx.builder, txt));
+    ctx.builder.CreateCall(F, stringConstPtr(ctx.emission_context, ctx.builder, txt));
 }
 
-static void emit_error(jl_codectx_t &ctx, const std::string &txt)
+static void emit_error(jl_codectx_t &ctx, Function *F, const std::string &txt)
 {
-    just_emit_error(ctx, txt);
+    just_emit_error(ctx, F, txt);
     ctx.builder.CreateUnreachable();
-    BasicBlock *cont = BasicBlock::Create(jl_LLVMContext,"after_error",ctx.f);
+    BasicBlock *cont = BasicBlock::Create(jl_LLVMContext, "after_error", ctx.f);
     ctx.builder.SetInsertPoint(cont);
 }
 
+static void emit_error(jl_codectx_t &ctx, const std::string &txt) // convenience overload: reports txt through jlerror_func
+{
+    emit_error(ctx, prepare_call(jlerror_func), txt); // delegate to the overload that takes the callee explicitly
+}
+
 // DO NOT PASS IN A CONST CONDITION!
 static void error_unless(jl_codectx_t &ctx, Value *cond, const std::string &msg)
 {
-    BasicBlock *failBB = BasicBlock::Create(jl_LLVMContext,"fail",ctx.f);
-    BasicBlock *passBB = BasicBlock::Create(jl_LLVMContext,"pass");
+    BasicBlock *failBB = BasicBlock::Create(jl_LLVMContext, "fail", ctx.f);
+    BasicBlock *passBB = BasicBlock::Create(jl_LLVMContext, "pass");
     ctx.builder.CreateCondBr(cond, passBB, failBB);
     ctx.builder.SetInsertPoint(failBB);
-    just_emit_error(ctx, msg);
+    just_emit_error(ctx, prepare_call(jlerror_func), msg);
     ctx.builder.CreateUnreachable();
     ctx.f->getBasicBlockList().push_back(passBB);
     ctx.builder.SetInsertPoint(passBB);
@@ -1083,6 +1064,7 @@ static Value *null_pointer_cmp(jl_codectx_t &ctx, Value *v)
     return ctx.builder.CreateICmpNE(v, Constant::getNullValue(v->getType()));
 }
 
+
 // If `nullcheck` is not NULL and a pointer NULL check is necessary
 // store the pointer to be checked in `*nullcheck` instead of checking it
 static void null_pointer_check(jl_codectx_t &ctx, Value *v, Value **nullcheck = nullptr)
@@ -1095,6 +1077,62 @@ static void null_pointer_check(jl_codectx_t &ctx, Value *v, Value **nullcheck =
             literal_pointer_val(ctx, jl_undefref_exception));
 }
 
+template<typename Func>
+static Value *emit_guarded_test(jl_codectx_t &ctx, Value *ifnot, Value *defval, Func &&func) // emit func() only on the path where `ifnot` is true; the result is `defval` on the other path
+{
+    if (auto Cond = dyn_cast<ConstantInt>(ifnot)) { // guard is a constant: decide here and emit no branch
+        if (Cond->isZero())
+            return defval;
+        return func();
+    }
+    BasicBlock *currBB = ctx.builder.GetInsertBlock(); // block the conditional branch is emitted from (phi predecessor for defval)
+    BasicBlock *passBB = BasicBlock::Create(jl_LLVMContext, "guard_pass", ctx.f);
+    BasicBlock *exitBB = BasicBlock::Create(jl_LLVMContext, "guard_exit", ctx.f);
+    ctx.builder.CreateCondBr(ifnot, passBB, exitBB);
+    ctx.builder.SetInsertPoint(passBB);
+    auto res = func();
+    passBB = ctx.builder.GetInsertBlock(); // func() may have moved the insert point; use the block it ended in as the phi predecessor
+    ctx.builder.CreateBr(exitBB);
+    ctx.builder.SetInsertPoint(exitBB);
+    if (defval == nullptr) // no default value supplied: no phi is built and the caller gets nullptr
+        return nullptr;
+    PHINode *phi = ctx.builder.CreatePHI(defval->getType(), 2); // merge the two incoming paths
+    phi->addIncoming(defval, currBB); // guard was false: func() was skipped
+    phi->addIncoming(res, passBB); // guard was true: value produced by func()
+    return phi;
+}
+
+template<typename Func>
+static Value *emit_guarded_test(jl_codectx_t &ctx, Value *ifnot, bool defval, Func &&func) // overload taking the default as a C++ bool
+{
+    return emit_guarded_test(ctx, ifnot, ConstantInt::get(T_int1, defval), func); // wrap the default as a T_int1 constant and forward
+}
+
+template<typename Func>
+static Value *emit_nullcheck_guard(jl_codectx_t &ctx, Value *nullcheck, Func &&func) // emit func() guarded by a non-NULL test of `nullcheck`
+{
+    if (!nullcheck) // no pointer to test: emit func() unconditionally
+        return func();
+    return emit_guarded_test(ctx, null_pointer_cmp(ctx, nullcheck), false, func); // result is false on the path where the pointer is NULL
+}
+
+template<typename Func>
+static Value *emit_nullcheck_guard2(jl_codectx_t &ctx, Value *nullcheck1,
+                                    Value *nullcheck2, Func &&func)
+{
+    if (!nullcheck1) // only the second pointer (if any) needs a guard
+        return emit_nullcheck_guard(ctx, nullcheck2, func);
+    if (!nullcheck2) // only the first pointer needs a guard
+        return emit_nullcheck_guard(ctx, nullcheck1, func);
+    nullcheck1 = null_pointer_cmp(ctx, nullcheck1); // from here on these hold the "is not NULL" tests
+    nullcheck2 = null_pointer_cmp(ctx, nullcheck2);
+    // If both are NULL, the result is true; if exactly one is NULL, it is false; func() is emitted only when both are non-NULL.
+    return emit_guarded_test(ctx, ctx.builder.CreateOr(nullcheck1, nullcheck2), true, [&] {
+        return emit_guarded_test(ctx, ctx.builder.CreateAnd(nullcheck1, nullcheck2),
+                                 false, func);
+    });
+}
+
 static void emit_type_error(jl_codectx_t &ctx, const jl_cgval_t &x, Value *type, const std::string &msg)
 {
     Value *msg_val = stringConstPtr(ctx.emission_context, ctx.builder, msg);
@@ -1119,7 +1157,7 @@ static bool _can_optimize_isa(jl_value_t *type, int &counter)
     if (jl_is_concrete_type(type))
         return true;
     jl_datatype_t *dt = (jl_datatype_t*)jl_unwrap_unionall(type);
-    if (jl_is_datatype(dt) && !dt->abstract && jl_subtype(dt->name->wrapper, type))
+    if (jl_is_datatype(dt) && !dt->name->abstract && jl_subtype(dt->name->wrapper, type))
         return true;
     return false;
 }
@@ -1130,6 +1168,15 @@ static bool can_optimize_isa_union(jl_uniontype_t *type)
     return (_can_optimize_isa(type->a, counter) && _can_optimize_isa(type->b, counter));
 }
 
+// a simple case of emit_isa that obviously does not include a safe-point
+static Value *emit_exactly_isa(jl_codectx_t &ctx, const jl_cgval_t &arg, jl_value_t *dt)
+{
+    assert(jl_is_concrete_type(dt)); // callers must pass a concrete type
+    return ctx.builder.CreateICmpEQ( // pointer-equality test between the two values below
+            emit_typeof_boxed(ctx, arg),
+            track_pjlvalue(ctx, literal_pointer_val(ctx, dt)));
+}
+
 static std::pair<Value*, bool> emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x,
                                         jl_value_t *type, const std::string *msg);
 
@@ -1227,15 +1274,10 @@ static std::pair<Value*, bool> emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x,
                 return std::make_pair(ConstantInt::get(T_int1, 0), false);
             }
         }
-        if (auto val = ((jl_datatype_t*)intersected_type)->instance) {
-            auto ptr = track_pjlvalue(ctx, literal_pointer_val(ctx, val));
-            return {ctx.builder.CreateICmpEQ(boxed(ctx, x), ptr), false};
-        }
-        return std::make_pair(ctx.builder.CreateICmpEQ(emit_typeof_boxed(ctx, x),
-            track_pjlvalue(ctx, literal_pointer_val(ctx, intersected_type))), false);
+        return std::make_pair(emit_exactly_isa(ctx, x, intersected_type), false);
     }
     jl_datatype_t *dt = (jl_datatype_t*)jl_unwrap_unionall(intersected_type);
-    if (jl_is_datatype(dt) && !dt->abstract && jl_subtype(dt->name->wrapper, type)) {
+    if (jl_is_datatype(dt) && !dt->name->abstract && jl_subtype(dt->name->wrapper, type)) {
         // intersection is a supertype of all instances of its constructor,
         // so the isa test reduces to a comparison of the typename by pointer
         return std::make_pair(
@@ -1296,8 +1338,9 @@ static void emit_typecheck(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *t
 static Value *emit_isconcrete(jl_codectx_t &ctx, Value *typ)
 {
     Value *isconcrete;
-    isconcrete = ctx.builder.CreateConstInBoundsGEP1_32(T_int8, emit_bitcast(ctx, decay_derived(ctx, typ), T_pint8), offsetof(jl_datatype_t, isconcretetype));
+    isconcrete = ctx.builder.CreateConstInBoundsGEP1_32(T_int8, emit_bitcast(ctx, decay_derived(ctx, typ), T_pint8), offsetof(jl_datatype_t, hash) + sizeof(((jl_datatype_t*)nullptr)->hash));
     isconcrete = tbaa_decorate(tbaa_const, ctx.builder.CreateAlignedLoad(T_int8, isconcrete, Align(1)));
+    isconcrete = ctx.builder.CreateLShr(isconcrete, 1);
     isconcrete = ctx.builder.CreateTrunc(isconcrete, T_int1);
     return isconcrete;
 }
@@ -1412,17 +1455,43 @@ Value *extract_first_ptr(jl_codectx_t &ctx, Value *V)
     return ctx.builder.CreateExtractValue(V, path);
 }
 
+
+static void emit_lockstate_value(jl_codectx_t &ctx, Value *strct, bool newstate) // emit a lock (newstate == true) or unlock (newstate == false) call on `strct`
+{
+    Value *v = mark_callee_rooted(ctx, strct);
+    ctx.builder.CreateCall(prepare_call(newstate ? jllockvalue_func : jlunlockvalue_func), v); // true selects jllockvalue_func, false selects jlunlockvalue_func
+}
+static void emit_lockstate_value(jl_codectx_t &ctx, const jl_cgval_t &strct, bool newstate) // jl_cgval_t convenience overload
+{
+    assert(strct.isboxed); // only boxed values are accepted here
+    emit_lockstate_value(ctx, boxed(ctx, strct), newstate); // forward to the Value* overload
+}
+
+
 // If `nullcheck` is not NULL and a pointer NULL check is necessary
 // store the pointer to be checked in `*nullcheck` instead of checking it
 static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, jl_value_t *jltype,
-                             MDNode *tbaa, MDNode *aliasscope,
+                             MDNode *tbaa, MDNode *aliasscope, bool isboxed, AtomicOrdering Order,
                              bool maybe_null_if_boxed = true, unsigned alignment = 0,
                              Value **nullcheck = nullptr)
 {
-    bool isboxed;
-    Type *elty = julia_type_to_llvm(ctx, jltype, &isboxed);
+    Type *elty = isboxed ? T_prjlvalue : julia_type_to_llvm(ctx, jltype);
     if (type_is_ghost(elty))
         return ghostValue(jltype);
+    AllocaInst *intcast = NULL;
+    if (!isboxed && Order != AtomicOrdering::NotAtomic && !elty->isIntOrPtrTy()) {
+        const DataLayout &DL = jl_data_layout;
+        unsigned nb = DL.getTypeSizeInBits(elty);
+        intcast = ctx.builder.CreateAlloca(elty);
+        elty = Type::getIntNTy(jl_LLVMContext, nb);
+    }
+    Type *realelty = elty;
+    if (Order != AtomicOrdering::NotAtomic && isa<IntegerType>(elty)) {
+        unsigned nb = cast<IntegerType>(elty)->getBitWidth();
+        unsigned nb2 = PowerOf2Ceil(nb);
+        if (nb != nb2)
+            elty = Type::getIntNTy(jl_LLVMContext, nb2);
+    }
     Type *ptrty = PointerType::get(elty, ptr->getType()->getPointerAddressSpace());
     Value *data;
     if (ptr->getType() != ptrty)
@@ -1431,7 +1500,7 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
         data = ptr;
     if (idx_0based)
         data = ctx.builder.CreateInBoundsGEP(elty, data, idx_0based);
-    Instruction *load;
+    Value *instr;
     // TODO: can only lazy load if we can create a gc root for ptr for the lifetime of elt
     //if (elty->isAggregateType() && tbaa == tbaa_immut && !alignment) { // can lazy load on demand, no copy needed
     //    elt = data;
@@ -1441,17 +1510,23 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
             alignment = sizeof(void*);
         else if (!alignment)
             alignment = julia_alignment(jltype);
-        load = ctx.builder.CreateAlignedLoad(data, Align(alignment), false);
+        LoadInst *load = ctx.builder.CreateAlignedLoad(data, Align(alignment), false);
+        load->setOrdering(Order);
         if (aliasscope)
             load->setMetadata("alias.scope", aliasscope);
-        if (isboxed) {
-            cast<LoadInst>(load)->setOrdering(AtomicOrdering::Unordered);
-            load = maybe_mark_load_dereferenceable(load, true, jltype);
-        }
+        if (isboxed)
+            maybe_mark_load_dereferenceable(load, true, jltype);
         if (tbaa)
-            load = tbaa_decorate(tbaa, load);
+            tbaa_decorate(tbaa, load);
+        instr = load;
+        if (elty != realelty)
+            instr = ctx.builder.CreateTrunc(instr, realelty);
+        if (intcast) {
+            ctx.builder.CreateStore(instr, ctx.builder.CreateBitCast(intcast, instr->getType()->getPointerTo()));
+            instr = ctx.builder.CreateLoad(intcast);
+        }
         if (maybe_null_if_boxed) {
-            Value *first_ptr = isboxed ? load : extract_first_ptr(ctx, load);
+            Value *first_ptr = isboxed ? instr : extract_first_ptr(ctx, instr);
             if (first_ptr)
                 null_pointer_check(ctx, first_ptr, nullcheck);
         }
@@ -1461,26 +1536,84 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
         //load->setMetadata(LLVMContext::MD_range, MDNode::get(jl_LLVMContext, {
         //    ConstantAsMetadata::get(ConstantInt::get(T_int8, 0)),
         //    ConstantAsMetadata::get(ConstantInt::get(T_int8, 2)) }));
-        load = ctx.builder.Insert(CastInst::Create(Instruction::Trunc, load, T_int1));
+        instr = ctx.builder.CreateTrunc(instr, T_int1);
     }
-    return mark_julia_type(ctx, load, isboxed, jltype);
+    return mark_julia_type(ctx, instr, isboxed, jltype);
 }
 
-static void typed_store(jl_codectx_t &ctx,
-        Value *ptr, Value *idx_0based, const jl_cgval_t &rhs,
+static jl_cgval_t typed_store(jl_codectx_t &ctx,
+        Value *ptr, Value *idx_0based, jl_cgval_t rhs, jl_cgval_t cmp,
         jl_value_t *jltype, MDNode *tbaa, MDNode *aliasscope,
         Value *parent,  // for the write barrier, NULL if no barrier needed
-        unsigned alignment = 0)
+        bool isboxed, AtomicOrdering Order, AtomicOrdering FailOrder, unsigned alignment,
+        bool needlock, bool issetfield, bool isreplacefield, bool isswapfield, bool ismodifyfield,
+        bool maybe_null_if_boxed, const jl_cgval_t *modifyop, const std::string &fname)
 {
-    bool isboxed;
-    Type *elty = julia_type_to_llvm(ctx, jltype, &isboxed);
-    if (type_is_ghost(elty))
-        return;
-    Value *r;
-    if (!isboxed)
-        r = emit_unbox(ctx, elty, rhs, jltype);
-    else
-        r = boxed(ctx, rhs);
+    auto newval = [&](const jl_cgval_t &lhs) {
+        const jl_cgval_t argv[3] = { cmp, lhs, rhs };
+        jl_cgval_t ret;
+        if (modifyop) {
+            ret = emit_invoke(ctx, *modifyop, argv, 3, (jl_value_t*)jl_any_type);
+        }
+        else {
+            Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, 3, JLCALL_F_CC);
+            ret = mark_julia_type(ctx, callval, true, jl_any_type);
+        }
+        if (!jl_subtype(ret.typ, jltype)) {
+            emit_typecheck(ctx, ret, jltype, fname + "typed_store");
+            ret = update_julia_type(ctx, ret, jltype);
+        }
+        return ret;
+    };
+    assert(!needlock || parent != nullptr);
+    Type *elty = isboxed ? T_prjlvalue : julia_type_to_llvm(ctx, jltype);
+    if (type_is_ghost(elty)) {
+        if (isStrongerThanMonotonic(Order))
+            ctx.builder.CreateFence(Order);
+        if (issetfield) {
+            return rhs;
+        }
+        else if (isreplacefield) {
+            Value *Success = emit_f_is(ctx, cmp, ghostValue(jltype));
+            Success = ctx.builder.CreateZExt(Success, T_int8);
+            const jl_cgval_t argv[2] = {ghostValue(jltype), mark_julia_type(ctx, Success, false, jl_bool_type)};
+            jl_datatype_t *rettyp = jl_apply_cmpswap_type(jltype);
+            return emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+        }
+        else if (isswapfield) {
+            return ghostValue(jltype);
+        }
+        else { // modifyfield
+            jl_cgval_t oldval = ghostValue(jltype);
+            const jl_cgval_t argv[2] = { oldval, newval(oldval) };
+            jl_datatype_t *rettyp = jl_apply_modify_type(jltype);
+            return emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+        }
+    }
+    Value *intcast = nullptr;
+    if (!isboxed && Order != AtomicOrdering::NotAtomic && !elty->isIntOrPtrTy()) {
+        const DataLayout &DL = jl_data_layout;
+        unsigned nb = DL.getTypeSizeInBits(elty);
+        if (!issetfield)
+            intcast = ctx.builder.CreateAlloca(elty);
+        elty = Type::getIntNTy(jl_LLVMContext, nb);
+    }
+    Type *realelty = elty;
+    if (Order != AtomicOrdering::NotAtomic && isa<IntegerType>(elty)) {
+        unsigned nb = cast<IntegerType>(elty)->getBitWidth();
+        unsigned nb2 = PowerOf2Ceil(nb);
+        if (nb != nb2)
+            elty = Type::getIntNTy(jl_LLVMContext, nb2);
+    }
+    Value *r = nullptr;
+    if (issetfield || isswapfield || isreplacefield)  {
+        if (!isboxed)
+            r = emit_unbox(ctx, realelty, rhs, jltype);
+        else
+            r = boxed(ctx, rhs);
+        if (realelty != elty)
+            r = ctx.builder.CreateZExt(r, elty);
+    }
     Type *ptrty = PointerType::get(elty, ptr->getType()->getPointerAddressSpace());
     if (ptr->getType() != ptrty)
         ptr = ctx.builder.CreateBitCast(ptr, ptrty);
@@ -1490,19 +1623,276 @@ static void typed_store(jl_codectx_t &ctx,
         alignment = sizeof(void*);
     else if (!alignment)
         alignment = julia_alignment(jltype);
-    StoreInst *store = ctx.builder.CreateAlignedStore(r, ptr, Align(alignment));
-    if (isboxed) // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
-        store->setOrdering(AtomicOrdering::Unordered);
-    if (aliasscope)
-        store->setMetadata("noalias", aliasscope);
-    if (tbaa)
-        tbaa_decorate(tbaa, store);
+    Value *instr = nullptr;
+    Value *Compare = nullptr;
+    Value *Success = nullptr;
+    BasicBlock *DoneBB = nullptr;
+    if (needlock)
+        emit_lockstate_value(ctx, parent, true);
+    jl_cgval_t oldval = rhs;
+    if (issetfield || (Order == AtomicOrdering::NotAtomic && isswapfield)) {
+        if (isswapfield) {
+            auto *load = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment));
+            if (aliasscope)
+                load->setMetadata("noalias", aliasscope);
+            if (tbaa)
+                tbaa_decorate(tbaa, load);
+            assert(realelty == elty);
+            instr = load;
+        }
+        StoreInst *store = ctx.builder.CreateAlignedStore(r, ptr, Align(alignment));
+        store->setOrdering(Order);
+        if (aliasscope)
+            store->setMetadata("noalias", aliasscope);
+        if (tbaa)
+            tbaa_decorate(tbaa, store);
+    }
+    else if (isswapfield && !isboxed) {
+        // we can't handle isboxed here as a workaround for really bad LLVM
+        // design issue: plain Xchg only works with integers
+#if JL_LLVM_VERSION >= 130000
+        auto *store = ctx.builder.CreateAtomicRMW(AtomicRMWInst::Xchg, ptr, r, Align(alignment), Order);
+#else
+        auto *store = ctx.builder.CreateAtomicRMW(AtomicRMWInst::Xchg, ptr, r, Order);
+        store->setAlignment(Align(alignment));
+#endif
+        if (aliasscope)
+            store->setMetadata("noalias", aliasscope);
+        if (tbaa)
+            tbaa_decorate(tbaa, store);
+        instr = store;
+    }
+    else {
+        // replacefield, modifyfield, or swapfield (isboxed && atomic)
+        DoneBB = BasicBlock::Create(jl_LLVMContext, "done_xchg", ctx.f);
+        bool needloop;
+        PHINode *Succ = nullptr, *Current = nullptr;
+        if (isreplacefield) {
+            if (Order == AtomicOrdering::NotAtomic) {
+                needloop = false;
+            }
+            else if (!isboxed) {
+                needloop = ((jl_datatype_t*)jltype)->layout->haspadding;
+                Value *SameType = emit_isa(ctx, cmp, jltype, nullptr).first;
+                if (SameType != ConstantInt::getTrue(jl_LLVMContext)) {
+                    BasicBlock *SkipBB = BasicBlock::Create(jl_LLVMContext, "skip_xchg", ctx.f);
+                    BasicBlock *BB = BasicBlock::Create(jl_LLVMContext, "ok_xchg", ctx.f);
+                    ctx.builder.CreateCondBr(SameType, BB, SkipBB);
+                    ctx.builder.SetInsertPoint(SkipBB);
+                    LoadInst *load = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment));
+                    load->setOrdering(FailOrder);
+                    if (aliasscope)
+                        load->setMetadata("noalias", aliasscope);
+                    if (tbaa)
+                        tbaa_decorate(tbaa, load);
+                    instr = load;
+                    ctx.builder.CreateBr(DoneBB);
+                    ctx.builder.SetInsertPoint(DoneBB);
+                    Succ = ctx.builder.CreatePHI(T_int1, 2);
+                    Succ->addIncoming(ConstantInt::get(T_int1, false), SkipBB);
+                    Current = ctx.builder.CreatePHI(instr->getType(), 2);
+                    Current->addIncoming(instr, SkipBB);
+                    ctx.builder.SetInsertPoint(BB);
+                }
+                Compare = emit_unbox(ctx, realelty, cmp, jltype);
+                if (realelty != elty)
+                    Compare = ctx.builder.CreateZExt(Compare, elty);
+            }
+            else if (cmp.isboxed) {
+                Compare = boxed(ctx, cmp);
+                needloop = !jl_is_mutable_datatype(jltype);
+            }
+            else {
+                Compare = V_rnull;
+                needloop = true;
+            }
+        }
+        else { // swap or modify
+            LoadInst *Current = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment));
+            Current->setOrdering(Order == AtomicOrdering::NotAtomic ? Order : AtomicOrdering::Monotonic);
+            if (aliasscope)
+                Current->setMetadata("noalias", aliasscope);
+            if (tbaa)
+                tbaa_decorate(tbaa, Current);
+            Compare = Current;
+            needloop = !isswapfield || Order != AtomicOrdering::NotAtomic;
+        }
+        BasicBlock *BB = NULL;
+        PHINode *CmpPhi = NULL;
+        if (needloop) {
+            BasicBlock *From = ctx.builder.GetInsertBlock();
+            BB = BasicBlock::Create(jl_LLVMContext, "xchg", ctx.f);
+            ctx.builder.CreateBr(BB);
+            ctx.builder.SetInsertPoint(BB);
+            CmpPhi = ctx.builder.CreatePHI(elty, 2);
+            CmpPhi->addIncoming(Compare, From);
+            Compare = CmpPhi;
+        }
+        if (ismodifyfield) {
+            if (needlock)
+                emit_lockstate_value(ctx, parent, false);
+            Value *realCompare = Compare;
+            if (realelty != elty)
+                realCompare = ctx.builder.CreateTrunc(realCompare, realelty);
+            if (intcast) {
+                ctx.builder.CreateStore(realCompare, ctx.builder.CreateBitCast(intcast, realCompare->getType()->getPointerTo()));
+                if (maybe_null_if_boxed)
+                    realCompare = ctx.builder.CreateLoad(intcast);
+            }
+            if (maybe_null_if_boxed) {
+                Value *first_ptr = isboxed ? Compare : extract_first_ptr(ctx, Compare);
+                if (first_ptr)
+                    null_pointer_check(ctx, first_ptr, nullptr);
+            }
+            if (intcast)
+                oldval = mark_julia_slot(intcast, jltype, NULL, tbaa_stack);
+            else
+                oldval = mark_julia_type(ctx, realCompare, isboxed, jltype);
+            rhs = newval(oldval);
+            if (!isboxed)
+                r = emit_unbox(ctx, realelty, rhs, jltype);
+            else
+                r = boxed(ctx, rhs);
+            if (realelty != elty)
+                r = ctx.builder.CreateZExt(r, elty);
+            if (needlock)
+                emit_lockstate_value(ctx, parent, true);
+            cmp = oldval;
+        }
+        Value *Done;
+        if (Order == AtomicOrdering::NotAtomic) {
+            // modifyfield or replacefield
+            assert(elty == realelty && !intcast);
+            auto *load = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment));
+            if (aliasscope)
+                load->setMetadata("noalias", aliasscope);
+            if (tbaa)
+                tbaa_decorate(tbaa, load);
+            Value *first_ptr = nullptr;
+            if (maybe_null_if_boxed && !ismodifyfield)
+                first_ptr = isboxed ? load : extract_first_ptr(ctx, load);
+            oldval = mark_julia_type(ctx, load, isboxed, jltype);
+            Success = emit_nullcheck_guard(ctx, first_ptr, [&] {
+                return emit_f_is(ctx, oldval, cmp);
+            });
+            if (needloop && ismodifyfield)
+                CmpPhi->addIncoming(load, ctx.builder.GetInsertBlock());
+            assert(Succ == nullptr);
+            BasicBlock *XchgBB = BasicBlock::Create(jl_LLVMContext, "xchg", ctx.f);
+            ctx.builder.CreateCondBr(Success, XchgBB, needloop && ismodifyfield ? BB : DoneBB);
+            ctx.builder.SetInsertPoint(XchgBB);
+            auto *store = ctx.builder.CreateAlignedStore(r, ptr, Align(alignment));
+            if (aliasscope)
+                store->setMetadata("noalias", aliasscope);
+            if (tbaa)
+                tbaa_decorate(tbaa, store);
+            ctx.builder.CreateBr(DoneBB);
+            instr = load;
+        }
+        else {
+            if (Order == AtomicOrdering::Unordered)
+                Order = AtomicOrdering::Monotonic;
+            if (!isreplacefield)
+                FailOrder = AtomicOrdering::Monotonic;
+            else if (FailOrder == AtomicOrdering::Unordered)
+                FailOrder = AtomicOrdering::Monotonic;
+#if JL_LLVM_VERSION >= 130000
+            auto *store = ctx.builder.CreateAtomicCmpXchg(ptr, Compare, r, Align(alignment), Order, FailOrder);
+#else
+            auto *store = ctx.builder.CreateAtomicCmpXchg(ptr, Compare, r, Order, FailOrder);
+            store->setAlignment(Align(alignment));
+#endif
+            if (aliasscope)
+                store->setMetadata("noalias", aliasscope);
+            if (tbaa)
+                tbaa_decorate(tbaa, store);
+            instr = ctx.builder.Insert(ExtractValueInst::Create(store, 0));
+            Success = ctx.builder.Insert(ExtractValueInst::Create(store, 1));
+            Done = Success;
+            if (isreplacefield && needloop) {
+                Value *realinstr = instr;
+                if (realelty != elty)
+                    realinstr = ctx.builder.CreateTrunc(realinstr, realelty);
+                if (intcast) {
+                    ctx.builder.CreateStore(realinstr, ctx.builder.CreateBitCast(intcast, realinstr->getType()->getPointerTo()));
+                    oldval = mark_julia_slot(intcast, jltype, NULL, tbaa_stack);
+                    if (maybe_null_if_boxed)
+                        realinstr = ctx.builder.CreateLoad(intcast);
+                }
+                else {
+                    oldval = mark_julia_type(ctx, realinstr, isboxed, jltype);
+                }
+                Done = emit_guarded_test(ctx, ctx.builder.CreateNot(Success), false, [&] {
+                    Value *first_ptr = nullptr;
+                    if (maybe_null_if_boxed)
+                        first_ptr = isboxed ? realinstr : extract_first_ptr(ctx, realinstr);
+                    return emit_nullcheck_guard(ctx, first_ptr, [&] {
+                        return emit_f_is(ctx, oldval, cmp);
+                    });
+                });
+                Done = ctx.builder.CreateNot(Done);
+            }
+            if (needloop)
+                ctx.builder.CreateCondBr(Done, DoneBB, BB);
+            else
+                ctx.builder.CreateBr(DoneBB);
+            if (needloop)
+                CmpPhi->addIncoming(instr, ctx.builder.GetInsertBlock());
+        }
+        if (Succ != nullptr) {
+            Current->addIncoming(instr, ctx.builder.GetInsertBlock());
+            instr = Current;
+            Succ->addIncoming(Success, ctx.builder.GetInsertBlock());
+            Success = Succ;
+        }
+    }
+    if (DoneBB)
+        ctx.builder.SetInsertPoint(DoneBB);
+    if (needlock)
+        emit_lockstate_value(ctx, parent, false);
     if (parent != NULL) {
+        if (isreplacefield) {
+            // TODO: avoid this branch if we aren't making a write barrier
+            BasicBlock *BB = BasicBlock::Create(jl_LLVMContext, "xchg_wb", ctx.f);
+            DoneBB = BasicBlock::Create(jl_LLVMContext, "done_xchg_wb", ctx.f);
+            ctx.builder.CreateCondBr(Success, BB, DoneBB);
+            ctx.builder.SetInsertPoint(BB);
+        }
         if (!isboxed)
             emit_write_multibarrier(ctx, parent, r, rhs.typ);
         else if (!type_is_permalloc(rhs.typ))
             emit_write_barrier(ctx, parent, r);
+        if (isreplacefield) {
+            ctx.builder.CreateBr(DoneBB);
+            ctx.builder.SetInsertPoint(DoneBB);
+        }
+    }
+    if (ismodifyfield) {
+        const jl_cgval_t argv[2] = { oldval, rhs };
+        jl_datatype_t *rettyp = jl_apply_modify_type(jltype);
+        oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
     }
+    else if (!issetfield) { // swapfield or replacefield
+        if (realelty != elty)
+            instr = ctx.builder.Insert(CastInst::Create(Instruction::Trunc, instr, realelty));
+        if (intcast) {
+            ctx.builder.CreateStore(instr, ctx.builder.CreateBitCast(intcast, instr->getType()->getPointerTo()));
+            instr = ctx.builder.CreateLoad(intcast);
+        }
+        if (maybe_null_if_boxed) {
+            Value *first_ptr = isboxed ? instr : extract_first_ptr(ctx, instr);
+            if (first_ptr)
+                null_pointer_check(ctx, first_ptr, nullptr);
+        }
+        oldval = mark_julia_type(ctx, instr, isboxed, jltype);
+        if (isreplacefield) {
+            Success = ctx.builder.CreateZExt(Success, T_int8);
+            const jl_cgval_t argv[2] = {oldval, mark_julia_type(ctx, Success, false, jl_bool_type)};
+            jl_datatype_t *rettyp = jl_apply_cmpswap_type(jltype);
+            oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+        }
+    }
+    return oldval;
 }
 
 // --- convert boolean value to julia ---
@@ -1580,11 +1970,7 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, MDNode *tbaa_dst, Va
     // for the load part (x.tbaa) and the store part (tbaa_stack).
     // since the tbaa lattice has to be a tree we have unfortunately
     // x.tbaa ∪ tbaa_stack = tbaa_root if x.tbaa != tbaa_stack
-#if JL_LLVM_VERSION >= 100000
     ctx.builder.CreateMemCpy(dst, MaybeAlign(align), src, MaybeAlign(0), sz, is_volatile, MDNode::getMostGenericTBAA(tbaa_dst, tbaa_src));
-#else
-    ctx.builder.CreateMemCpy(dst, align, src, 0, sz, is_volatile, MDNode::getMostGenericTBAA(tbaa_dst, tbaa_src));
-#endif
 }
 
 static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, MDNode *tbaa_dst, Value *src, MDNode *tbaa_src,
@@ -1594,11 +1980,7 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, MDNode *tbaa_dst, Va
         emit_memcpy_llvm(ctx, dst, tbaa_dst, src, tbaa_src, const_sz->getZExtValue(), align, is_volatile);
         return;
     }
-#if JL_LLVM_VERSION >= 100000
     ctx.builder.CreateMemCpy(dst, MaybeAlign(align), src, MaybeAlign(0), sz, is_volatile, MDNode::getMostGenericTBAA(tbaa_dst, tbaa_src));
-#else
-    ctx.builder.CreateMemCpy(dst, align, src, 0, sz, is_volatile, MDNode::getMostGenericTBAA(tbaa_dst, tbaa_src));
-#endif
 }
 
 template<typename T1>
@@ -1616,17 +1998,22 @@ static void emit_memcpy(jl_codectx_t &ctx, Value *dst, MDNode *tbaa_dst, const j
 }
 
 
+static void emit_atomic_error(jl_codectx_t &ctx, const std::string &msg) // report `msg` through the atomic-error function instead of the generic one
+{ // callers in this file treat the code following this call as unreachable
+    emit_error(ctx, prepare_call(jlatomicerror_func), msg); // jlatomicerror_func is the JuliaFunction declared for "jl_atomic_error"
+}
 
 static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &strct,
                                          unsigned idx, jl_datatype_t *jt,
-                                         Value **nullcheck = nullptr);
+                                         enum jl_memory_order order, Value **nullcheck=nullptr);
 
 static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
         jl_cgval_t *ret, jl_cgval_t strct,
-        Value *idx, jl_datatype_t *stt, jl_value_t *inbounds)
+        Value *idx, jl_datatype_t *stt, jl_value_t *inbounds,
+        enum jl_memory_order order)
 {
     size_t nfields = jl_datatype_nfields(stt);
-    bool maybe_null = (unsigned)stt->ninitialized != nfields;
+    bool maybe_null = (unsigned)stt->name->n_uninitialized != 0;
     auto idx0 = [&]() {
         return emit_bounds_check(ctx, strct, (jl_value_t*)stt, idx, ConstantInt::get(T_size, nfields), inbounds);
     };
@@ -1636,8 +2023,11 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
         return true;
     }
     if (nfields == 1) {
+        if (jl_has_free_typevars(jl_field_type(stt, 0))) {
+            return false;
+        }
         (void)idx0();
-        *ret = emit_getfield_knownidx(ctx, strct, 0, stt);
+        *ret = emit_getfield_knownidx(ctx, strct, 0, stt, order);
         return true;
     }
     assert(!jl_is_vecelement_type((jl_value_t*)stt));
@@ -1695,7 +2085,13 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
         }
     }
 
-    if (strct.ispointer()) { // boxed or stack
+    bool maybeatomic = stt->name->atomicfields != NULL;
+    if (strct.ispointer() && !maybeatomic) { // boxed or stack
+        if (order != jl_memory_order_notatomic && order != jl_memory_order_unspecified) {
+            emit_atomic_error(ctx, "getfield: non-atomic field cannot be accessed atomically");
+            *ret = jl_cgval_t(); // unreachable
+            return true;
+        }
         if (is_datatype_all_pointers(stt)) {
             size_t minimum_field_size = std::numeric_limits<size_t>::max();
             size_t minimum_align = JL_HEAP_ALIGNMENT;
@@ -1729,7 +2125,7 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
             assert(jl_is_concrete_type(jft));
             idx = idx0();
             Value *ptr = maybe_decay_tracked(ctx, data_pointer(ctx, strct));
-            if (!stt->mutabl && !(maybe_null && (jft == (jl_value_t*)jl_bool_type ||
+            if (!stt->name->mutabl && !(maybe_null && (jft == (jl_value_t*)jl_bool_type ||
                                                  ((jl_datatype_t*)jft)->layout->npointers))) {
                 // just compute the pointer and let user load it when necessary
                 Type *fty = julia_type_to_llvm(ctx, jft);
@@ -1737,7 +2133,7 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
                 *ret = mark_julia_slot(addr, jft, NULL, strct.tbaa);
                 return true;
             }
-            *ret = typed_load(ctx, ptr, idx, jft, strct.tbaa, nullptr, maybe_null);
+            *ret = typed_load(ctx, ptr, idx, jft, strct.tbaa, nullptr, false, AtomicOrdering::NotAtomic, maybe_null);
             return true;
         }
         else if (strct.isboxed) {
@@ -1750,20 +2146,52 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
     return false;
 }
 
+static jl_cgval_t emit_unionload(jl_codectx_t &ctx, Value *addr, Value *ptindex, jl_value_t *jfty, size_t fsz, size_t al, MDNode *tbaa, bool mutabl) // load an inline union field: payload at addr (fsz bytes, alignment al), selector byte at ptindex
+{ // returns a slot value of type jfty whose TIndex is the loaded selector + 1
+    Instruction *tindex0 = tbaa_decorate(tbaa_unionselbyte, ctx.builder.CreateAlignedLoad(T_int8, ptindex, Align(1))); // selector byte exactly as stored
+    //tindex0->setMetadata(LLVMContext::MD_range, MDNode::get(jl_LLVMContext, {
+    //    ConstantAsMetadata::get(ConstantInt::get(T_int8, 0)),
+    //    ConstantAsMetadata::get(ConstantInt::get(T_int8, union_max)) }));
+    Value *tindex = ctx.builder.CreateNUWAdd(ConstantInt::get(T_int8, 1), tindex0); // emit_setfield stores tindex - 1, so add the 1 back here
+    if (mutabl) { // the source memory may be overwritten later, so snapshot the payload
+        // move value to an immutable stack slot (excluding tindex)
+        Type *ET = IntegerType::get(jl_LLVMContext, 8 * al); // integer element type that is al bytes wide
+        AllocaInst *lv = emit_static_alloca(ctx, ET);
+        lv->setOperand(0, ConstantInt::get(T_int32, (fsz + al - 1) / al)); // element count = ceil(fsz / al), enough room for fsz bytes
+        emit_memcpy(ctx, lv, tbaa, addr, tbaa, fsz, al); // copy only the fsz payload bytes
+        addr = lv; // the returned slot refers to the copy from here on
+    }
+    return mark_julia_slot(addr, jfty, tindex, tbaa);
+}
+
 // If `nullcheck` is not NULL and a pointer NULL check is necessary
 // store the pointer to be checked in `*nullcheck` instead of checking it
 static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &strct,
                                          unsigned idx, jl_datatype_t *jt,
-                                         Value **nullcheck)
+                                         enum jl_memory_order order, Value **nullcheck)
 {
     jl_value_t *jfty = jl_field_type(jt, idx);
+    bool isatomic = jl_field_isatomic(jt, idx);
+    bool needlock = isatomic && !jl_field_isptr(jt, idx) && jl_datatype_size(jfty) > MAX_ATOMIC_SIZE;
+    if (!isatomic && order != jl_memory_order_notatomic && order != jl_memory_order_unspecified) {
+        emit_atomic_error(ctx, "getfield: non-atomic field cannot be accessed atomically");
+        return jl_cgval_t(); // unreachable
+    }
+    if (isatomic && order == jl_memory_order_notatomic) {
+        emit_atomic_error(ctx, "getfield: atomic field cannot be accessed non-atomically");
+        return jl_cgval_t(); // unreachable
+    }
+    if (order == jl_memory_order_unspecified) {
+        order = isatomic ? jl_memory_order_unordered : jl_memory_order_notatomic;
+    }
     if (jfty == jl_bottom_type) {
         raise_exception(ctx, literal_pointer_val(ctx, jl_undefref_exception));
         return jl_cgval_t(); // unreachable
     }
     if (type_is_ghost(julia_type_to_llvm(ctx, jfty)))
         return ghostValue(jfty);
-    bool maybe_null = idx >= (unsigned)jt->ninitialized;
+    size_t nfields = jl_datatype_nfields(jt);
+    bool maybe_null = idx >= nfields - (unsigned)jt->name->n_uninitialized;
     size_t byte_offset = jl_field_offset(jt, idx);
     auto tbaa = strct.tbaa;
     if (tbaa == tbaa_datatype && byte_offset != offsetof(jl_datatype_t, types))
@@ -1798,7 +2226,7 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
         }
         if (jl_field_isptr(jt, idx)) {
             LoadInst *Load = ctx.builder.CreateAlignedLoad(T_prjlvalue, maybe_bitcast(ctx, addr, T_pprjlvalue), Align(sizeof(void*)));
-            Load->setOrdering(AtomicOrdering::Unordered);
+            Load->setOrdering(order <= jl_memory_order_notatomic ? AtomicOrdering::Unordered : get_llvm_atomic_order(order));
             maybe_mark_load_dereferenceable(Load, maybe_null, jl_field_type(jt, idx));
             Value *fldv = tbaa_decorate(tbaa, Load);
             if (maybe_null)
@@ -1817,29 +2245,23 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
             else {
                 ptindex = emit_struct_gep(ctx, cast<StructType>(lt), staddr, byte_offset + fsz);
             }
-            Instruction *tindex0 = tbaa_decorate(tbaa_unionselbyte, ctx.builder.CreateAlignedLoad(T_int8, ptindex, Align(1)));
-            //tindex0->setMetadata(LLVMContext::MD_range, MDNode::get(jl_LLVMContext, {
-            //    ConstantAsMetadata::get(ConstantInt::get(T_int8, 0)),
-            //    ConstantAsMetadata::get(ConstantInt::get(T_int8, union_max)) }));
-            Value *tindex = ctx.builder.CreateNUWAdd(ConstantInt::get(T_int8, 1), tindex0);
-            if (jt->mutabl) {
-                // move value to an immutable stack slot (excluding tindex)
-                Type *ET = IntegerType::get(jl_LLVMContext, 8 * al);
-                AllocaInst *lv = emit_static_alloca(ctx, ET);
-                lv->setOperand(0, ConstantInt::get(T_int32, (fsz + al - 1) / al));
-                emit_memcpy(ctx, lv, tbaa, addr, tbaa, fsz, al);
-                addr = lv;
-            }
-            return mark_julia_slot(addr, jfty, tindex, tbaa);
+            return emit_unionload(ctx, addr, ptindex, jfty, fsz, al, tbaa, jt->name->mutabl);
         }
         assert(jl_is_concrete_type(jfty));
-        if (!jt->mutabl && !(maybe_null && (jfty == (jl_value_t*)jl_bool_type ||
+        if (!jt->name->mutabl && !(maybe_null && (jfty == (jl_value_t*)jl_bool_type ||
                                             ((jl_datatype_t*)jfty)->layout->npointers))) {
             // just compute the pointer and let user load it when necessary
             return mark_julia_slot(addr, jfty, NULL, tbaa);
         }
         unsigned align = jl_field_align(jt, idx);
-        return typed_load(ctx, addr, NULL, jfty, tbaa, nullptr, maybe_null, align, nullcheck);
+        if (needlock)
+            emit_lockstate_value(ctx, strct, true);
+        jl_cgval_t ret = typed_load(ctx, addr, NULL, jfty, tbaa, nullptr, false,
+                needlock ? AtomicOrdering::NotAtomic : get_llvm_atomic_order(order), // TODO: we should use unordered for anything with CountTrackedPointers(elty).count > 0
+                maybe_null, align, nullcheck);
+        if (needlock)
+            emit_lockstate_value(ctx, strct, false);
+        return ret;
     }
     else if (isa<UndefValue>(strct.V)) {
         return jl_cgval_t();
@@ -2069,7 +2491,7 @@ static Value *emit_arrayptr_internal(jl_codectx_t &ctx, const jl_cgval_t &tinfo,
         ctx.builder.CreateStructGEP(jl_array_llvmt,
             emit_bitcast(ctx, t, jl_parray_llvmt),
             0); // index (not offset) of data field in jl_parray_llvmt
-    // Normally allocated array of 0 dimention always have a inline pointer.
+    // Normally allocated array of 0 dimension always have a inline pointer.
     // However, we can't rely on that here since arrays can also be constructed from C pointers.
     MDNode *tbaa = arraytype_constshape(tinfo.typ) ? tbaa_const : tbaa_arrayptr;
     PointerType *PT = cast<PointerType>(addr->getType());
@@ -2448,7 +2870,7 @@ static Value *_boxed_special(jl_codectx_t &ctx, const jl_cgval_t &vinfo, Type *t
         v = ctx.builder.CreateExtractValue(v, makeArrayRef(&zero, 1));
         box = call_with_attrs(ctx, box_ssavalue_func, v);
     }
-    else if (!jb->abstract && jl_datatype_nbits(jb) == 0) {
+    else if (!jb->name->abstract && jl_datatype_nbits(jb) == 0) {
         // singleton
         assert(jb->instance != NULL);
         return track_pjlvalue(ctx, literal_pointer_val(ctx, jb->instance));
@@ -2472,6 +2894,27 @@ static Value *compute_box_tindex(jl_codectx_t &ctx, Value *datatype, jl_value_t
     return tindex;
 }
 
+// Returns typeof(v), or null if v is a null pointer at run time.
+// This is used when the value might have come from an undefined variable,
+// yet we try to read its type to compute a union index when moving the value.
+static Value *emit_typeof_or_null(jl_codectx_t &ctx, Value *v)
+{
+    BasicBlock *nonnull = BasicBlock::Create(jl_LLVMContext, "nonnull", ctx.f); // block that performs the actual typeof
+    BasicBlock *postBB = BasicBlock::Create(jl_LLVMContext, "postnull", ctx.f); // join block where both paths meet
+    Value *isnull = ctx.builder.CreateICmpEQ(v, Constant::getNullValue(v->getType()));
+    ctx.builder.CreateCondBr(isnull, postBB, nonnull); // null values skip the typeof and go straight to the join block
+    BasicBlock *entry = ctx.builder.GetInsertBlock(); // block holding the branch above; PHI predecessor for the null case
+    ctx.builder.SetInsertPoint(nonnull);
+    Value *typof = emit_typeof(ctx, v);
+    ctx.builder.CreateBr(postBB);
+    nonnull = ctx.builder.GetInsertBlock(); // emit_typeof could have moved the insert point to another block
+    ctx.builder.SetInsertPoint(postBB);
+    PHINode *ti = ctx.builder.CreatePHI(typof->getType(), 2); // two incoming edges: the null path and the typeof path
+    ti->addIncoming(Constant::getNullValue(typof->getType()), entry); // v was null: yield a null type value
+    ti->addIncoming(typof, nonnull); // v was non-null: yield the computed type
+    return ti;
+}
+
 // get the runtime tindex value, assuming val is already converted to type typ if it has a TIndex
 static Value *compute_tindex_unboxed(jl_codectx_t &ctx, const jl_cgval_t &val, jl_value_t *typ)
 {
@@ -2482,9 +2925,12 @@ static Value *compute_tindex_unboxed(jl_codectx_t &ctx, const jl_cgval_t &val, j
 
     if (val.TIndex)
         return ctx.builder.CreateAnd(val.TIndex, ConstantInt::get(T_int8, 0x7f));
-    if (val.isboxed)
-        return compute_box_tindex(ctx, emit_typeof_boxed(ctx, val), val.typ, typ);
-    return compute_box_tindex(ctx, emit_typeof_boxed(ctx, val), val.typ, typ);
+    Value *typof;
+    if (val.isboxed && !jl_is_concrete_type(val.typ) && !jl_is_type_type(val.typ))
+        typof = emit_typeof_or_null(ctx, val.V);
+    else
+        typof = emit_typeof_boxed(ctx, val);
+    return compute_box_tindex(ctx, typof, val.typ, typ);
 }
 
 static void union_alloca_type(jl_uniontype_t *ut,
@@ -2652,13 +3098,7 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con
 {
     if (AllocaInst *ai = dyn_cast<AllocaInst>(dest))
         // TODO: make this a lifetime_end & dereferencable annotation?
-        ctx.builder.CreateAlignedStore(UndefValue::get(ai->getAllocatedType()), ai,
-#if JL_LLVM_VERSION >= 110000
-                ai->getAlign()
-#else
-                ai->getAlignment()
-#endif
-                );
+        ctx.builder.CreateAlignedStore(UndefValue::get(ai->getAllocatedType()), ai, ai->getAlign());
     if (jl_is_concrete_type(src.typ) || src.constant) {
         jl_value_t *typ = src.constant ? jl_typeof(src.constant) : src.typ;
         Type *store_ty = julia_type_to_llvm(ctx, typ);
@@ -2764,7 +3204,7 @@ static void emit_cpointercheck(jl_codectx_t &ctx, const jl_cgval_t &x, const std
 // allocation for known size object
 static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt)
 {
-    Value *ptls_ptr = emit_bitcast(ctx, ctx.ptlsStates, T_pint8);
+    Value *ptls_ptr = emit_bitcast(ctx, get_current_ptls(ctx), T_pint8);
     Function *F = prepare_call(jl_alloc_obj_func);
     auto call = ctx.builder.CreateCall(F, {ptls_ptr, ConstantInt::get(T_size, static_size), maybe_decay_untracked(ctx, jt)});
     call->setAttributes(F->getAttributes());
@@ -2830,57 +3270,112 @@ static void emit_write_multibarrier(jl_codectx_t &ctx, Value *parent, Value *agg
 }
 
 
-static void emit_setfield(jl_codectx_t &ctx,
+static jl_cgval_t emit_setfield(jl_codectx_t &ctx, // emit a set / swap / replace / modify of field idx0 of strct (selected by the is*field flags)
         jl_datatype_t *sty, const jl_cgval_t &strct, size_t idx0,
-        const jl_cgval_t &rhs, bool checked, bool wb)
-{
-    if (sty->mutabl || !checked) {
-        assert(strct.ispointer());
-        size_t byte_offset = jl_field_offset(sty, idx0);
-        Value *addr = data_pointer(ctx, strct);
-        if (byte_offset > 0) {
-            addr = ctx.builder.CreateInBoundsGEP(
-                    T_int8,
-                    emit_bitcast(ctx, maybe_decay_tracked(ctx, addr), T_pint8),
-                    ConstantInt::get(T_size, byte_offset)); // TODO: use emit_struct_gep
-        }
-        jl_value_t *jfty = jl_svecref(sty->types, idx0);
-        if (jl_field_isptr(sty, idx0)) {
-            Value *r = boxed(ctx, rhs); // don't need a temporary gcroot since it'll be rooted by strct
-            cast<StoreInst>(tbaa_decorate(strct.tbaa, ctx.builder.CreateAlignedStore(r,
-                        emit_bitcast(ctx, addr, T_pprjlvalue),
-                        Align(sizeof(jl_value_t*)))))
-                    ->setOrdering(AtomicOrdering::Unordered);
-            if (wb && strct.isboxed && !type_is_permalloc(rhs.typ))
-                emit_write_barrier(ctx, boxed(ctx, strct), r);
-        }
-        else if (jl_is_uniontype(jfty)) {
-            int fsz = jl_field_size(sty, idx0) - 1;
-            // compute tindex from rhs
-            jl_cgval_t rhs_union = convert_julia_type(ctx, rhs, jfty);
-            if (rhs_union.typ == jl_bottom_type)
-                return;
-            Value *tindex = compute_tindex_unboxed(ctx, rhs_union, jfty);
-            tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(T_int8, 1));
-            Value *ptindex = ctx.builder.CreateInBoundsGEP(T_int8, emit_bitcast(ctx, maybe_decay_tracked(ctx, addr), T_pint8), ConstantInt::get(T_size, fsz));
-            tbaa_decorate(tbaa_unionselbyte, ctx.builder.CreateAlignedStore(tindex, ptindex, Align(1)));
-            // copy data
-            if (!rhs.isghost) {
-                emit_unionmove(ctx, addr, strct.tbaa, rhs, nullptr);
-            }
-        }
-        else {
-            unsigned align = jl_field_align(sty, idx0);
-            typed_store(ctx, addr, NULL, rhs, jfty,
-                strct.tbaa, nullptr, maybe_bitcast(ctx,
-                data_pointer(ctx, strct), T_pjlvalue), align);
-        }
-    }
-    else {
-        std::string msg = "setfield! immutable struct of type "
+        jl_cgval_t rhs, jl_cgval_t cmp, // rhs: value to store; cmp: expected value for replace (for modify, cmp/old/rhs become the three call arguments)
+        bool checked, bool wb, AtomicOrdering Order, AtomicOrdering FailOrder,
+        bool needlock, bool issetfield, bool isreplacefield, bool isswapfield, bool ismodifyfield,
+        const jl_cgval_t *modifyop, const std::string &fname) // fname: operation name used in error messages and type checks
+{ // returns the stored value (set), the old value (swap), or an (old, success) / (old, new) struct (replace / modify)
+    if (!sty->name->mutabl && checked) { // checked writes to an immutable type are rejected
+        std::string msg = fname + ": immutable struct of type " // fname is a bare name, so the separator has to be added here
             + std::string(jl_symbol_name(sty->name->name))
             + " cannot be changed";
         emit_error(ctx, msg);
+        return jl_cgval_t(); // placeholder result for the error path
+    }
+    assert(strct.ispointer());
+    size_t byte_offset = jl_field_offset(sty, idx0);
+    Value *addr = data_pointer(ctx, strct);
+    if (byte_offset > 0) {
+        addr = ctx.builder.CreateInBoundsGEP(
+                T_int8,
+                emit_bitcast(ctx, maybe_decay_tracked(ctx, addr), T_pint8),
+                ConstantInt::get(T_size, byte_offset)); // TODO: use emit_struct_gep
+    }
+    jl_value_t *jfty = jl_svecref(sty->types, idx0);
+    if (!jl_field_isptr(sty, idx0) && jl_is_uniontype(jfty)) { // union stored inline: fsz payload bytes followed by one selector byte
+        size_t fsz = 0, al = 0;
+        bool isptr = !jl_islayout_inline(jfty, &fsz, &al);
+        assert(!isptr && fsz == jl_field_size(sty, idx0) - 1); (void)isptr;
+        // compute tindex from rhs
+        jl_cgval_t rhs_union = convert_julia_type(ctx, rhs, jfty);
+        if (rhs_union.typ == jl_bottom_type)
+            return jl_cgval_t();
+        Value *ptindex = ctx.builder.CreateInBoundsGEP(T_int8, emit_bitcast(ctx, maybe_decay_tracked(ctx, addr), T_pint8), ConstantInt::get(T_size, fsz)); // address of the selector byte
+        if (needlock)
+            emit_lockstate_value(ctx, strct, true);
+        BasicBlock *BB = ctx.builder.GetInsertBlock();
+        jl_cgval_t oldval = rhs; // plain setfield returns the stored value; every other operation replaces this with the loaded old value
+        if (!issetfield)
+            oldval = emit_unionload(ctx, addr, ptindex, jfty, fsz, al, strct.tbaa, true);
+        Value *Success = NULL;
+        BasicBlock *DoneBB = NULL;
+        if (isreplacefield || ismodifyfield) {
+            if (ismodifyfield) {
+                if (needlock)
+                    emit_lockstate_value(ctx, strct, false); // release the lock around the call below
+                const jl_cgval_t argv[3] = { cmp, oldval, rhs };
+                if (modifyop) {
+                    rhs = emit_invoke(ctx, *modifyop, argv, 3, (jl_value_t*)jl_any_type);
+                }
+                else {
+                    Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, 3, JLCALL_F_CC);
+                    rhs = mark_julia_type(ctx, callval, true, jl_any_type);
+                }
+                if (!jl_subtype(rhs.typ, jfty)) {
+                    emit_typecheck(ctx, rhs, jfty, fname);
+                    rhs = update_julia_type(ctx, rhs, jfty);
+                }
+                rhs_union = convert_julia_type(ctx, rhs, jfty);
+                if (rhs_union.typ == jl_bottom_type)
+                    return jl_cgval_t();
+                if (needlock)
+                    emit_lockstate_value(ctx, strct, true); // take the lock again before re-reading the field
+                cmp = oldval; // the value the call was computed from becomes the expected value
+                oldval = emit_unionload(ctx, addr, ptindex, jfty, fsz, al, strct.tbaa, true);
+            }
+            BasicBlock *XchgBB = BasicBlock::Create(jl_LLVMContext, "xchg", ctx.f);
+            DoneBB = BasicBlock::Create(jl_LLVMContext, "done_xchg", ctx.f);
+            Success = emit_f_is(ctx, oldval, cmp);
+            ctx.builder.CreateCondBr(Success, XchgBB, ismodifyfield ? BB : DoneBB); // NOTE(review): the modifyfield failure edge targets BB, the block captured before the first load -- confirm this is the intended retry point
+            ctx.builder.SetInsertPoint(XchgBB);
+        }
+        Value *tindex = compute_tindex_unboxed(ctx, rhs_union, jfty);
+        tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(T_int8, 1)); // the stored selector is tindex - 1 (emit_unionload adds the 1 back)
+        tbaa_decorate(tbaa_unionselbyte, ctx.builder.CreateAlignedStore(tindex, ptindex, Align(1)));
+        // copy data
+        if (!rhs.isghost) {
+            emit_unionmove(ctx, addr, strct.tbaa, rhs, nullptr);
+        }
+        if (isreplacefield || ismodifyfield) {
+            ctx.builder.CreateBr(DoneBB);
+            ctx.builder.SetInsertPoint(DoneBB);
+        }
+        if (needlock)
+            emit_lockstate_value(ctx, strct, false);
+        if (isreplacefield) {
+            Success = ctx.builder.CreateZExt(Success, T_int8); // widen the flag to 8 bits before marking it as a Bool
+            jl_cgval_t argv[2] = {oldval, mark_julia_type(ctx, Success, false, jl_bool_type)};
+            jl_datatype_t *rettyp = jl_apply_cmpswap_type(jfty);
+            oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+        }
+        else if (ismodifyfield) {
+            jl_cgval_t argv[2] = {oldval, rhs};
+            jl_datatype_t *rettyp = jl_apply_modify_type(jfty);
+            oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+        }
+        return oldval;
+    }
+    else { // pointer fields and non-union inline fields are handled by typed_store
+        unsigned align = jl_field_align(sty, idx0);
+        bool isboxed = jl_field_isptr(sty, idx0);
+        size_t nfields = jl_datatype_nfields(sty);
+        bool maybe_null = idx0 >= nfields - (unsigned)sty->name->n_uninitialized; // only the trailing n_uninitialized fields may still be unset
+        return typed_store(ctx, addr, NULL, rhs, cmp, jfty, strct.tbaa, nullptr,
+            wb ? maybe_bitcast(ctx, data_pointer(ctx, strct), T_pjlvalue) : nullptr,
+            isboxed, Order, FailOrder, align,
+            needlock, issetfield, isreplacefield, isswapfield, ismodifyfield, maybe_null, modifyop, fname);
     }
 }
 
@@ -2890,7 +3385,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
     assert(jl_is_concrete_type(ty));
     jl_datatype_t *sty = (jl_datatype_t*)ty;
     size_t nf = jl_datatype_nfields(sty);
-    if (nf > 0 || sty->mutabl) {
+    if (nf > 0 || sty->name->mutabl) {
         if (deserves_stack(ty)) {
             Type *lt = julia_type_to_llvm(ctx, ty);
             unsigned na = nargs < nf ? nargs : nf;
@@ -3059,7 +3554,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
             else
                 need_wb = false;
             emit_typecheck(ctx, rhs, jl_svecref(sty->types, i), "new");
-            emit_setfield(ctx, sty, strctinfo, i, rhs, false, need_wb);
+            emit_setfield(ctx, sty, strctinfo, i, rhs, jl_cgval_t(), false, need_wb, AtomicOrdering::NotAtomic, AtomicOrdering::NotAtomic, false, true, false, false, false, nullptr, "");
         }
         return strctinfo;
     }
@@ -3076,21 +3571,12 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
 
 static void emit_signal_fence(jl_codectx_t &ctx)
 {
-#if defined(_CPU_ARM_) || defined(_CPU_AARCH64_)
-    // LLVM generates very inefficient code (and might include function call)
-    // for signal fence. Fallback to the poor man signal fence with
-    // inline asm instead.
-    // https://llvm.org/bugs/show_bug.cgi?id=27545
-    ctx.builder.CreateCall(InlineAsm::get(FunctionType::get(T_void, false), "",
-                                      "~{memory}", true));
-#else
     ctx.builder.CreateFence(AtomicOrdering::SequentiallyConsistent, SyncScope::SingleThread);
-#endif
 }
 
 static Value *emit_defer_signal(jl_codectx_t &ctx)
 {
-    Value *ptls = emit_bitcast(ctx, ctx.ptlsStates,
+    Value *ptls = emit_bitcast(ctx, get_current_ptls(ctx),
                                         PointerType::get(T_sigatomic, 0));
     Constant *offset = ConstantInt::getSigned(T_int32,
         offsetof(jl_tls_states_t, defer_signal) / sizeof(sig_atomic_t));
diff --git a/src/clangsa/GCChecker.cpp b/src/clangsa/GCChecker.cpp
index 51b457b7acb88c..675afc3453fbad 100644
--- a/src/clangsa/GCChecker.cpp
+++ b/src/clangsa/GCChecker.cpp
@@ -26,12 +26,7 @@
 #define USED_FUNC
 #endif
 
-#if LLVM_VERSION_MAJOR >= 10
 using std::make_unique;
-#else
-using llvm::make_unique;
-#define PathSensitiveBugReport BugReport
-#endif
 
 namespace {
 using namespace clang;
@@ -42,11 +37,7 @@ using namespace ento;
 
 static const Stmt *getStmtForDiagnostics(const ExplodedNode *N)
 {
-#if LLVM_VERSION_MAJOR >= 10
     return N->getStmtForDiagnostics();
-#else
-    return PathDiagnosticLocation::getStmt(N);
-#endif
 }
 
 
@@ -737,7 +728,6 @@ bool GCChecker::isGCTrackedType(QualType QT) {
                    Name.endswith_lower("jl_module_t") ||
                    Name.endswith_lower("jl_tupletype_t") ||
                    Name.endswith_lower("jl_gc_tracked_buffer_t") ||
-                   Name.endswith_lower("jl_tls_states_t") ||
                    Name.endswith_lower("jl_binding_t") ||
                    Name.endswith_lower("jl_ordereddict_t") ||
                    Name.endswith_lower("jl_tvar_t") ||
diff --git a/src/codegen.cpp b/src/codegen.cpp
index f0edf7b19fa29f..5cc9f66ffaeffc 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -84,6 +84,10 @@
 #endif
 #include <llvm/Target/TargetMachine.h>
 
+#include "llvm/Support/Path.h" // for llvm::sys::path
+#include <llvm/Bitcode/BitcodeReader.h>
+#include <llvm/Linker/Linker.h>
+
 using namespace llvm;
 
 typedef Instruction TerminatorInst;
@@ -428,13 +432,14 @@ static const auto jlboxed_uint8_cache = new JuliaVariable{
     [](LLVMContext &C) { return (Type*)ArrayType::get(T_pjlvalue, 256); },
 };
 
-static const auto jltls_states_func = new JuliaFunction{
-    "julia.ptls_states",
+static const auto jlpgcstack_func = new JuliaFunction{
+    "julia.get_pgcstack",
     [](LLVMContext &C) { return FunctionType::get(PointerType::get(T_ppjlvalue, 0), false); },
     nullptr,
 };
 
 
+
 // important functions
 // Symbols are not gc-tracked, but we'll treat them as callee rooted anyway,
 // because they may come from a gc-rooted location
@@ -461,6 +466,12 @@ static const auto jlerror_func = new JuliaFunction{
             {T_pint8}, false); },
     get_attrs_noreturn,
 };
+static const auto jlatomicerror_func = new JuliaFunction{
+    "jl_atomic_error",
+    [](LLVMContext &C) { return FunctionType::get(T_void,
+            {T_pint8}, false); },
+    get_attrs_noreturn,
+};
 static const auto jltypeerror_func = new JuliaFunction{
     "jl_type_error",
     [](LLVMContext &C) { return FunctionType::get(T_void,
@@ -566,7 +577,7 @@ static const auto jlinvoke_func = new JuliaFunction{
 static const auto jlmethod_func = new JuliaFunction{
     "jl_method_def",
     [](LLVMContext &C) { return FunctionType::get(T_prjlvalue,
-                {T_prjlvalue, T_prjlvalue, T_pjlvalue}, false); },
+                {T_prjlvalue, T_prjlvalue, T_prjlvalue, T_pjlvalue}, false); },
     nullptr,
 };
 static const auto jlgenericfunction_func = new JuliaFunction{
@@ -575,6 +586,24 @@ static const auto jlgenericfunction_func = new JuliaFunction{
                 {T_pjlvalue, T_pjlvalue, T_pprjlvalue, T_pjlvalue, T_pjlvalue}, false); },
     nullptr,
 };
+static const auto jllockvalue_func = new JuliaFunction{
+    "jl_lock_value",
+    [](LLVMContext &C) { return FunctionType::get(T_void,
+            {PointerType::get(T_jlvalue, AddressSpace::CalleeRooted)}, false); },
+    [](LLVMContext &C) { return AttributeList::get(C,
+            AttributeSet(),
+            AttributeSet(),
+            {Attributes(C, {Attribute::NoCapture})}); },
+};
+static const auto jlunlockvalue_func = new JuliaFunction{
+    "jl_unlock_value",
+    [](LLVMContext &C) { return FunctionType::get(T_void,
+            {PointerType::get(T_jlvalue, AddressSpace::CalleeRooted)}, false); },
+    [](LLVMContext &C) { return AttributeList::get(C,
+            AttributeSet(),
+            AttributeSet(),
+            {Attributes(C, {Attribute::NoCapture})}); },
+};
 static const auto jlenter_func = new JuliaFunction{
     "jl_enter_handler",
     [](LLVMContext &C) { return FunctionType::get(T_void,
@@ -603,12 +632,15 @@ static const auto jl_excstack_state_func = new JuliaFunction{
     [](LLVMContext &C) { return FunctionType::get(T_size, false); },
     nullptr,
 };
-static const auto jlegal_func = new JuliaFunction{
-    "jl_egal",
+static const auto jlegalx_func = new JuliaFunction{
+    "jl_egal__unboxed",
     [](LLVMContext &C) {
-        Type *T = PointerType::get(T_jlvalue, AddressSpace::CalleeRooted);
-        return FunctionType::get(T_int32, {T, T}, false); },
-    nullptr,
+        Type *T = PointerType::get(T_jlvalue, AddressSpace::Derived);
+        return FunctionType::get(T_int32, {T, T, T_prjlvalue}, false); },
+    [](LLVMContext &C) { return AttributeList::get(C,
+            Attributes(C, {Attribute::ReadOnly, Attribute::NoUnwind, Attribute::ArgMemOnly}),
+            AttributeSet(),
+            None); },
 };
 static const auto jl_alloc_obj_func = new JuliaFunction{
     "julia.gc_alloc_obj",
@@ -830,7 +862,6 @@ static const auto pointer_from_objref_func = new JuliaFunction{
 };
 
 static const auto jltuple_func = new JuliaFunction{"jl_f_tuple", get_func_sig, get_func_attrs};
-static const auto jlgetfield_func = new JuliaFunction{"jl_f_getfield", get_func_sig, get_func_attrs};
 static const std::map<jl_fptr_args_t, JuliaFunction*> builtin_func_map = {
     { &jl_f_is,                 new JuliaFunction{"jl_f_is", get_func_sig, get_func_attrs} },
     { &jl_f_typeof,             new JuliaFunction{"jl_f_typeof", get_func_sig, get_func_attrs} },
@@ -850,8 +881,10 @@ static const std::map<jl_fptr_args_t, JuliaFunction*> builtin_func_map = {
     { &jl_f_invoke,             new JuliaFunction{"jl_f_invoke", get_func_sig, get_func_attrs} },
     { &jl_f_invoke_kwsorter,    new JuliaFunction{"jl_f_invoke_kwsorter", get_func_sig, get_func_attrs} },
     { &jl_f_isdefined,          new JuliaFunction{"jl_f_isdefined", get_func_sig, get_func_attrs} },
-    { &jl_f_getfield,           jlgetfield_func },
+    { &jl_f_getfield,           new JuliaFunction{"jl_f_getfield", get_func_sig, get_func_attrs} },
     { &jl_f_setfield,           new JuliaFunction{"jl_f_setfield", get_func_sig, get_func_attrs} },
+    { &jl_f_swapfield,          new JuliaFunction{"jl_f_swapfield", get_func_sig, get_func_attrs} },
+    { &jl_f_modifyfield,        new JuliaFunction{"jl_f_modifyfield", get_func_sig, get_func_attrs} },
     { &jl_f_fieldtype,          new JuliaFunction{"jl_f_fieldtype", get_func_sig, get_func_attrs} },
     { &jl_f_nfields,            new JuliaFunction{"jl_f_nfields", get_func_sig, get_func_attrs} },
     { &jl_f__expr,              new JuliaFunction{"jl_f__expr", get_func_sig, get_func_attrs} },
@@ -904,12 +937,12 @@ static MDNode *best_tbaa(jl_value_t *jt) {
 // note that this includes jl_isbits, although codegen should work regardless
 static bool jl_is_concrete_immutable(jl_value_t* t)
 {
-    return jl_is_immutable_datatype(t) && ((jl_datatype_t*)t)->layout;
+    return jl_is_immutable_datatype(t) && ((jl_datatype_t*)t)->isconcretetype;
 }
 
 static bool jl_is_pointerfree(jl_value_t* t)
 {
-    if (!jl_is_immutable_datatype(t))
+    if (!jl_is_concrete_immutable(t))
         return 0;
     const jl_datatype_layout_t *layout = ((jl_datatype_t*)t)->layout;
     return layout && layout->npointers == 0;
@@ -917,11 +950,13 @@ static bool jl_is_pointerfree(jl_value_t* t)
 
 // these queries are usually related, but we split them out here
 // for convenience and clarity (and because it changes the calling convention)
-static bool deserves_stack(jl_value_t* t, bool pointerfree=false)
+// n.b. this must include jl_is_datatype_singleton (ghostType) and primitive types
+static bool deserves_stack(jl_value_t* t)
 {
     if (!jl_is_concrete_immutable(t))
         return false;
-    return ((jl_datatype_t*)t)->isinlinealloc;
+    jl_datatype_t *dt = (jl_datatype_t*)t;
+    return jl_is_datatype_singleton(dt) || jl_datatype_isinlinealloc(dt, 0);
 }
 static bool deserves_argbox(jl_value_t* t)
 {
@@ -1089,8 +1124,7 @@ class jl_codectx_t {
     int nvargs = -1;
     bool is_opaque_closure = false;
 
-    CallInst *ptlsStates = NULL;
-    Value *signalPage = NULL;
+    CallInst *pgcstack = NULL;
     Value *world_age_field = NULL;
 
     bool debug_enabled = false;
@@ -1126,11 +1160,18 @@ static jl_cgval_t emit_checked_var(jl_codectx_t &ctx, Value *bp, jl_sym_t *name,
 static jl_cgval_t emit_sparam(jl_codectx_t &ctx, size_t i);
 static Value *emit_condition(jl_codectx_t &ctx, const jl_cgval_t &condV, const std::string &msg);
 static void allocate_gc_frame(jl_codectx_t &ctx, BasicBlock *b0);
-static void CreateTrap(IRBuilder<> &irbuilder);
+static Value *get_current_task(jl_codectx_t &ctx);
+static Value *get_current_ptls(jl_codectx_t &ctx);
+static Value *get_current_signal_page(jl_codectx_t &ctx);
+static void CreateTrap(IRBuilder<> &irbuilder, bool create_new_block = true);
 static CallInst *emit_jlcall(jl_codectx_t &ctx, Function *theFptr, Value *theF,
-                             jl_cgval_t *args, size_t nargs, CallingConv::ID cc);
+                             const jl_cgval_t *args, size_t nargs, CallingConv::ID cc);
 static CallInst *emit_jlcall(jl_codectx_t &ctx, JuliaFunction *theFptr, Value *theF,
-                             jl_cgval_t *args, size_t nargs, CallingConv::ID cc);
+                             const jl_cgval_t *args, size_t nargs, CallingConv::ID cc);
+static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgval_t &arg2,
+                        Value *nullcheck1 = nullptr, Value *nullcheck2 = nullptr);
+static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t nargs, const jl_cgval_t *argv);
+static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const jl_cgval_t *argv, size_t nargs, jl_value_t *rt);
 
 static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p);
 static GlobalVariable *prepare_global_in(Module *M, GlobalVariable *G);
@@ -1199,12 +1240,15 @@ static GlobalVariable *get_pointer_to_constant(jl_codegen_params_t &emission_con
 
 static AllocaInst *emit_static_alloca(jl_codectx_t &ctx, Type *lty)
 {
-    return new AllocaInst(lty, 0, "", /*InsertBefore=*/ctx.ptlsStates);
+    return new AllocaInst(lty, 0, "", /*InsertBefore=*/ctx.pgcstack);
 }
 
 static void undef_derived_strct(IRBuilder<> &irbuilder, Value *ptr, jl_datatype_t *sty, MDNode *tbaa)
 {
     assert(ptr->getType()->getPointerAddressSpace() != AddressSpace::Tracked);
+    size_t first_offset = sty->layout->nfields ? jl_field_offset(sty, 0) : 0;
+    if (first_offset != 0)
+        irbuilder.CreateMemSet(ptr, ConstantInt::get(T_int8, 0), first_offset, MaybeAlign(0));
     size_t i, np = sty->layout->npointers;
     if (np == 0)
         return;
@@ -1359,7 +1403,7 @@ static inline jl_cgval_t update_julia_type(jl_codectx_t &ctx, const jl_cgval_t &
             if (jl_is_concrete_type(utyp))
                 alwaysboxed = !jl_is_pointerfree(utyp);
             else
-                alwaysboxed = !((jl_datatype_t*)utyp)->abstract && ((jl_datatype_t*)utyp)->mutabl;
+                alwaysboxed = !((jl_datatype_t*)utyp)->name->abstract && ((jl_datatype_t*)utyp)->name->mutabl;
             if (alwaysboxed) {
                 // discovered that this union-split type must actually be isboxed
                 if (v.Vboxed) {
@@ -1409,7 +1453,18 @@ static void alloc_def_flag(jl_codectx_t &ctx, jl_varinfo_t& vi)
 
 // --- utilities ---
 
-static void CreateTrap(IRBuilder<> &irbuilder)
+static Constant *undef_value_for_type(Type *T) {
+    auto tracked = CountTrackedPointers(T);
+    Constant *undef;
+    if (tracked.count)
+        // make sure gc pointers (including ptr_phi of union-split) are initialized to NULL
+        undef = Constant::getNullValue(T);
+    else
+        undef = UndefValue::get(T);
+    return undef;
+}
+
+static void CreateTrap(IRBuilder<> &irbuilder, bool create_new_block)
 {
     Function *f = irbuilder.GetInsertBlock()->getParent();
     Function *trap_func = Intrinsic::getDeclaration(
@@ -1417,8 +1472,13 @@ static void CreateTrap(IRBuilder<> &irbuilder)
             Intrinsic::trap);
     irbuilder.CreateCall(trap_func);
     irbuilder.CreateUnreachable();
-    BasicBlock *newBB = BasicBlock::Create(irbuilder.getContext(), "after_noret", f);
-    irbuilder.SetInsertPoint(newBB);
+    if (create_new_block) {
+        BasicBlock *newBB = BasicBlock::Create(irbuilder.getContext(), "after_noret", f);
+        irbuilder.SetInsertPoint(newBB);
+    }
+    else {
+        irbuilder.ClearInsertionPoint();
+    }
 }
 
 #if 0 // this code is likely useful, but currently unused
@@ -1440,6 +1500,7 @@ static void CreateConditionalAbort(IRBuilder<> &irbuilder, Value *test)
 #endif
 #endif
 
+
 #include "cgutils.cpp"
 
 static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t &v, jl_value_t *typ, Value **skip)
@@ -1504,11 +1565,13 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t &
             // actually need it.
             Value *union_box_dt = NULL;
             BasicBlock *union_isaBB = NULL;
+            BasicBlock *post_union_isaBB = NULL;
             auto maybe_setup_union_isa = [&]() {
                 if (!union_isaBB) {
                     union_isaBB = BasicBlock::Create(jl_LLVMContext, "union_isa", ctx.f);
                     ctx.builder.SetInsertPoint(union_isaBB);
-                    union_box_dt = emit_typeof(ctx, v.Vboxed);
+                    union_box_dt = emit_typeof_or_null(ctx, v.Vboxed);
+                    post_union_isaBB = ctx.builder.GetInsertBlock();
                 }
             };
 
@@ -1540,7 +1603,7 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t &
                 ctx.builder.SetInsertPoint(postBB);
                 PHINode *tindex_phi = ctx.builder.CreatePHI(T_int8, 2);
                 tindex_phi->addIncoming(new_tindex, currBB);
-                tindex_phi->addIncoming(union_box_tindex, union_isaBB);
+                tindex_phi->addIncoming(union_box_tindex, post_union_isaBB);
                 new_tindex = tindex_phi;
             }
         }
@@ -1684,7 +1747,7 @@ static void jl_setup_module(Module *m, const jl_cgparams_t *params = &jl_default
         m->addModuleFlag(llvm::Module::Warning, "Dwarf Version", dwarf_version);
     }
     if (!m->getModuleFlag("Debug Info Version"))
-        m->addModuleFlag(llvm::Module::Error, "Debug Info Version",
+        m->addModuleFlag(llvm::Module::Warning, "Debug Info Version",
             llvm::DEBUG_METADATA_VERSION);
     m->setDataLayout(jl_data_layout);
     m->setTargetTriple(jl_TargetMachine->getTargetTriple().str());
@@ -1713,11 +1776,15 @@ static void jl_init_function(Function *F)
     F->setHasUWTable(); // force NeedsWinEH
 #endif
 #ifdef JL_DISABLE_FPO
-#if LLVM_VERSION_MAJOR >= 8
     F->addFnAttr("frame-pointer", "all");
-#else
-    F->addFnAttr("no-frame-pointer-elim", "true");
 #endif
+#if !defined(_COMPILER_ASAN_ENABLED_) && !defined(_OS_WINDOWS_)
+    // ASAN won't like us accessing undefined memory causing spurious issues,
+    // and Windows has platform-specific handling which causes it to mishandle
+    // this annotation. Other platforms should just ignore this if they don't
+    // implement it.
+    F->addFnAttr("probe-stack", "inline-asm");
+    //F->addFnAttr("stack-probe-size", 4096); // can use this to change the default
 #endif
 }
 
@@ -1831,6 +1898,16 @@ static void coverageAllocLine(StringRef filename, int line)
     allocLine(coverageData[filename], line);
 }
 
+extern "C" JL_DLLEXPORT void jl_coverage_visit_line(const char* filename_, size_t len_filename, int line)
+{
+    StringRef filename = StringRef(filename_, len_filename);
+    if (imaging_mode || filename == "" || filename == "none" || filename == "no file" || filename == "<missing>" || line < 0)
+        return;
+    std::vector<logdata_block*> &vec = coverageData[filename];
+    uint64_t *ptr = allocLine(vec, line);
+    (*ptr)++;
+}
+
 // Memory allocation log (malloc_log)
 
 static logdata_t mallocData;
@@ -2007,9 +2084,9 @@ static jl_value_t *static_apply_type(jl_codectx_t &ctx, const jl_cgval_t *args,
         v[i] = args[i].constant;
     }
     assert(v[0] == jl_builtin_apply_type);
-    size_t last_age = jl_get_ptls_states()->world_age;
+    size_t last_age = jl_current_task->world_age;
     // call apply_type, but ignore errors. we know that will work in world 1.
-    jl_get_ptls_states()->world_age = 1;
+    jl_current_task->world_age = 1;
     jl_value_t *result;
     JL_TRY {
         result = jl_apply(v, nargs);
@@ -2017,7 +2094,7 @@ static jl_value_t *static_apply_type(jl_codectx_t &ctx, const jl_cgval_t *args,
     JL_CATCH {
         result = NULL;
     }
-    jl_get_ptls_states()->world_age = last_age;
+    jl_current_task->world_age = last_age;
     return result;
 }
 
@@ -2093,9 +2170,9 @@ static jl_value_t *static_eval(jl_codectx_t &ctx, jl_value_t *ex)
                             return NULL;
                         }
                     }
-                    size_t last_age = jl_get_ptls_states()->world_age;
+                    size_t last_age = jl_current_task->world_age;
                     // here we know we're calling specific builtin functions that work in world 1.
-                    jl_get_ptls_states()->world_age = 1;
+                    jl_current_task->world_age = 1;
                     jl_value_t *result;
                     JL_TRY {
                         result = jl_apply(v, n+1);
@@ -2103,7 +2180,7 @@ static jl_value_t *static_eval(jl_codectx_t &ctx, jl_value_t *ex)
                     JL_CATCH {
                         result = NULL;
                     }
-                    jl_get_ptls_states()->world_age = last_age;
+                    jl_current_task->world_age = last_age;
                     JL_GC_POP();
                     return result;
                 }
@@ -2313,97 +2390,14 @@ static jl_cgval_t emit_globalref(jl_codectx_t &ctx, jl_module_t *mod, jl_sym_t *
     return emit_checked_var(ctx, bp, name, false, tbaa_binding);
 }
 
-static jl_cgval_t emit_getfield(jl_codectx_t &ctx, const jl_cgval_t &strct, jl_sym_t *name)
-{
-    if (strct.constant && jl_is_module(strct.constant))
-        return emit_globalref(ctx, (jl_module_t*)strct.constant, name);
-
-    jl_datatype_t *sty = (jl_datatype_t*)strct.typ;
-    if (jl_is_type_type((jl_value_t*)sty) && jl_is_concrete_type(jl_tparam0(sty)))
-        sty = (jl_datatype_t*)jl_typeof(jl_tparam0(sty));
-    sty = (jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)sty);
-    if (jl_is_structtype(sty) && sty != jl_module_type && sty->layout) {
-        unsigned idx = jl_field_index(sty, name, 0);
-        if (idx != (unsigned)-1) {
-            return emit_getfield_knownidx(ctx, strct, idx, sty);
-        }
-    }
-    // TODO: attempt better codegen for approximate types, if the types
-    // and offsets of some fields are independent of parameters.
-
-    // TODO: generic getfield func with more efficient calling convention
-    jl_cgval_t myargs_array[2] = {
-        strct,
-        mark_julia_const((jl_value_t*)name)
-    };
-    Value *result = emit_jlcall(ctx, jlgetfield_func, V_rnull, myargs_array, 2, JLCALL_F_CC);
-    return mark_julia_type(ctx, result, true, jl_any_type);
-}
-
-template<typename Func>
-static Value *emit_guarded_test(jl_codectx_t &ctx, Value *ifnot, Constant *defval, Func &&func)
-{
-    if (auto Cond = dyn_cast<ConstantInt>(ifnot)) {
-        if (Cond->isZero())
-            return defval;
-        return func();
-    }
-    BasicBlock *currBB = ctx.builder.GetInsertBlock();
-    BasicBlock *passBB = BasicBlock::Create(jl_LLVMContext, "guard_pass", ctx.f);
-    BasicBlock *exitBB = BasicBlock::Create(jl_LLVMContext, "guard_exit", ctx.f);
-    ctx.builder.CreateCondBr(ifnot, passBB, exitBB);
-    ctx.builder.SetInsertPoint(passBB);
-    auto res = func();
-    passBB = ctx.builder.GetInsertBlock();
-    ctx.builder.CreateBr(exitBB);
-    ctx.builder.SetInsertPoint(exitBB);
-    if (defval == nullptr)
-        return nullptr;
-    PHINode *phi = ctx.builder.CreatePHI(defval->getType(), 2);
-    phi->addIncoming(defval, currBB);
-    phi->addIncoming(res, passBB);
-    return phi;
-}
-
-template<typename Func>
-static Value *emit_guarded_test(jl_codectx_t &ctx, Value *ifnot, bool defval, Func &&func)
-{
-    return emit_guarded_test(ctx, ifnot, ConstantInt::get(T_int1, defval), func);
-}
-
-template<typename Func>
-static Value *emit_nullcheck_guard(jl_codectx_t &ctx, Value *nullcheck, Func &&func)
-{
-    if (!nullcheck)
-        return func();
-    return emit_guarded_test(ctx, null_pointer_cmp(ctx, nullcheck), false, func);
-}
-
-template<typename Func>
-static Value *emit_nullcheck_guard2(jl_codectx_t &ctx, Value *nullcheck1,
-                                    Value *nullcheck2, Func &&func)
-{
-    if (!nullcheck1)
-        return emit_nullcheck_guard(ctx, nullcheck2, func);
-    if (!nullcheck2)
-        return emit_nullcheck_guard(ctx, nullcheck1, func);
-    nullcheck1 = null_pointer_cmp(ctx, nullcheck1);
-    nullcheck2 = null_pointer_cmp(ctx, nullcheck2);
-    // If both are NULL, return true.
-    return emit_guarded_test(ctx, ctx.builder.CreateOr(nullcheck1, nullcheck2), true, [&] {
-        return emit_guarded_test(ctx, ctx.builder.CreateAnd(nullcheck1, nullcheck2),
-                                 false, func);
-    });
-}
-
 static Value *emit_box_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgval_t &arg2,
                                Value *nullcheck1, Value *nullcheck2)
 {
     if (jl_pointer_egal(arg1.typ) || jl_pointer_egal(arg2.typ)) {
+        assert((arg1.isboxed || arg1.constant) && (arg2.isboxed || arg2.constant) &&
+                "Expected unboxed cases to be handled earlier");
         Value *varg1 = arg1.constant ? literal_pointer_val(ctx, arg1.constant) : arg1.V;
         Value *varg2 = arg2.constant ? literal_pointer_val(ctx, arg2.constant) : arg2.V;
-        assert(varg1 && varg2 && (arg1.isboxed || arg1.TIndex) && (arg2.isboxed || arg2.TIndex) &&
-                "Only boxed types are valid for pointer comparison.");
         varg1 = maybe_decay_tracked(ctx, varg1);
         varg2 = maybe_decay_tracked(ctx, varg2);
         if (cast<PointerType>(varg1->getType())->getAddressSpace() != cast<PointerType>(varg2->getType())->getAddressSpace()) {
@@ -2415,26 +2409,40 @@ static Value *emit_box_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1, const
     }
 
     return emit_nullcheck_guard2(ctx, nullcheck1, nullcheck2, [&] {
-        Value *varg1 = mark_callee_rooted(ctx, boxed(ctx, arg1));
-        Value *varg2 = mark_callee_rooted(ctx, boxed(ctx, arg2));
-        return ctx.builder.CreateTrunc(ctx.builder.CreateCall(prepare_call(jlegal_func),
-                                                              {varg1, varg2}), T_int1);
+        Value *varg1 = arg1.constant ? literal_pointer_val(ctx, arg1.constant) : maybe_bitcast(ctx, value_to_pointer(ctx, arg1).V, T_pjlvalue);
+        Value *varg2 = arg2.constant ? literal_pointer_val(ctx, arg2.constant) : maybe_bitcast(ctx, value_to_pointer(ctx, arg2).V, T_pjlvalue);
+        varg1 = decay_derived(ctx, varg1);
+        varg2 = decay_derived(ctx, varg2);
+        Value *neq = ctx.builder.CreateICmpNE(varg1, varg2);
+        return emit_guarded_test(ctx, neq, true, [&] {
+            Value *dtarg = emit_typeof_boxed(ctx, arg1);
+            Value *dt_eq = ctx.builder.CreateICmpEQ(dtarg, emit_typeof_boxed(ctx, arg2));
+            return emit_guarded_test(ctx, dt_eq, false, [&] {
+                return ctx.builder.CreateTrunc(ctx.builder.CreateCall(prepare_call(jlegalx_func),
+                                                                      {varg1, varg2, dtarg}), T_int1);
+            });
+        });
     });
 }
 
 static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t arg2);
-static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgval_t &arg2,
-                        Value *nullcheck1 = nullptr, Value *nullcheck2 = nullptr);
 
 static Value *emit_bitsunion_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgval_t &arg2)
 {
     assert(jl_egal(arg1.typ, arg2.typ) && arg1.TIndex && arg2.TIndex && jl_is_uniontype(arg1.typ) && "unimplemented");
     Value *tindex = arg1.TIndex;
+    tindex = ctx.builder.CreateAnd(tindex, ConstantInt::get(T_int8, 0x7f));
+    Value *tindex2 = arg2.TIndex;
+    tindex2 = ctx.builder.CreateAnd(tindex2, ConstantInt::get(T_int8, 0x7f));
+    Value *typeeq = ctx.builder.CreateICmpEQ(tindex, tindex2);
+    tindex = ctx.builder.CreateSelect(typeeq, tindex, ConstantInt::get(T_int8, 0x00));
     BasicBlock *defaultBB = BasicBlock::Create(jl_LLVMContext, "unionbits_is_boxed", ctx.f);
     SwitchInst *switchInst = ctx.builder.CreateSwitch(tindex, defaultBB);
     BasicBlock *postBB = BasicBlock::Create(jl_LLVMContext, "post_unionbits_is", ctx.f);
     ctx.builder.SetInsertPoint(postBB);
     PHINode *phi = ctx.builder.CreatePHI(T_int1, 2);
+    switchInst->addCase(ConstantInt::get(T_int8, 0), postBB);
+    phi->addIncoming(ConstantInt::get(T_int1, 0), switchInst->getParent());
     unsigned counter = 0;
     bool allunboxed = for_each_uniontype_small(
         [&](unsigned idx, jl_datatype_t *jt) {
@@ -2458,7 +2466,7 @@ static Value *emit_bitsunion_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1,
     ctx.builder.CreateCall(trap_func);
     ctx.builder.CreateUnreachable();
     ctx.builder.SetInsertPoint(postBB);
-    return ctx.builder.CreateAnd(phi, ctx.builder.CreateICmpEQ(arg1.TIndex, arg2.TIndex));
+    return phi;
 }
 
 static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t arg2)
@@ -2540,8 +2548,8 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a
                     continue;
                 Value *nullcheck1 = nullptr;
                 Value *nullcheck2 = nullptr;
-                auto fld1 = emit_getfield_knownidx(ctx, arg1, i, sty, &nullcheck1);
-                auto fld2 = emit_getfield_knownidx(ctx, arg2, i, sty, &nullcheck2);
+                auto fld1 = emit_getfield_knownidx(ctx, arg1, i, sty, jl_memory_order_notatomic, &nullcheck1);
+                auto fld2 = emit_getfield_knownidx(ctx, arg2, i, sty, jl_memory_order_notatomic, &nullcheck2);
                 Value *fld_answer;
                 if (jl_field_isptr(sty, i) && jl_is_concrete_immutable(fldty)) {
                     // concrete immutables that are !isinlinealloc might be reference cycles
@@ -2565,6 +2573,7 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a
 // representing the undef-ness of `arg1` and `arg2`.
 // This can only happen when comparing two fields of the same time and the result should be
 // true if both are NULL
+// Like the runtime counterpart, this is guaranteed by codegen to be non-allocating and to exclude safepoints
 static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgval_t &arg2,
                         Value *nullcheck1, Value *nullcheck2)
 {
@@ -2585,46 +2594,45 @@ static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgva
         // since it is normalized to `::Type{Union{}}` instead...
         if (arg1.TIndex)
             return emit_nullcheck_guard(ctx, nullcheck1, [&] {
-                return emit_isa(ctx, arg1, rt2, NULL).first; // rt2 is a singleton type
+                return emit_exactly_isa(ctx, arg1, rt2); // rt2 is a singleton type
             });
         if (arg2.TIndex)
             return emit_nullcheck_guard(ctx, nullcheck2, [&] {
-                return emit_isa(ctx, arg2, rt1, NULL).first; // rt1 is a singleton type
+                return emit_exactly_isa(ctx, arg2, rt1); // rt1 is a singleton type
             });
+        if (!(arg1.isboxed || arg1.constant) || !(arg2.isboxed || arg2.constant))
+            // not TIndex && not boxed implies it is an unboxed value of a different type from this singleton
+            // (which was probably caught above, but just to be safe, we repeat it here explicitly)
+            return ConstantInt::get(T_int1, 0);
+        Value *varg1 = arg1.constant ? literal_pointer_val(ctx, arg1.constant) : maybe_bitcast(ctx, arg1.Vboxed, T_pjlvalue);
+        Value *varg2 = arg2.constant ? literal_pointer_val(ctx, arg2.constant) : maybe_bitcast(ctx, arg2.Vboxed, T_pjlvalue);
         // rooting these values isn't needed since we won't load this pointer
         // and we know at least one of them is a unique Singleton
         // which is already enough to ensure pointer uniqueness for this test
         // even if the other pointer managed to get garbage collected
-        return ctx.builder.CreateICmpEQ(
-            mark_callee_rooted(ctx, boxed(ctx, arg1)),
-            mark_callee_rooted(ctx, boxed(ctx, arg2)));
+        // TODO: use emit_pointer_from_objref instead, per comment above
+        return ctx.builder.CreateICmpEQ(decay_derived(ctx, varg1), decay_derived(ctx, varg2));
     }
 
     if (jl_type_intersection(rt1, rt2) == (jl_value_t*)jl_bottom_type) // types are disjoint (exhaustive test)
         return ConstantInt::get(T_int1, 0);
 
-    // If both sides are boxed or can be trivially boxed,
-    // we'll prefer to do a pointer check.
-    // At this point, we know that at least one of the arguments isn't a constant
-    // so a runtime content check will involve at least one load from the
-    // pointer (and likely a type check)
-    // so a pointer comparison should be no worse than that even in imaging mode
-    // when the constant pointer has to be loaded.
-    if ((arg1.V || arg1.constant) && (arg2.V || arg2.constant) &&
-        (jl_pointer_egal(rt1) || jl_pointer_egal(rt2)) &&
-        // jl_pointer_egal returns true for Bool, which is not helpful here
-        (rt1 != (jl_value_t*)jl_bool_type || rt2 != (jl_value_t*)jl_bool_type))
-        return ctx.builder.CreateICmpEQ(boxed(ctx, arg1), boxed(ctx, arg2));
-
     bool justbits1 = jl_is_concrete_immutable(rt1);
     bool justbits2 = jl_is_concrete_immutable(rt2);
     if (justbits1 || justbits2) { // whether this type is unique'd by value
         return emit_nullcheck_guard2(ctx, nullcheck1, nullcheck2, [&] () -> Value* {
             jl_value_t *typ = justbits1 ? rt1 : rt2;
+            if (typ == (jl_value_t*)jl_bool_type) { // aka jl_pointer_egal
+                // some optimizations for bool, since pointer comparison may be better
+                if ((arg1.isboxed || arg1.constant) && (arg2.isboxed || arg2.constant)) { // aka have-fast-pointer
+                    Value *varg1 = arg1.constant ? literal_pointer_val(ctx, arg1.constant) : maybe_bitcast(ctx, arg1.Vboxed, T_pjlvalue);
+                    Value *varg2 = arg2.constant ? literal_pointer_val(ctx, arg2.constant) : maybe_bitcast(ctx, arg2.Vboxed, T_pjlvalue);
+                    return ctx.builder.CreateICmpEQ(decay_derived(ctx, varg1), decay_derived(ctx, varg2));
+                }
+            }
             if (rt1 == rt2)
                 return emit_bits_compare(ctx, arg1, arg2);
-            Value *same_type = (typ == rt2) ? emit_isa(ctx, arg1, typ, NULL).first :
-                emit_isa(ctx, arg2, typ, NULL).first;
+            Value *same_type = emit_exactly_isa(ctx, (typ == rt2 ? arg1 : arg2), typ);
             BasicBlock *currBB = ctx.builder.GetInsertBlock();
             BasicBlock *isaBB = BasicBlock::Create(jl_LLVMContext, "is", ctx.f);
             BasicBlock *postBB = BasicBlock::Create(jl_LLVMContext, "post_is", ctx.f);
@@ -2642,6 +2650,25 @@ static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgva
         });
     }
 
+    // If either side is boxed or can be trivially boxed,
+    // we'll prefer to do a pointer check.
+    // At this point, we know that at least one of the arguments isn't a constant
+    // so a runtime content check will involve at least one load from the
+    // pointer (and likely a type check)
+    // so a pointer comparison should be no worse than that even in imaging mode
+    // when the constant pointer has to be loaded.
+    // Note that we ignore nullcheck, since in the case where it may be set, we
+    // also know the types of both fields must be the same so there cannot be
+    // any unboxed values on either side.
+    if (jl_pointer_egal(rt1) || jl_pointer_egal(rt2)) {
+        // n.b. Vboxed == isboxed || Tindex
+        if (!(arg1.Vboxed || arg1.constant) || !(arg2.Vboxed || arg2.constant))
+            return ConstantInt::get(T_int1, 0);
+        Value *varg1 = arg1.constant ? literal_pointer_val(ctx, arg1.constant) : maybe_bitcast(ctx, arg1.Vboxed, T_pjlvalue);
+        Value *varg2 = arg2.constant ? literal_pointer_val(ctx, arg2.constant) : maybe_bitcast(ctx, arg2.Vboxed, T_pjlvalue);
+        return ctx.builder.CreateICmpEQ(decay_derived(ctx, varg1), decay_derived(ctx, varg2));
+    }
+
     // TODO: handle the case where arg1.typ != arg2.typ, or when one of these isn't union,
     //       or when the union can be pointer
     if (arg1.TIndex && arg2.TIndex && jl_egal(arg1.typ, arg2.typ) &&
@@ -2653,6 +2680,102 @@ static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgva
     return emit_box_compare(ctx, arg1, arg2, nullcheck1, nullcheck2);
 }
 
+static bool emit_f_opfield(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, // Shared codegen entry for the setfield!/replacefield!/swapfield!/modifyfield! builtins (selected by comparing f).
+                           const jl_cgval_t *argv, size_t nargs, const jl_cgval_t *modifyop) // modifyop is not inspected here; it is only forwarded to emit_setfield.
+{ // Returns true once *ret is set (the emit_setfield result, or an empty jl_cgval_t after an emitted error); returns false otherwise.
+    bool issetfield = f == jl_builtin_setfield;
+    bool isreplacefield = f == jl_builtin_replacefield;
+    bool isswapfield = f == jl_builtin_swapfield;
+    bool ismodifyfield = f == jl_builtin_modifyfield;
+    const jl_cgval_t undefval; // default-constructed placeholder bound to cmp when argv[3] is not an extra operand
+    const jl_cgval_t &obj = argv[1];
+    const jl_cgval_t &fld = argv[2];
+    jl_cgval_t val = argv[isreplacefield || ismodifyfield ? 4 : 3]; // replacefield!/modifyfield! carry an extra operand at argv[3], so their value is at argv[4]
+    const jl_cgval_t &cmp = isreplacefield || ismodifyfield ? argv[3] : undefval; // NOTE(review): presumably the expected value for replacefield! and the op for modifyfield! -- confirm against emit_setfield
+    enum jl_memory_order order = jl_memory_order_notatomic; // used when no ordering argument is passed
+    const std::string fname = issetfield ? "setfield!" : isreplacefield ? "replacefield!" : isswapfield ? "swapfield!" : "modifyfield!";
+    if (nargs >= (isreplacefield || ismodifyfield ? 5 : 4)) { // an ordering argument follows the value
+        const jl_cgval_t &ord = argv[isreplacefield || ismodifyfield ? 5 : 4];
+        emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, fname);
+        if (!ord.constant)
+            return false; // ordering is not a compile-time constant: not handled here
+        order = jl_get_atomic_order((jl_sym_t*)ord.constant, !issetfield, true);
+    }
+    enum jl_memory_order fail_order = order; // failure ordering defaults to the success ordering
+    if (isreplacefield && nargs == 6) { // replacefield! with a separate failure ordering in argv[6]
+        const jl_cgval_t &ord = argv[6];
+        emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, fname);
+        if (!ord.constant)
+            return false; // failure ordering is not a compile-time constant: not handled here
+        fail_order = jl_get_atomic_order((jl_sym_t*)ord.constant, true, false);
+    }
+    if (order == jl_memory_order_invalid || fail_order == jl_memory_order_invalid || fail_order > order) { // also rejects a failure ordering that compares greater than the success ordering
+        emit_atomic_error(ctx, "invalid atomic ordering");
+        *ret = jl_cgval_t(); // unreachable
+        return true;
+    }
+
+    jl_datatype_t *uty = (jl_datatype_t*)jl_unwrap_unionall(obj.typ);
+    if (jl_is_datatype(uty) && jl_struct_try_layout(uty)) {
+        ssize_t idx = -1; // stays -1 when the field cannot be resolved from a constant symbol or integer
+        if (fld.constant && fld.typ == (jl_value_t*)jl_symbol_type) {
+            idx = jl_field_index(uty, (jl_sym_t*)fld.constant, 0);
+        }
+        else if (fld.constant && fld.typ == (jl_value_t*)jl_long_type) {
+            ssize_t i = jl_unbox_long(fld.constant);
+            if (i > 0 && i <= jl_datatype_nfields(uty))
+                idx = i - 1; // constant index is 1-based; idx is 0-based
+        }
+        if (idx != -1) {
+            jl_value_t *ft = jl_svecref(uty->types, idx);
+            if (!jl_has_free_typevars(ft)) {
+                if (!ismodifyfield && !jl_subtype(val.typ, ft)) { // value type not statically a subtype of the field type: check at runtime, then narrow val
+                    emit_typecheck(ctx, val, ft, fname);
+                    val = update_julia_type(ctx, val, ft);
+                }
+                // TODO: attempt better codegen for approximate types
+                bool isboxed = jl_field_isptr(uty, idx);
+                bool isatomic = jl_field_isatomic(uty, idx);
+                bool needlock = isatomic && !isboxed && jl_datatype_size(jl_field_type(uty, idx)) > MAX_ATOMIC_SIZE; // atomic, non-pointer field whose size exceeds MAX_ATOMIC_SIZE
+                if (isatomic == (order == jl_memory_order_notatomic)) { // field atomicity and requested ordering disagree (atomic field with notatomic order, or the reverse)
+                    emit_atomic_error(ctx,
+                            issetfield ?
+                            (isatomic ? "setfield!: atomic field cannot be written non-atomically"
+                                      : "setfield!: non-atomic field cannot be written atomically") :
+                            isreplacefield ?
+                            (isatomic ? "replacefield!: atomic field cannot be written non-atomically"
+                                      : "replacefield!: non-atomic field cannot be written atomically") :
+                            isswapfield ?
+                            (isatomic ? "swapfield!: atomic field cannot be written non-atomically"
+                                      : "swapfield!: non-atomic field cannot be written atomically") :
+                            (isatomic ? "modifyfield!: atomic field cannot be written non-atomically"
+                                      : "modifyfield!: non-atomic field cannot be written atomically"));
+                    *ret = jl_cgval_t();
+                    return true;
+                }
+                if (isatomic == (fail_order == jl_memory_order_notatomic)) { // same disagreement test for the failure ordering; fail_order differs from order only in the replacefield! nargs == 6 case above
+                    emit_atomic_error(ctx,
+                            (isatomic ? "replacefield!: atomic field cannot be accessed non-atomically"
+                                      : "replacefield!: non-atomic field cannot be accessed atomically"));
+                    *ret = jl_cgval_t();
+                    return true;
+                }
+                *ret = emit_setfield(ctx, uty, obj, idx, val, cmp, true, true,
+                        (needlock || order <= jl_memory_order_notatomic)
+                        ? (isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic) // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
+                        : get_llvm_atomic_order(order),
+                        (needlock || fail_order <= jl_memory_order_notatomic)
+                        ? (isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic) // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
+                        : get_llvm_atomic_order(fail_order),
+                        needlock, issetfield, isreplacefield, isswapfield, ismodifyfield,
+                        modifyop, fname);
+                return true;
+            }
+        }
+    }
+    return false; // object type, field index, or field type not statically resolvable: not handled here
+}
+
 static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
     emit_function(
         jl_method_instance_t *lam,
@@ -2827,6 +2950,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                     ety = (jl_value_t*)jl_any_type;
                 ssize_t nd = jl_is_long(ndp) ? jl_unbox_long(ndp) : -1;
                 jl_value_t *boundscheck = argv[1].constant;
+                emit_typecheck(ctx, argv[1], (jl_value_t*)jl_bool_type, "arrayref");
                 Value *idx = emit_array_nd_index(ctx, ary, ary_ex, nd, &argv[3], nargs - 2, boundscheck);
                 if (!isboxed && jl_is_datatype(ety) && jl_datatype_size(ety) == 0) {
                     assert(((jl_datatype_t*)ety)->instance != NULL);
@@ -2861,7 +2985,10 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                     *ret = typed_load(ctx,
                             emit_arrayptr(ctx, ary, ary_ex),
                             idx, ety,
-                            !isboxed ? tbaa_arraybuf : tbaa_ptrarraybuf, aliasscope);
+                            isboxed ? tbaa_ptrarraybuf : tbaa_arraybuf,
+                            aliasscope,
+                            isboxed,
+                            AtomicOrdering::NotAtomic);
                 }
                 return true;
             }
@@ -2870,7 +2997,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
 
     else if (f == jl_builtin_arrayset && nargs >= 4) {
         const jl_cgval_t &ary = argv[2];
-        const jl_cgval_t &val = argv[3];
+        jl_cgval_t val = argv[3];
         bool indices_ok = true;
         for (size_t i = 4; i <= nargs; i++) {
             if (argv[i].typ != (jl_value_t*)jl_long_type) {
@@ -2883,104 +3010,163 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
             jl_value_t *ety = jl_tparam0(aty_dt);
             jl_value_t *ndp = jl_tparam1(aty_dt);
             if (!jl_has_free_typevars(ety) && (jl_is_long(ndp) || nargs == 4)) {
-                if (jl_subtype(val.typ, ety)) { // TODO: probably should just convert this to a type-assert
-                    size_t elsz = 0, al = 0;
-                    int union_max = jl_islayout_inline(ety, &elsz, &al);
-                    bool isboxed = (union_max == 0);
-                    if (isboxed)
-                        ety = (jl_value_t*)jl_any_type;
-                    jl_value_t *ary_ex = jl_exprarg(ex, 2);
-                    ssize_t nd = jl_is_long(ndp) ? jl_unbox_long(ndp) : -1;
-                    jl_value_t *boundscheck = argv[1].constant;
-                    Value *idx = emit_array_nd_index(ctx, ary, ary_ex, nd, &argv[4], nargs - 3, boundscheck);
-                    if (!isboxed && jl_is_datatype(ety) && jl_datatype_size(ety) == 0) {
-                        // no-op
-                    }
-                    else {
-                        PHINode *data_owner = NULL; // owner object against which the write barrier must check
-                        if (isboxed || (jl_is_datatype(ety) && ((jl_datatype_t*)ety)->layout->npointers > 0)) { // if elements are just bits, don't need a write barrier
-                            Value *aryv = boxed(ctx, ary);
-                            Value *flags = emit_arrayflags(ctx, ary);
-                            // the owner of the data is ary itself except if ary->how == 3
-                            flags = ctx.builder.CreateAnd(flags, 3);
-                            Value *is_owned = ctx.builder.CreateICmpEQ(flags, ConstantInt::get(T_int16, 3));
-                            BasicBlock *curBB = ctx.builder.GetInsertBlock();
-                            BasicBlock *ownedBB = BasicBlock::Create(jl_LLVMContext, "array_owned", ctx.f);
-                            BasicBlock *mergeBB = BasicBlock::Create(jl_LLVMContext, "merge_own", ctx.f);
-                            ctx.builder.CreateCondBr(is_owned, ownedBB, mergeBB);
-                            ctx.builder.SetInsertPoint(ownedBB);
-                            // load owner pointer
-                            Instruction *own_ptr;
-                            if (jl_is_long(ndp)) {
-                                own_ptr = ctx.builder.CreateAlignedLoad(T_prjlvalue,
-                                        ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue,
-                                            emit_bitcast(ctx, decay_derived(ctx, aryv), T_pprjlvalue),
-                                            jl_array_data_owner_offset(nd) / sizeof(jl_value_t*)),
-                                        Align(sizeof(void*)));
-                                tbaa_decorate(tbaa_const, maybe_mark_load_dereferenceable(own_ptr, false, (jl_value_t*)jl_array_any_type));
-                            }
-                            else {
-                                own_ptr = ctx.builder.CreateCall(
-                                    prepare_call(jlarray_data_owner_func),
-                                    {aryv});
-                            }
-                            ctx.builder.CreateBr(mergeBB);
-                            ctx.builder.SetInsertPoint(mergeBB);
-                            data_owner = ctx.builder.CreatePHI(T_prjlvalue, 2);
-                            data_owner->addIncoming(aryv, curBB);
-                            data_owner->addIncoming(own_ptr, ownedBB);
+                if (!jl_subtype(val.typ, ety)) {
+                    emit_typecheck(ctx, val, ety, "arrayset");
+                    val = update_julia_type(ctx, val, ety);
+                }
+                size_t elsz = 0, al = 0;
+                int union_max = jl_islayout_inline(ety, &elsz, &al);
+                bool isboxed = (union_max == 0);
+                if (isboxed)
+                    ety = (jl_value_t*)jl_any_type;
+                jl_value_t *ary_ex = jl_exprarg(ex, 2);
+                ssize_t nd = jl_is_long(ndp) ? jl_unbox_long(ndp) : -1;
+                jl_value_t *boundscheck = argv[1].constant;
+                emit_typecheck(ctx, argv[1], (jl_value_t*)jl_bool_type, "arrayset");
+                Value *idx = emit_array_nd_index(ctx, ary, ary_ex, nd, &argv[4], nargs - 3, boundscheck);
+                if (!isboxed && jl_is_datatype(ety) && jl_datatype_size(ety) == 0) {
+                    // no-op
+                }
+                else {
+                    PHINode *data_owner = NULL; // owner object against which the write barrier must check
+                    if (isboxed || (jl_is_datatype(ety) && ((jl_datatype_t*)ety)->layout->npointers > 0)) { // if elements are just bits, don't need a write barrier
+                        Value *aryv = boxed(ctx, ary);
+                        Value *flags = emit_arrayflags(ctx, ary);
+                        // the owner of the data is ary itself except if ary->how == 3
+                        flags = ctx.builder.CreateAnd(flags, 3);
+                        Value *is_owned = ctx.builder.CreateICmpEQ(flags, ConstantInt::get(T_int16, 3));
+                        BasicBlock *curBB = ctx.builder.GetInsertBlock();
+                        BasicBlock *ownedBB = BasicBlock::Create(jl_LLVMContext, "array_owned", ctx.f);
+                        BasicBlock *mergeBB = BasicBlock::Create(jl_LLVMContext, "merge_own", ctx.f);
+                        ctx.builder.CreateCondBr(is_owned, ownedBB, mergeBB);
+                        ctx.builder.SetInsertPoint(ownedBB);
+                        // load owner pointer
+                        Instruction *own_ptr;
+                        if (jl_is_long(ndp)) {
+                            own_ptr = ctx.builder.CreateAlignedLoad(T_prjlvalue,
+                                    ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue,
+                                        emit_bitcast(ctx, decay_derived(ctx, aryv), T_pprjlvalue),
+                                        jl_array_data_owner_offset(nd) / sizeof(jl_value_t*)),
+                                    Align(sizeof(void*)));
+                            tbaa_decorate(tbaa_const, maybe_mark_load_dereferenceable(own_ptr, false, (jl_value_t*)jl_array_any_type));
                         }
-                        if (jl_is_uniontype(ety)) {
-                            Type *AT = ArrayType::get(IntegerType::get(jl_LLVMContext, 8 * al), (elsz + al - 1) / al);
-                            Value *data = emit_bitcast(ctx, emit_arrayptr(ctx, ary, ary_ex), AT->getPointerTo());
-                            // compute tindex from val
-                            jl_cgval_t rhs_union = convert_julia_type(ctx, val, ety);
-                            Value *tindex = compute_tindex_unboxed(ctx, rhs_union, ety);
-                            tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(T_int8, 1));
-                            Value *ndims = (nd == -1 ? emit_arrayndims(ctx, ary) : ConstantInt::get(T_int16, nd));
-                            Value *is_vector = ctx.builder.CreateICmpEQ(ndims, ConstantInt::get(T_int16, 1));
-                            Value *offset = emit_arrayoffset(ctx, ary, nd);
-                            Value *selidx_v = ctx.builder.CreateSub(emit_vectormaxsize(ctx, ary), ctx.builder.CreateZExt(offset, T_size));
-                            Value *selidx_m = emit_arraylen(ctx, ary);
-                            Value *selidx = ctx.builder.CreateSelect(is_vector, selidx_v, selidx_m);
-                            Value *ptindex = ctx.builder.CreateInBoundsGEP(AT, data, selidx);
-                            ptindex = emit_bitcast(ctx, ptindex, T_pint8);
-                            ptindex = ctx.builder.CreateInBoundsGEP(T_int8, ptindex, offset);
-                            ptindex = ctx.builder.CreateInBoundsGEP(T_int8, ptindex, idx);
-                            tbaa_decorate(tbaa_arrayselbyte, ctx.builder.CreateStore(tindex, ptindex));
-                            if (jl_is_datatype(val.typ) && jl_datatype_size(val.typ) == 0) {
-                                // no-op
-                            }
-                            else {
-                                // copy data
-                                Value *addr = ctx.builder.CreateInBoundsGEP(AT, data, idx);
-                                emit_unionmove(ctx, addr, tbaa_arraybuf, val, nullptr);
-                            }
+                        else {
+                            own_ptr = ctx.builder.CreateCall(
+                                prepare_call(jlarray_data_owner_func),
+                                {aryv});
+                        }
+                        ctx.builder.CreateBr(mergeBB);
+                        ctx.builder.SetInsertPoint(mergeBB);
+                        data_owner = ctx.builder.CreatePHI(T_prjlvalue, 2);
+                        data_owner->addIncoming(aryv, curBB);
+                        data_owner->addIncoming(own_ptr, ownedBB);
+                    }
+                    if (!isboxed && jl_is_uniontype(ety)) {
+                        Type *AT = ArrayType::get(IntegerType::get(jl_LLVMContext, 8 * al), (elsz + al - 1) / al);
+                        Value *data = emit_bitcast(ctx, emit_arrayptr(ctx, ary, ary_ex), AT->getPointerTo());
+                        // compute tindex from val
+                        jl_cgval_t rhs_union = convert_julia_type(ctx, val, ety);
+                        Value *tindex = compute_tindex_unboxed(ctx, rhs_union, ety);
+                        tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(T_int8, 1));
+                        Value *ndims = (nd == -1 ? emit_arrayndims(ctx, ary) : ConstantInt::get(T_int16, nd));
+                        Value *is_vector = ctx.builder.CreateICmpEQ(ndims, ConstantInt::get(T_int16, 1));
+                        Value *offset = emit_arrayoffset(ctx, ary, nd);
+                        Value *selidx_v = ctx.builder.CreateSub(emit_vectormaxsize(ctx, ary), ctx.builder.CreateZExt(offset, T_size));
+                        Value *selidx_m = emit_arraylen(ctx, ary);
+                        Value *selidx = ctx.builder.CreateSelect(is_vector, selidx_v, selidx_m);
+                        Value *ptindex = ctx.builder.CreateInBoundsGEP(AT, data, selidx);
+                        ptindex = emit_bitcast(ctx, ptindex, T_pint8);
+                        ptindex = ctx.builder.CreateInBoundsGEP(T_int8, ptindex, offset);
+                        ptindex = ctx.builder.CreateInBoundsGEP(T_int8, ptindex, idx);
+                        tbaa_decorate(tbaa_arrayselbyte, ctx.builder.CreateStore(tindex, ptindex));
+                        if (jl_is_datatype(val.typ) && jl_datatype_size(val.typ) == 0) {
+                            // no-op
                         }
                         else {
-                            typed_store(ctx,
-                                        emit_arrayptr(ctx, ary, ary_ex, isboxed),
-                                        idx, val, ety,
-                                        !isboxed ? tbaa_arraybuf : tbaa_ptrarraybuf,
-                                        ctx.aliasscope, data_owner, 0);
+                            // copy data
+                            Value *addr = ctx.builder.CreateInBoundsGEP(AT, data, idx);
+                            emit_unionmove(ctx, addr, tbaa_arraybuf, val, nullptr);
                         }
                     }
-                    *ret = ary;
-                    return true;
+                    else {
+                        typed_store(ctx,
+                                    emit_arrayptr(ctx, ary, ary_ex, isboxed),
+                                    idx, val, jl_cgval_t(), ety,
+                                    isboxed ? tbaa_ptrarraybuf : tbaa_arraybuf,
+                                    ctx.aliasscope,
+                                    data_owner,
+                                    isboxed,
+                                    isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic, // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
+                                    isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic, // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
+                                    0,
+                                    false,
+                                    true,
+                                    false,
+                                    false,
+                                    false,
+                                    false,
+                                    nullptr,
+                                    "");
+                    }
                 }
+                *ret = ary;
+                return true;
             }
         }
     }
 
-    else if (f == jl_builtin_getfield && (nargs == 2 || nargs == 3)) {
+    else if (f == jl_builtin_getfield && (nargs == 2 || nargs == 3 || nargs == 4)) {
         const jl_cgval_t &obj = argv[1];
         const jl_cgval_t &fld = argv[2];
-        if (fld.constant && fld.typ == (jl_value_t*)jl_symbol_type) {
-            *ret = emit_getfield(ctx, argv[1], (jl_sym_t*)fld.constant);
+        enum jl_memory_order order = jl_memory_order_unspecified;
+        jl_value_t *boundscheck = jl_true;
+
+        if (nargs == 4) {
+            const jl_cgval_t &ord = argv[3];
+            const jl_cgval_t &inb = argv[4];
+            emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, "getfield");
+            emit_typecheck(ctx, inb, (jl_value_t*)jl_bool_type, "getfield");
+            if (!ord.constant)
+                return false;
+            order = jl_get_atomic_order((jl_sym_t*)ord.constant, true, false);
+            if (inb.constant == jl_false)
+                boundscheck = jl_false;
+        }
+        else if (nargs == 3) {
+            const jl_cgval_t &arg3 = argv[3];
+            if (arg3.typ == (jl_value_t*)jl_symbol_type && arg3.constant)
+                order = jl_get_atomic_order((jl_sym_t*)arg3.constant, true, false);
+            else if (arg3.constant == jl_false)
+                boundscheck = jl_false;
+            else if (arg3.typ != (jl_value_t*)jl_bool_type)
+                return false;
+        }
+        if (order == jl_memory_order_invalid) {
+            emit_atomic_error(ctx, "invalid atomic ordering");
+            *ret = jl_cgval_t(); // unreachable
             return true;
         }
 
-        if (fld.typ == (jl_value_t*)jl_long_type) {
+        jl_datatype_t *utt = (jl_datatype_t*)jl_unwrap_unionall(obj.typ);
+        if (jl_is_type_type((jl_value_t*)utt) && jl_is_concrete_type(jl_tparam0(utt)))
+            utt = (jl_datatype_t*)jl_typeof(jl_tparam0(utt));
+
+        if (fld.constant && fld.typ == (jl_value_t*)jl_symbol_type) {
+            jl_sym_t *name = (jl_sym_t*)fld.constant;
+            if (obj.constant && jl_is_module(obj.constant)) {
+                *ret = emit_globalref(ctx, (jl_module_t*)obj.constant, name);
+                return true;
+            }
+
+            if (jl_is_datatype(utt) && jl_struct_try_layout(utt)) {
+                ssize_t idx = jl_field_index(utt, name, 0);
+                if (idx != -1 && !jl_has_free_typevars(jl_field_type(utt, idx))) {
+                    *ret = emit_getfield_knownidx(ctx, obj, idx, utt, order);
+                    return true;
+                }
+            }
+        }
+        else if (fld.typ == (jl_value_t*)jl_long_type) {
             if (ctx.vaSlot > 0) {
                 // optimize VA tuple
                 if (LoadInst *load = dyn_cast_or_null<LoadInst>(obj.V)) {
@@ -2990,7 +3176,6 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                                 ctx.builder.CreateInBoundsGEP(T_prjlvalue, ctx.argArray, ConstantInt::get(T_size, ctx.nReqArgs)),
                                 NULL, false, NULL, NULL);
                         Value *idx = emit_unbox(ctx, T_size, fld, (jl_value_t*)jl_long_type);
-                        jl_value_t *boundscheck = (nargs == 3 ? argv[3].constant : jl_true);
                         idx = emit_bounds_check(ctx, va_ary, NULL, idx, valen, boundscheck);
                         idx = ctx.builder.CreateAdd(idx, ConstantInt::get(T_size, ctx.nReqArgs));
                         Instruction *v = ctx.builder.CreateAlignedLoad(T_prjlvalue, ctx.builder.CreateInBoundsGEP(ctx.argArray, idx), Align(sizeof(void*)));
@@ -3002,41 +3187,44 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                 }
             }
 
-            jl_datatype_t *utt = (jl_datatype_t*)jl_unwrap_unionall(obj.typ);
-            if (jl_is_datatype(utt) && utt->layout) {
-                if ((jl_is_structtype(utt) || jl_is_tuple_type(utt)) && !jl_subtype((jl_value_t*)jl_module_type, obj.typ)) {
+            if (jl_is_datatype(utt)) {
+                if (jl_struct_try_layout(utt)) {
                     size_t nfields = jl_datatype_nfields(utt);
                     // integer index
                     size_t idx;
                     if (fld.constant && (idx = jl_unbox_long(fld.constant) - 1) < nfields) {
-                        // known index
-                        *ret = emit_getfield_knownidx(ctx, obj, idx, utt);
-                        return true;
+                        if (!jl_has_free_typevars(jl_field_type(utt, idx))) {
+                            // known index
+                            *ret = emit_getfield_knownidx(ctx, obj, idx, utt, order);
+                            return true;
+                        }
                     }
                     else {
                         // unknown index
                         Value *vidx = emit_unbox(ctx, T_size, fld, (jl_value_t*)jl_long_type);
-                        jl_value_t *boundscheck = (nargs == 3 ? argv[3].constant : jl_true);
-                        if (emit_getfield_unknownidx(ctx, ret, obj, vidx, utt, boundscheck)) {
+                        if (emit_getfield_unknownidx(ctx, ret, obj, vidx, utt, boundscheck, order)) {
                             return true;
                         }
                     }
                 }
-            }
-            else {
-                if (jl_is_tuple_type(utt) && is_tupletype_homogeneous(utt->types, true)) {
+                if (jl_is_tuple_type(utt) && is_tupletype_homogeneous(utt->parameters, true)) {
                     // For tuples, we can emit code even if we don't know the exact
                     // type (e.g. because we don't know the length). This is possible
                     // as long as we know that all elements are of the same (leaf) type.
                     if (obj.ispointer()) {
+                        if (order != jl_memory_order_notatomic && order != jl_memory_order_unspecified) {
+                            emit_atomic_error(ctx, "getfield: non-atomic field cannot be accessed atomically");
+                            *ret = jl_cgval_t(); // unreachable
+                            return true;
+                        }
                         // Determine which was the type that was homogenous
                         jl_value_t *jt = jl_tparam0(utt);
                         if (jl_is_vararg(jt))
                             jt = jl_unwrap_vararg(jt);
+                        assert(jl_is_datatype(jt));
                         Value *vidx = emit_unbox(ctx, T_size, fld, (jl_value_t*)jl_long_type);
                         // This is not necessary for correctness, but allows to omit
                         // the extra code for getting the length of the tuple
-                        jl_value_t *boundscheck = (nargs == 3 ? argv[3].constant : jl_true);
                         if (!bounds_check_enabled(ctx, boundscheck)) {
                             vidx = ctx.builder.CreateSub(vidx, ConstantInt::get(T_size, 1));
                         } else {
@@ -3044,44 +3232,25 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                                 emit_datatype_nfields(ctx, emit_typeof_boxed(ctx, obj)),
                                 jl_true);
                         }
-                        bool isboxed = !jl_datatype_isinlinealloc(jt);
+                        bool isboxed = !jl_datatype_isinlinealloc((jl_datatype_t*)jt, 0);
                         Value *ptr = maybe_decay_tracked(ctx, data_pointer(ctx, obj));
                         *ret = typed_load(ctx, ptr, vidx,
                                 isboxed ? (jl_value_t*)jl_any_type : jt,
-                                obj.tbaa, nullptr, false);
+                                obj.tbaa, nullptr, isboxed, AtomicOrdering::NotAtomic, false);
                         return true;
                     }
                 }
             }
         }
+        // TODO: generic getfield func with more efficient calling convention
+        return false;
     }
 
-    else if (f == jl_builtin_setfield && nargs == 3) {
-        const jl_cgval_t &obj = argv[1];
-        const jl_cgval_t &fld = argv[2];
-        const jl_cgval_t &val = argv[3];
-
-        jl_datatype_t *uty = (jl_datatype_t*)jl_unwrap_unionall(obj.typ);
-        if (jl_is_structtype(uty) && uty != jl_module_type && uty->layout) {
-            size_t idx = (size_t)-1;
-            if (fld.constant && fld.typ == (jl_value_t*)jl_symbol_type) {
-                idx = jl_field_index(uty, (jl_sym_t*)fld.constant, 0);
-            }
-            else if (fld.constant && fld.typ == (jl_value_t*)jl_long_type) {
-                ssize_t i = jl_unbox_long(fld.constant);
-                if (i > 0 && i <= jl_datatype_nfields(uty))
-                    idx = i - 1;
-            }
-            if (idx != (size_t)-1) {
-                jl_value_t *ft = jl_svecref(uty->types, idx);
-                if (jl_subtype(val.typ, ft)) {
-                    // TODO: attempt better codegen for approximate types
-                    emit_setfield(ctx, uty, obj, idx, val, true, true);
-                    *ret = val;
-                    return true;
-                }
-            }
-        }
+    else if ((f == jl_builtin_setfield && (nargs == 3 || nargs == 4)) ||
+             (f == jl_builtin_swapfield && (nargs == 3 || nargs == 4)) ||
+             (f == jl_builtin_replacefield && (nargs == 4 || nargs == 5 || nargs == 6)) ||
+             (f == jl_builtin_modifyfield && (nargs == 4 || nargs == 5))) {
+        return emit_f_opfield(ctx, ret, f, argv, nargs, nullptr);
     }
 
     else if (f == jl_builtin_nfields && nargs == 1) {
@@ -3128,6 +3297,8 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                 Value *types_len = emit_datatype_nfields(ctx, tyv);
                 Value *idx = emit_unbox(ctx, T_size, fld, (jl_value_t*)jl_long_type);
                 jl_value_t *boundscheck = (nargs == 3 ? argv[3].constant : jl_true);
+                if (nargs == 3)
+                    emit_typecheck(ctx, argv[3], (jl_value_t*)jl_bool_type, "fieldtype");
                 emit_bounds_check(ctx, typ, (jl_value_t*)jl_datatype_type, idx, types_len, boundscheck);
                 Value *fieldtyp_p = ctx.builder.CreateInBoundsGEP(T_prjlvalue, decay_derived(ctx, emit_bitcast(ctx, types_svec, T_pprjlvalue)), idx);
                 Value *fieldtyp = tbaa_decorate(tbaa_const, ctx.builder.CreateAlignedLoad(T_prjlvalue, fieldtyp_p, Align(sizeof(void*))));
@@ -3140,7 +3311,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
     else if (f == jl_builtin_sizeof && nargs == 1) {
         const jl_cgval_t &obj = argv[1];
         jl_datatype_t *sty = (jl_datatype_t*)jl_unwrap_unionall(obj.typ);
-        assert(jl_string_type->mutabl);
+        assert(jl_string_type->name->mutabl);
         if (sty == jl_string_type || sty == jl_simplevector_type) {
             if (obj.constant) {
                 size_t sz;
@@ -3198,7 +3369,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
         }
     }
 
-    else if (f == jl_builtin_isdefined && nargs == 2) {
+    else if (f == jl_builtin_isdefined && (nargs == 2 || nargs == 3)) {
         const jl_cgval_t &obj = argv[1];
         const jl_cgval_t &fld = argv[2];
         jl_datatype_t *stt = (jl_datatype_t*)obj.typ;
@@ -3228,10 +3399,41 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
         else {
             return false;
         }
-        if (fieldidx < 0 || fieldidx >= jl_datatype_nfields(stt)) {
+        enum jl_memory_order order = jl_memory_order_unspecified;
+        if (nargs == 3) {
+            const jl_cgval_t &ord = argv[3];
+            emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, "isdefined");
+            if (!ord.constant)
+                return false;
+            order = jl_get_atomic_order((jl_sym_t*)ord.constant, true, false);
+        }
+        if (order == jl_memory_order_invalid) {
+            emit_atomic_error(ctx, "invalid atomic ordering");
+            *ret = jl_cgval_t(); // unreachable
+            return true;
+        }
+        ssize_t nf = jl_datatype_nfields(stt);
+        if (fieldidx < 0 || fieldidx >= nf) {
+            if (order != jl_memory_order_unspecified) {
+                emit_atomic_error(ctx, "isdefined: atomic ordering cannot be specified for nonexistent field");
+                *ret = jl_cgval_t(); // unreachable
+                return true;
+            }
             *ret = mark_julia_const(jl_false);
+            return true;
+        }
+        bool isatomic = jl_field_isatomic(stt, fieldidx);
+        if (!isatomic && order != jl_memory_order_notatomic && order != jl_memory_order_unspecified) {
+            emit_atomic_error(ctx, "isdefined: non-atomic field cannot be accessed atomically");
+            *ret = jl_cgval_t(); // unreachable
+            return true;
+        }
+        if (isatomic && order == jl_memory_order_notatomic) {
+            emit_atomic_error(ctx, "isdefined: atomic field cannot be accessed non-atomically");
+            *ret = jl_cgval_t(); // unreachable
+            return true;
         }
-        else if (fieldidx < stt->ninitialized) {
+        else if (fieldidx < nf - stt->name->n_uninitialized) {
             *ret = mark_julia_const(jl_true);
         }
         else if (jl_field_isptr(stt, fieldidx) || jl_type_hasptr(jl_field_type(stt, fieldidx))) {
@@ -3248,7 +3450,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                 // emit this using the same type as emit_getfield_knownidx
                 // so that LLVM may be able to load-load forward them and fold the result
                 fldv = tbaa_decorate(tbaa, ctx.builder.CreateAlignedLoad(T_prjlvalue, addr, Align(sizeof(size_t))));
-                cast<LoadInst>(fldv)->setOrdering(AtomicOrdering::Unordered);
+                cast<LoadInst>(fldv)->setOrdering(order <= jl_memory_order_notatomic ? AtomicOrdering::Unordered : get_llvm_atomic_order(order));
             }
             else {
                 fldv = ctx.builder.CreateExtractValue(obj.V, offs);
@@ -3263,6 +3465,10 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
         else {
             *ret = mark_julia_const(jl_true);
         }
+        if (order > jl_memory_order_monotonic && ret->constant) {
+            // fence instructions may only have acquire, release, acq_rel, or seq_cst ordering.
+            ctx.builder.CreateFence(get_llvm_atomic_order(order));
+        }
         return true;
     }
 
@@ -3271,7 +3477,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
 
 // Returns T_prjlvalue
 static CallInst *emit_jlcall(jl_codectx_t &ctx, Function *theFptr, Value *theF,
-                             jl_cgval_t *argv, size_t nargs, CallingConv::ID cc)
+                             const jl_cgval_t *argv, size_t nargs, CallingConv::ID cc)
 {
     // emit arguments
     SmallVector<Value*, 3> theArgs;
@@ -3295,14 +3501,14 @@ static CallInst *emit_jlcall(jl_codectx_t &ctx, Function *theFptr, Value *theF,
 }
 // Returns T_prjlvalue
 static CallInst *emit_jlcall(jl_codectx_t &ctx, JuliaFunction *theFptr, Value *theF,
-                             jl_cgval_t *argv, size_t nargs, CallingConv::ID cc)
+                             const jl_cgval_t *argv, size_t nargs, CallingConv::ID cc)
 {
     return emit_jlcall(ctx, prepare_call(theFptr), theF, argv, nargs, cc);
 }
 
 
 static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_t *mi, jl_value_t *jlretty, StringRef specFunctionObject,
-                                          jl_cgval_t *argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *return_roots, jl_value_t *inferred_retty)
+                                          const jl_cgval_t *argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *return_roots, jl_value_t *inferred_retty)
 {
     // emit specialized call site
     bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure;
@@ -3357,8 +3563,7 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_
             argvals[idx] = boxed(ctx, arg);
         }
         else if (et->isAggregateType()) {
-            if (!arg.ispointer())
-                arg = value_to_pointer(ctx, arg);
+            arg = value_to_pointer(ctx, arg);
             // can lazy load on demand, no copy needed
             assert(at == PointerType::get(et, AddressSpace::Derived));
             argvals[idx] = decay_derived(ctx, maybe_bitcast(ctx,
@@ -3383,7 +3588,7 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_
     jl_cgval_t retval;
     switch (returninfo.cc) {
         case jl_returninfo_t::Boxed:
-            retval = mark_julia_type(ctx, call, true, inferred_retty);
+            retval = mark_julia_type(ctx, call, true, jlretty);
             break;
         case jl_returninfo_t::Register:
             retval = mark_julia_type(ctx, call, false, jlretty);
@@ -3413,20 +3618,18 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_
             break;
     }
     // see if inference has a different / better type for the call than the lambda
-    if (inferred_retty != retval.typ)
-        retval = update_julia_type(ctx, retval, inferred_retty);
-    return retval;
+    return update_julia_type(ctx, retval, inferred_retty);
 }
 
-static jl_cgval_t emit_call_specfun_boxed(jl_codectx_t &ctx, StringRef specFunctionObject,
-                                          jl_cgval_t *argv, size_t nargs, jl_value_t *inferred_retty)
+static jl_cgval_t emit_call_specfun_boxed(jl_codectx_t &ctx, jl_value_t *jlretty, StringRef specFunctionObject,
+                                          const jl_cgval_t *argv, size_t nargs, jl_value_t *inferred_retty) // emits a JLCALL_F_CC call to the function named `specFunctionObject`, declared with `jl_func_sig`
 {
     auto theFptr = cast<Function>(
         jl_Module->getOrInsertFunction(specFunctionObject, jl_func_sig).getCallee());
     add_return_attr(theFptr, Attribute::NonNull);
     theFptr->addFnAttr(Thunk);
     Value *ret = emit_jlcall(ctx, theFptr, nullptr, argv, nargs, JLCALL_F_CC);
-    return mark_julia_type(ctx, ret, true, inferred_retty);
+    return update_julia_type(ctx, mark_julia_type(ctx, ret, true, jlretty), inferred_retty); // mark the boxed result as `jlretty`, then pass it through update_julia_type with `inferred_retty`
 }
 
 static jl_cgval_t emit_invoke(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
@@ -3443,7 +3646,11 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
         if (argv[i].typ == jl_bottom_type)
             return jl_cgval_t();
     }
+    return emit_invoke(ctx, lival, argv, nargs, rt);
+}
 
+static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const jl_cgval_t *argv, size_t nargs, jl_value_t *rt)
+{
     bool handled = false;
     jl_cgval_t result;
     if (lival.constant) {
@@ -3455,7 +3662,7 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
             FunctionType *ft = ctx.f->getFunctionType();
             StringRef protoname = ctx.f->getName();
             if (ft == jl_func_sig) {
-                result = emit_call_specfun_boxed(ctx, protoname, argv, nargs, rt);
+                result = emit_call_specfun_boxed(ctx, ctx.rettype, protoname, argv, nargs, rt);
                 handled = true;
             }
             else if (ft != jl_func_sig_sparams) {
@@ -3497,7 +3704,7 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
                     if (specsig)
                         result = emit_call_specfun_other(ctx, mi, codeinst->rettype, protoname, argv, nargs, &cc, &return_roots, rt);
                     else
-                        result = emit_call_specfun_boxed(ctx, protoname, argv, nargs, rt);
+                        result = emit_call_specfun_boxed(ctx, codeinst->rettype, protoname, argv, nargs, rt);
                     handled = true;
                     if (need_to_emit) {
                         Function *trampoline_decl = cast<Function>(jl_Module->getNamedValue(protoname));
@@ -3516,6 +3723,40 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
     return result;
 }
 
+static jl_cgval_t emit_invoke_modify(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
+{ // Emit code for an `invoke_modify` expression `ex`; boxed results are marked with type `rt`.
+    jl_value_t **args = (jl_value_t**)jl_array_data(ex->args);
+    size_t arglen = jl_array_dim0(ex->args);
+    size_t nargs = arglen - 1; // entries after args[0]: the callee followed by its arguments
+    assert(arglen >= 2);
+    jl_cgval_t lival = emit_expr(ctx, args[0]); // handed to the specialized emitters below as their last argument
+    jl_cgval_t *argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nargs);
+    for (size_t i = 0; i < nargs; ++i) {
+        argv[i] = emit_expr(ctx, args[i + 1]);
+        if (argv[i].typ == jl_bottom_type)
+            return jl_cgval_t(); // an argument has bottom type: return a default-constructed jl_cgval_t
+    }
+    const jl_cgval_t &f = argv[0]; // the callee
+    jl_cgval_t ret;
+    if (f.constant && f.constant == jl_builtin_modifyfield) {
+        if (emit_f_opfield(ctx, &ret, jl_builtin_modifyfield, argv, nargs - 1, &lival)) // try the specialized field-op emitter first
+            return ret;
+        auto it = builtin_func_map.find(&jl_f_modifyfield);
+        assert(it != builtin_func_map.end());
+        Value *oldnew = emit_jlcall(ctx, it->second, V_rnull, &argv[1], nargs - 1, JLCALL_F_CC); // otherwise call the builtin directly, skipping argv[0]
+        return mark_julia_type(ctx, oldnew, true, rt);
+    }
+    if (f.constant && jl_typeis(f.constant, jl_intrinsic_type)) {
+        JL_I::intrinsic fi = (intrinsic)*(uint32_t*)jl_data_ptr(f.constant);
+        if (fi == JL_I::atomic_pointermodify && jl_intrinsic_nargs((int)fi) == nargs - 1) // only atomic_pointermodify with the expected argument count is specialized
+            return emit_atomic_pointerop(ctx, fi, argv, nargs - 1, &lival);
+    }
+
+    // fallback: generic call through jlapplygeneric_func with all of argv (callee first)
+    Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, nargs, JLCALL_F_CC);
+    return mark_julia_type(ctx, callval, true, rt);
+}
+
 static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
 {
     jl_value_t **args = (jl_value_t**)jl_array_data(ex->args);
@@ -3789,30 +4030,14 @@ static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *va
             ssaslot->insertAfter(varslot);
             if (vi.isVolatile) {
                 Value *unbox = ctx.builder.CreateAlignedLoad(ssaslot->getAllocatedType(), varslot,
-#if JL_LLVM_VERSION >= 110000
                         varslot->getAlign(),
-#else
-                        varslot->getAlignment(),
-#endif
                         true);
-                ctx.builder.CreateAlignedStore(unbox, ssaslot,
-#if JL_LLVM_VERSION >= 110000
-                        ssaslot->getAlign()
-#else
-                        ssaslot->getAlignment()
-#endif
-                        );
+                ctx.builder.CreateAlignedStore(unbox, ssaslot, ssaslot->getAlign());
             }
             else {
                 const DataLayout &DL = jl_data_layout;
                 uint64_t sz = DL.getTypeStoreSize(T);
-                emit_memcpy(ctx, ssaslot, tbaa_stack, vi.value, sz,
-#if JL_LLVM_VERSION >= 110000
-                        ssaslot->getAlign().value()
-#else
-                        ssaslot->getAlignment()
-#endif
-                        );
+                emit_memcpy(ctx, ssaslot, tbaa_stack, vi.value, sz, ssaslot->getAlign().value());
             }
             Value *tindex = NULL;
             if (vi.pTIndex)
@@ -3935,9 +4160,13 @@ static void emit_vi_assignment_unboxed(jl_codectx_t &ctx, jl_varinfo_t &vi, Valu
 static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r)
 {
     jl_value_t *ssavalue_types = (jl_value_t*)ctx.source->ssavaluetypes;
-    assert(jl_is_array(ssavalue_types));
+    jl_value_t *phiType = NULL;
+    if (jl_is_array(ssavalue_types)) {
+        phiType = jl_array_ptr_ref(ssavalue_types, idx);
+    } else {
+        phiType = (jl_value_t*)jl_any_type;
+    }
     jl_array_t *edges = (jl_array_t*)jl_fieldref_noalloc(r, 0);
-    jl_value_t *phiType = jl_array_ptr_ref(ssavalue_types, idx);
     BasicBlock *BB = ctx.builder.GetInsertBlock();
     auto InsertPt = BB->getFirstInsertionPt();
     if (phiType == jl_bottom_type) {
@@ -3963,11 +4192,7 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r)
             Value *isboxed = ctx.builder.CreateICmpNE(
                     ctx.builder.CreateAnd(Tindex_phi, ConstantInt::get(T_int8, 0x80)),
                     ConstantInt::get(T_int8, 0));
-#if JL_LLVM_VERSION >= 100000
             ctx.builder.CreateMemCpy(phi, MaybeAlign(min_align), dest, MaybeAlign(0), nbytes, false);
-#else
-            ctx.builder.CreateMemCpy(phi, min_align, dest, 0, nbytes, false);
-#endif
             ctx.builder.CreateLifetimeEnd(dest);
             Value *ptr = ctx.builder.CreateSelect(isboxed,
                 maybe_bitcast(ctx, decay_derived(ctx, ptr_phi), T_pint8),
@@ -4007,15 +4232,9 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r)
         // here it's moved into phi in the successor (from dest)
         dest = emit_static_alloca(ctx, vtype);
         Value *phi = emit_static_alloca(ctx, vtype);
-#if JL_LLVM_VERSION >= 100000
         ctx.builder.CreateMemCpy(phi, MaybeAlign(julia_alignment(phiType)),
              dest, MaybeAlign(0),
              jl_datatype_size(phiType), false);
-#else
-        ctx.builder.CreateMemCpy(phi, julia_alignment(phiType),
-             dest, 0,
-             jl_datatype_size(phiType), false);
-#endif
         ctx.builder.CreateLifetimeEnd(dest);
         slot = mark_julia_slot(phi, phiType, NULL, tbaa_stack);
     }
@@ -4260,7 +4479,7 @@ static void emit_stmtpos(jl_codectx_t &ctx, jl_value_t *expr, int ssaval_result)
     jl_value_t **args = (jl_value_t**)jl_array_data(ex->args);
     jl_sym_t *head = ex->head;
     if (head == meta_sym || head == inbounds_sym || head == coverageeffect_sym
-            || head == aliasscope_sym || head == popaliasscope_sym) {
+            || head == aliasscope_sym || head == popaliasscope_sym || head == inline_sym || head == noinline_sym) {
         // some expression types are metadata and can be ignored
         // in statement position
         return;
@@ -4384,6 +4603,12 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
             jl_array_ptr_ref(ctx.source->ssavaluetypes, ssaval);
         return emit_invoke(ctx, ex, expr_t);
     }
+    else if (head == invoke_modify_sym) {
+        assert(ssaval >= 0);
+        jl_value_t *expr_t = jl_is_long(ctx.source->ssavaluetypes) ? (jl_value_t*)jl_any_type :
+            jl_array_ptr_ref(ctx.source->ssavaluetypes, ssaval);
+        return emit_invoke_modify(ctx, ex, expr_t);
+    }
     else if (head == call_sym) {
         jl_value_t *expr_t;
         if (ssaval < 0)
@@ -4415,58 +4640,62 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
         return emit_sparam(ctx, jl_unbox_long(args[0]) - 1);
     }
     else if (head == method_sym) {
-        jl_value_t *mn = args[0];
-        assert(jl_expr_nargs(ex) != 1 || jl_is_symbol(mn) || jl_is_slot(mn));
+        if (jl_expr_nargs(ex) == 1) {
+            jl_value_t *mn = args[0];
+            assert(jl_expr_nargs(ex) != 1 || jl_is_symbol(mn) || jl_is_slot(mn));
 
-        Value *bp = NULL, *name, *bp_owner = V_null;
-        jl_binding_t *bnd = NULL;
-        bool issym = jl_is_symbol(mn);
-        bool isglobalref = !issym && jl_is_globalref(mn);
-        jl_module_t *mod = ctx.module;
-        if (issym || isglobalref) {
-            if (isglobalref) {
-                mod = jl_globalref_mod(mn);
-                mn = (jl_value_t*)jl_globalref_name(mn);
-            }
-            JL_TRY {
-                if (jl_symbol_name((jl_sym_t*)mn)[0] == '@')
-                    jl_errorf("macro definition not allowed inside a local scope");
-                name = literal_pointer_val(ctx, mn);
-                bnd = jl_get_binding_for_method_def(mod, (jl_sym_t*)mn);
-            }
-            JL_CATCH {
-                jl_value_t *e = jl_current_exception();
-                // errors. boo. root it somehow :(
-                bnd = jl_get_binding_wr(ctx.module, (jl_sym_t*)jl_gensym(), 1);
-                bnd->value = e;
-                bnd->constp = 1;
-                raise_exception(ctx, literal_pointer_val(ctx, e));
-                return ghostValue(jl_nothing_type);
-            }
-            bp = julia_binding_gv(ctx, bnd);
-            bp_owner = literal_pointer_val(ctx, (jl_value_t*)mod);
-        }
-        else if (jl_is_slot(mn) || jl_is_argument(mn)) {
-            int sl = jl_slot_number(mn)-1;
-            jl_varinfo_t &vi = ctx.slots[sl];
-            bp = vi.boxroot;
-            name = literal_pointer_val(ctx, (jl_value_t*)slot_symbol(ctx, sl));
-        }
-        if (bp) {
-            Value *mdargs[5] = { name, literal_pointer_val(ctx, (jl_value_t*)mod), bp,
-                                 bp_owner, literal_pointer_val(ctx, bnd) };
-            jl_cgval_t gf = mark_julia_type(
-                    ctx,
-                    ctx.builder.CreateCall(prepare_call(jlgenericfunction_func), makeArrayRef(mdargs)),
-                    true,
-                    jl_function_type);
-            if (jl_expr_nargs(ex) == 1)
+            Value *bp = NULL, *name, *bp_owner = V_null;
+            jl_binding_t *bnd = NULL;
+            bool issym = jl_is_symbol(mn);
+            bool isglobalref = !issym && jl_is_globalref(mn);
+            jl_module_t *mod = ctx.module;
+            if (issym || isglobalref) {
+                if (isglobalref) {
+                    mod = jl_globalref_mod(mn);
+                    mn = (jl_value_t*)jl_globalref_name(mn);
+                }
+                JL_TRY {
+                    if (jl_symbol_name((jl_sym_t*)mn)[0] == '@')
+                        jl_errorf("macro definition not allowed inside a local scope");
+                    name = literal_pointer_val(ctx, mn);
+                    bnd = jl_get_binding_for_method_def(mod, (jl_sym_t*)mn);
+                }
+                JL_CATCH {
+                    jl_value_t *e = jl_current_exception();
+                    // errors. boo. root it somehow :(
+                    bnd = jl_get_binding_wr(ctx.module, (jl_sym_t*)jl_gensym(), 1);
+                    bnd->value = e;
+                    bnd->constp = 1;
+                    raise_exception(ctx, literal_pointer_val(ctx, e));
+                    return ghostValue(jl_nothing_type);
+                }
+                bp = julia_binding_gv(ctx, bnd);
+                bp_owner = literal_pointer_val(ctx, (jl_value_t*)mod);
+            }
+            else if (jl_is_slot(mn) || jl_is_argument(mn)) {
+                int sl = jl_slot_number(mn)-1;
+                jl_varinfo_t &vi = ctx.slots[sl];
+                bp = vi.boxroot;
+                name = literal_pointer_val(ctx, (jl_value_t*)slot_symbol(ctx, sl));
+            }
+            if (bp) {
+                Value *mdargs[5] = { name, literal_pointer_val(ctx, (jl_value_t*)mod), bp,
+                                    bp_owner, literal_pointer_val(ctx, bnd) };
+                jl_cgval_t gf = mark_julia_type(
+                        ctx,
+                        ctx.builder.CreateCall(prepare_call(jlgenericfunction_func), makeArrayRef(mdargs)),
+                        true,
+                        jl_function_type);
                 return gf;
+            }
+            emit_error(ctx, "method: invalid declaration");
+            return jl_cgval_t();
         }
         Value *a1 = boxed(ctx, emit_expr(ctx, args[1]));
         Value *a2 = boxed(ctx, emit_expr(ctx, args[2]));
-        Value *mdargs[3] = {
+        Value *mdargs[4] = {
             /*argdata*/a1,
+            ConstantPointerNull::get(cast<PointerType>(T_prjlvalue)),
             /*code*/a2,
             /*module*/literal_pointer_val(ctx, (jl_value_t*)ctx.module)
         };
@@ -4557,10 +4786,10 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
             std::unique_ptr<Module> closure_m;
             jl_llvm_functions_t closure_decls;
 
-            jl_method_instance_t *li;
-            jl_value_t *closure_t;
-            jl_tupletype_t *env_t;
-            jl_svec_t *sig_args;
+            jl_method_instance_t *li = NULL;
+            jl_value_t *closure_t = NULL;
+            jl_tupletype_t *env_t = NULL;
+            jl_svec_t *sig_args = NULL;
             JL_GC_PUSH5(&li, &closure_src, &closure_t, &env_t, &sig_args);
 
             li = jl_new_method_instance_uninit();
@@ -4691,7 +4920,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
     }
     else if (head == leave_sym || head == coverageeffect_sym
             || head == pop_exception_sym || head == enter_sym || head == inbounds_sym
-            || head == aliasscope_sym || head == popaliasscope_sym) {
+            || head == aliasscope_sym || head == popaliasscope_sym || head == inline_sym || head == noinline_sym) {
         jl_errorf("Expr(:%s) in value position", jl_symbol_name(head));
     }
     else if (head == boundscheck_sym) {
@@ -4760,20 +4989,58 @@ JL_GCC_IGNORE_STOP
 static void allocate_gc_frame(jl_codectx_t &ctx, BasicBlock *b0)
 {
     // TODO: requires the runtime, but is generated unconditionally
-
     // allocate a placeholder gc instruction
-    ctx.ptlsStates = ctx.builder.CreateCall(prepare_call(jltls_states_func));
-    int nthfield = offsetof(jl_tls_states_t, safepoint) / sizeof(void*);
-    ctx.signalPage = emit_nthptr_recast(ctx, ctx.ptlsStates, nthfield, tbaa_const,
-                                        PointerType::get(T_psize, 0));
+    ctx.pgcstack = ctx.builder.CreateCall(prepare_call(jlpgcstack_func)); // call result is kept in ctx.pgcstack; get_current_task() derives the task pointer from it
 }
 
+// Current task = `ctx.pgcstack` (treated as the address of the task's `gcstack` field) minus that field's offset.
+// Divide before negating: `-off / sizeof(void *)` converts the negative int to size_t first (huge positive index).
+static Value *get_current_task(jl_codectx_t &ctx)
+{
+    const int gcstack_offset = offsetof(jl_task_t, gcstack);
+    return ctx.builder.CreateInBoundsGEP(T_pjlvalue, emit_bitcast(ctx, ctx.pgcstack, T_ppjlvalue),
+        ConstantInt::get(T_size, -(gcstack_offset / sizeof(void *))), "current_task");
+}
+
+// Emit a load of the current task's `ptls` field and return it bitcast to `T_ppjlvalue`.
+static Value *get_current_ptls(jl_codectx_t &ctx)
+{
+    const int ptls_offset = offsetof(jl_task_t, ptls);
+    Value *pptls = ctx.builder.CreateInBoundsGEP(
+        T_pjlvalue, get_current_task(ctx),
+        ConstantInt::get(T_size, ptls_offset / sizeof(void *)), // field index in pointer-sized slots
+        "ptls_field");
+    LoadInst *ptls_load = ctx.builder.CreateAlignedLoad(
+        emit_bitcast(ctx, pptls, T_ppjlvalue), Align(sizeof(void *)), "ptls_load");
+    // Note: the corresponding store (`t->ptls = ptls`) happens in `ctx_switch` of tasks.c.
+    tbaa_decorate(tbaa_gcframe, ptls_load);
+    // Use `CastInst::Create` so the result is already an `Instruction*` (no explicit cast needed):
+    auto ptls = CastInst::Create(Instruction::BitCast, ptls_load, T_ppjlvalue, "ptls");
+    ctx.builder.Insert(ptls); // the cast was created detached; insert it at the builder's current position
+    return ptls;
+}
+
+// Compute the address of the current task's `world_age` field and cache it in `ctx.world_age_field`.
+// Call right after `allocate_gc_frame`, in the entry block, with no context switch in between.
 static void emit_last_age_field(jl_codectx_t &ctx)
 {
+    auto ptls = get_current_task(ctx); // NOTE: despite its name, this holds the current task pointer, not the PTLS
+    assert(ctx.builder.GetInsertBlock() == ctx.pgcstack->getParent()); // must be emitted in the same block as ctx.pgcstack
     ctx.world_age_field = ctx.builder.CreateInBoundsGEP(
             T_size,
-            ctx.builder.CreateBitCast(ctx.ptlsStates, T_psize),
-            ConstantInt::get(T_size, offsetof(jl_tls_states_t, world_age) / sizeof(size_t)));
+            ctx.builder.CreateBitCast(ptls, T_psize),
+            ConstantInt::get(T_size, offsetof(jl_task_t, world_age) / sizeof(size_t)),
+            "world_age");
+}
+
+// Emit code that fetches the `safepoint` slot of `jl_tls_states_t` (the signal page) via the current task's PTLS.
+static Value *get_current_signal_page(jl_codectx_t &ctx)
+{
+    // return ctx.builder.CreateCall(prepare_call(reuse_signal_page_func));
+    auto ptls = get_current_ptls(ctx);
+    int nthfield = offsetof(jl_tls_states_t, safepoint) / sizeof(void *); // slot index in pointer-sized units
+    return emit_nthptr_recast(ctx, ptls, nthfield, tbaa_const,
+                              PointerType::get(T_psize, 0));
 }
 
 static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Module *M, jl_codegen_params_t &params)
@@ -4782,7 +5049,7 @@ static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Module *M, jl_cod
     std::string name;
     raw_string_ostream(name) << "tojlinvoke" << globalUnique++;
     Function *f = Function::Create(jl_func_sig,
-            GlobalVariable::PrivateLinkage,
+            GlobalVariable::InternalLinkage,
             name, M);
     jl_init_function(f);
     f->addFnAttr(Thunk);
@@ -4974,9 +5241,48 @@ static Function* gen_cfun_wrapper(
         // add nest parameter (pointer to jl_value_t* data array) after sret arg
         assert(closure_types);
         std::vector<Type*> fargt_sig(sig.fargt_sig);
+
         fargt_sig.insert(fargt_sig.begin() + sig.sret, T_pprjlvalue);
+
+        // Shift LLVM attributes for parameters one to the right, as
+        // we are adding the extra nest parameter after sret arg.
+        std::vector<std::pair<unsigned, AttributeSet>> newAttributes;
+        newAttributes.reserve(attributes.getNumAttrSets() + 1);
+        auto it = attributes.index_begin();
+
+        // Skip past FunctionIndex
+        if (it == AttributeList::AttrIndex::FunctionIndex) {
+            ++it;
+        }
+
+        // Copy the return-value attributes and, when present, the sret parameter's attributes unchanged
+        for (;it < AttributeList::AttrIndex::FirstArgIndex + sig.sret; ++it) {
+            if (attributes.hasAttributes(it)) {
+                newAttributes.emplace_back(it, attributes.getAttributes(it));
+            }
+        }
+
+        // Add the new nest attribute
+        AttrBuilder attrBuilder;
+        attrBuilder.addAttribute(Attribute::Nest);
+        newAttributes.emplace_back(it, AttributeSet::get(jl_LLVMContext, attrBuilder));
+
+        // Shift forward the rest of the attributes
+        for(;it < attributes.index_end(); ++it) {
+            if (attributes.hasAttributes(it)) {
+                newAttributes.emplace_back(it + 1, attributes.getAttributes(it));
+            }
+        }
+
+        // Remember to add back FunctionIndex
+        if (attributes.hasAttributes(AttributeList::AttrIndex::FunctionIndex)) {
+            newAttributes.emplace_back(AttributeList::AttrIndex::FunctionIndex,
+                                       attributes.getAttributes(AttributeList::AttrIndex::FunctionIndex));
+        }
+
+        // Create the new AttributeList
+        attributes = AttributeList::get(jl_LLVMContext, newAttributes);
         functype = FunctionType::get(sig.sret ? T_void : sig.prt, fargt_sig, /*isVa*/false);
-        attributes = attributes.addAttribute(jl_LLVMContext, 1 + sig.sret, Attribute::Nest);
     }
     else {
         functype = sig.functype();
@@ -5001,14 +5307,11 @@ static Function* gen_cfun_wrapper(
     emit_last_age_field(ctx);
 
     Value *dummy_world = ctx.builder.CreateAlloca(T_size);
-    Value *have_tls = ctx.builder.CreateIsNotNull(ctx.ptlsStates);
+    Value *have_tls = ctx.builder.CreateIsNotNull(ctx.pgcstack);
     // TODO: in the future, try to initialize a full TLS context here
     // for now, just use a dummy field to avoid a branch in this function
     ctx.world_age_field = ctx.builder.CreateSelect(have_tls, ctx.world_age_field, dummy_world);
     Value *last_age = tbaa_decorate(tbaa_gcframe, ctx.builder.CreateAlignedLoad(ctx.world_age_field, Align(sizeof(size_t))));
-    Value *valid_tls = ctx.builder.CreateIsNotNull(last_age);
-    have_tls = ctx.builder.CreateAnd(have_tls, valid_tls);
-    ctx.world_age_field = ctx.builder.CreateSelect(valid_tls, ctx.world_age_field, dummy_world);
     Value *world_v = ctx.builder.CreateAlignedLoad(prepare_global_in(jl_Module, jlgetworld_global), Align(sizeof(size_t)));
     // TODO: cast<LoadInst>(world_v)->setOrdering(AtomicOrdering::Monotonic);
 
@@ -5283,8 +5586,7 @@ static Function* gen_cfun_wrapper(
             }
             else if (T->isAggregateType()) {
                 // aggregate types are passed by pointer
-                if (!inputarg.ispointer())
-                    inputarg = value_to_pointer(ctx, inputarg);
+                inputarg = value_to_pointer(ctx, inputarg);
                 arg = maybe_bitcast(ctx, decay_derived(ctx, data_pointer(ctx, inputarg)),
                     T->getPointerTo());
             }
@@ -5441,10 +5743,7 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con
     // some sanity checking and check whether there's a vararg
     size_t nargt = jl_svec_len(argt);
     bool isVa = (nargt > 0 && jl_is_vararg(jl_svecref(argt, nargt - 1)));
-    if (isVa) {
-        emit_error(ctx, "cfunction: Vararg syntax not allowed for argument list");
-        return jl_cgval_t();
-    }
+    assert(!isVa);
 
     jl_array_t *closure_types = NULL;
     jl_value_t *sigt = NULL; // dispatch-sig = type signature with Ref{} annotations removed and applied to the env
@@ -5593,7 +5892,7 @@ const char *jl_generate_ccallable(void *llvmmod, void *sysimg_handle, jl_value_t
         crt = (jl_value_t*)jl_any_type;
     }
     bool toboxed;
-    Type *lcrt = _julia_struct_to_llvm(&params, crt, NULL, &toboxed);
+    Type *lcrt = _julia_struct_to_llvm(&params, crt, &toboxed);
     if (toboxed)
         lcrt = T_prjlvalue;
     size_t nargs = jl_nparams(sigt)-1;
@@ -5764,8 +6063,8 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, String
 {
     jl_returninfo_t props = {};
     SmallVector<Type*, 8> fsig;
-    Type *rt;
-    Type *srt;
+    Type *rt = NULL;
+    Type *srt = NULL;
     if (jl_is_structtype(jlrettype) && jl_is_datatype_singleton((jl_datatype_t*)jlrettype)) {
         rt = T_void;
         props.cc = jl_returninfo_t::Register;
@@ -6155,7 +6454,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
     f->addFnAttr(Attribute::StackProtectStrong);
 #endif
 
-#ifdef JL_TSAN_ENABLED
+#ifdef _COMPILER_TSAN_ENABLED_
     // TODO: enable this only when a argument like `-race` is passed to Julia
     //       add a macro for no_sanitize_thread
     f->addFnAttr(llvm::Attribute::SanitizeThread);
@@ -6297,20 +6596,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
         }
     }
 
-    /*
-    // step 6. (optional) check for stack overflow (the slower way)
-    Value *cur_sp =
-        ctx.builder.CreateCall(Intrinsic::getDeclaration(M,
-                                                     Intrinsic::frameaddress),
-                           ConstantInt::get(T_int32, 0));
-    Value *sp_ok =
-        ctx.builder.CreateICmpUGT(cur_sp,
-                              ConstantInt::get(T_size,
-                                               (uptrint_t)jl_stack_lo));
-    error_unless(ctx, sp_ok, "stack overflow");
-    */
-
-    // step 7. set up GC frame
+    // step 6. set up GC frame
     allocate_gc_frame(ctx, b0);
     Value *last_age = NULL;
     emit_last_age_field(ctx);
@@ -6318,7 +6604,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
         last_age = tbaa_decorate(tbaa_gcframe, ctx.builder.CreateAlignedLoad(ctx.world_age_field, Align(sizeof(size_t))));
     }
 
-    // step 8. allocate local variables slots
+    // step 7. allocate local variables slots
     // must be in the first basic block for the llvm mem2reg pass to work
     auto allocate_local = [&](jl_varinfo_t &varinfo, jl_sym_t *s) {
         jl_value_t *jt = varinfo.value.typ;
@@ -6355,7 +6641,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
             if (allunbox)
                 return;
         }
-        else if (deserves_stack(jt, true)) {
+        else if (deserves_stack(jt)) {
             bool isboxed;
             Type *vtype = julia_type_to_llvm(ctx, jt, &isboxed);
             assert(!isboxed);
@@ -6377,9 +6663,9 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
             (va && (int)i == ctx.vaSlot) || // or it's the va arg tuple
             i == 0) { // or it is the first argument (which isn't in `argArray`)
             AllocaInst *av = new AllocaInst(T_prjlvalue, 0,
-                jl_symbol_name(s), /*InsertBefore*/ctx.ptlsStates);
+                jl_symbol_name(s), /*InsertBefore*/ctx.pgcstack);
             StoreInst *SI = new StoreInst(V_rnull, av, false, Align(sizeof(void*)));
-            SI->insertAfter(ctx.ptlsStates);
+            SI->insertAfter(ctx.pgcstack);
             varinfo.boxroot = av;
             if (ctx.debug_enabled && varinfo.dinfo) {
                 DIExpression *expr;
@@ -6436,7 +6722,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
         }
     }
 
-    // step 9. move args into local variables
+    // step 8. move args into local variables
     Function::arg_iterator AI = f->arg_begin();
 
     auto get_specsig_arg = [&](jl_value_t *argType, Type *llvmArgType, bool isboxed) {
@@ -6525,7 +6811,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                         ConstantInt::get(T_size, offsetof(jl_opaque_closure_t, world)));
 
                 jl_cgval_t closure_world = typed_load(ctx, worldaddr, NULL, (jl_value_t*)jl_long_type,
-                    theArg.tbaa, nullptr, false, sizeof(size_t));
+                    theArg.tbaa, nullptr, false, AtomicOrdering::NotAtomic, false, sizeof(size_t));
                 emit_unbox(ctx, T_size, closure_world, (jl_value_t*)jl_long_type, ctx.world_age_field, tbaa_gcframe);
 
                 // Load closure env
@@ -6534,7 +6820,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                         ConstantInt::get(T_size, offsetof(jl_opaque_closure_t, captures)));
 
                 jl_cgval_t closure_env = typed_load(ctx, envaddr, NULL, (jl_value_t*)jl_any_type,
-                    theArg.tbaa, nullptr, false, sizeof(void*));
+                    theArg.tbaa, nullptr, true, AtomicOrdering::NotAtomic, false, sizeof(void*));
                 theArg = convert_julia_type(ctx, closure_env, vi.value.typ);
             }
 
@@ -6566,7 +6852,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
         }
     }
 
-    // step 10. allocate rest argument
+    // step 9. allocate rest argument
     CallInst *restTuple = NULL;
     if (va && ctx.vaSlot != -1) {
         jl_varinfo_t &vi = ctx.slots[ctx.vaSlot];
@@ -6608,7 +6894,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
         }
     }
 
-    // step 11. Compute properties for each statements
+    // step 10. Compute properties for each statements
     //     This needs to be computed by iterating in the IR order
     //     instead of control flow order.
     auto in_user_mod = [] (jl_module_t *mod) {
@@ -6730,7 +7016,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
     Instruction &prologue_end = ctx.builder.GetInsertBlock()->back();
 
 
-    // step 12. Do codegen in control flow order
+    // step 11. Do codegen in control flow order
     std::vector<int> workstack;
     std::map<int, BasicBlock*> BB;
     std::map<size_t, BasicBlock*> come_from_bb;
@@ -6743,8 +7029,8 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
         if (seq_next >= 0 && (unsigned)seq_next < stmtslen) {
             workstack.push_back(seq_next);
         }
-        else if (!ctx.builder.GetInsertBlock()->getTerminator()) {
-            ctx.builder.CreateUnreachable();
+        else if (ctx.builder.GetInsertBlock() && !ctx.builder.GetInsertBlock()->getTerminator()) {
+            CreateTrap(ctx.builder, false);
         }
         while (!workstack.empty()) {
             int item = workstack.back();
@@ -6754,7 +7040,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                 cursor = item;
                 return;
             }
-            if (seq_next != -1 && !ctx.builder.GetInsertBlock()->getTerminator()) {
+            if (seq_next != -1 && ctx.builder.GetInsertBlock() && !ctx.builder.GetInsertBlock()->getTerminator()) {
                 come_from_bb[cursor + 1] = ctx.builder.GetInsertBlock();
                 ctx.builder.CreateBr(nextbb->second);
             }
@@ -6892,7 +7178,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
         if (jl_is_returnnode(stmt)) {
             jl_value_t *retexpr = jl_returnnode_value(stmt);
             if (retexpr == NULL) {
-                ctx.builder.CreateUnreachable();
+                CreateTrap(ctx.builder, false);
                 find_next_stmt(-1);
                 continue;
             }
@@ -6901,7 +7187,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
             jl_cgval_t retvalinfo = emit_expr(ctx, retexpr);
             retvalinfo = convert_julia_type(ctx, retvalinfo, jlrettype);
             if (retvalinfo.typ == jl_bottom_type) {
-                ctx.builder.CreateUnreachable();
+                CreateTrap(ctx.builder, false);
                 find_next_stmt(-1);
                 continue;
             }
@@ -7065,17 +7351,6 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
     ctx.builder.SetCurrentDebugLocation(noDbg);
     ctx.builder.ClearInsertionPoint();
 
-    auto undef_value_for_type = [&](Type *T) {
-        auto tracked = CountTrackedPointers(T);
-        Constant *undef;
-        if (tracked.count)
-            // make sure gc pointers (including ptr_phi of union-split) are initialized to NULL
-            undef = Constant::getNullValue(T);
-        else
-            undef = UndefValue::get(T);
-        return undef;
-    };
-
     // Codegen Phi nodes
     std::map<std::pair<BasicBlock*, BasicBlock*>, BasicBlock*> BB_rewrite_map;
     std::vector<llvm::PHINode*> ToDelete;
@@ -7288,7 +7563,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
         PN->eraseFromParent();
     }
 
-    // step 13. Perform any delayed instantiations
+    // step 12. Perform any delayed instantiations
     if (ctx.debug_enabled) {
         bool in_prologue = true;
         for (auto &BB : *ctx.f) {
@@ -7375,8 +7650,8 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
         JL_UNLOCK(&m->writelock);
     }
 
-    // link the dependent llvmcall modules, but switch their function's linkage to private
-    // so that they don't show up in the execution engine.
+    // link the dependent llvmcall modules, but switch their function's linkage to internal
+    // so that they don't conflict when they show up in the execution engine.
     for (auto &Mod : ctx.llvmcall_modules) {
         SmallVector<std::string, 1> Exports;
         for (const auto &F: Mod->functions())
@@ -7386,7 +7661,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
             jl_error("Failed to link LLVM bitcode");
         }
         for (auto FN: Exports)
-            jl_Module->getFunction(FN)->setLinkage(GlobalVariable::PrivateLinkage);
+            jl_Module->getFunction(FN)->setLinkage(GlobalVariable::InternalLinkage);
     }
 
     // link in opaque closure modules
@@ -7397,7 +7672,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                 Exports.push_back(F.getName().str());
         jl_merge_module(jl_Module, std::move(Mod));
         for (auto FN: Exports)
-            jl_Module->getFunction(FN)->setLinkage(GlobalVariable::PrivateLinkage);
+            jl_Module->getFunction(FN)->setLinkage(GlobalVariable::InternalLinkage);
     }
 
     JL_GC_POP();
@@ -7595,9 +7870,9 @@ void jl_compile_workqueue(
             if (!preal_specsig) {
                 // emit specsig-to-(jl)invoke conversion
                 Function *preal = emit_tojlinvoke(codeinst, mod, params);
-                protodecl->setLinkage(GlobalVariable::PrivateLinkage);
+                protodecl->setLinkage(GlobalVariable::InternalLinkage);
                 //protodecl->setAlwaysInline();
-                protodecl->addFnAttr("no-frame-pointer-elim", "true");
+                jl_init_function(protodecl);
                 size_t nrealargs = jl_nparams(codeinst->def->specTypes); // number of actual arguments being passed
                 // TODO: maybe this can be cached in codeinst->specfptr?
                 emit_cfunc_invalidate(protodecl, proto_cc, proto_return_roots, codeinst->def->specTypes, codeinst->rettype, nrealargs, params, preal);
@@ -7807,8 +8082,9 @@ static void init_jit_functions(void)
     global_jlvalue_to_llvm(new JuliaVariable{"jl_undefref_exception", true, get_pjlvalue}, &jl_undefref_exception);
     add_named_global(jlgetworld_global, &jl_world_counter);
     add_named_global("__stack_chk_fail", &__stack_chk_fail);
-    add_named_global(jltls_states_func, (void*)NULL);
+    add_named_global(jlpgcstack_func, (void*)NULL);
     add_named_global(jlerror_func, &jl_error);
+    add_named_global(jlatomicerror_func, &jl_atomic_error);
     add_named_global(jlthrow_func, &jl_throw);
     add_named_global(jlundefvarerror_func, &jl_undefined_var_error);
     add_named_global(jlboundserrorv_func, &jl_bounds_error_ints);
@@ -7838,7 +8114,7 @@ static void init_jit_functions(void)
     add_named_global(jlleave_func, &jl_pop_handler);
     add_named_global(jl_restore_excstack_func, &jl_restore_excstack);
     add_named_global(jl_excstack_state_func, &jl_excstack_state);
-    add_named_global(jlegal_func, &jl_egal);
+    add_named_global(jlegalx_func, &jl_egal__unboxed);
     add_named_global(jlisa_func, &jl_isa);
     add_named_global(jlsubtype_func, &jl_subtype);
     add_named_global(jltypeassert_func, &jl_typeassert);
@@ -7960,6 +8236,10 @@ extern "C" void jl_init_llvm(void)
     // and to ensure that it is 16-byte aligned for out-going calls,
     // to ensure compatibility with GCC codes
     options.StackAlignmentOverride = 16;
+#endif
+#ifdef JL_DEBUG_BUILD
+    // LLVM defaults to tls stack guard, which causes issues with Julia's tls implementation
+    options.StackProtectorGuard = StackProtectorGuards::Global;
 #endif
     Triple TheTriple(sys::getProcessTriple());
 #if defined(FORCE_ELF)
@@ -8122,12 +8402,12 @@ extern "C" void jl_dump_llvm_mfunction(void *v)
 
 extern void jl_write_bitcode_func(void *F, char *fname) {
     std::error_code EC;
-    raw_fd_ostream OS(fname, EC, sys::fs::F_None);
+    raw_fd_ostream OS(fname, EC, sys::fs::OF_None);
     llvm::WriteBitcodeToFile(*((llvm::Function*)F)->getParent(), OS);
 }
 
 extern void jl_write_bitcode_module(void *M, char *fname) {
     std::error_code EC;
-    raw_fd_ostream OS(fname, EC, sys::fs::F_None);
+    raw_fd_ostream OS(fname, EC, sys::fs::OF_None);
     llvm::WriteBitcodeToFile(*(llvm::Module*)M, OS);
 }
diff --git a/src/codegen_shared.h b/src/codegen_shared.h
index ee118708746887..f56854d2b4ca50 100644
--- a/src/codegen_shared.h
+++ b/src/codegen_shared.h
@@ -29,13 +29,8 @@ struct CountTrackedPointers {
     CountTrackedPointers(llvm::Type *T);
 };
 
-#if JL_LLVM_VERSION >= 110000
 unsigned TrackWithShadow(llvm::Value *Src, llvm::Type *T, bool isptr, llvm::Value *Dst, llvm::IRBuilder<> &irbuilder);
 std::vector<llvm::Value*> ExtractTrackedValues(llvm::Value *Src, llvm::Type *STy, bool isptr, llvm::IRBuilder<> &irbuilder, llvm::ArrayRef<unsigned> perm_offsets={});
-#else
-unsigned TrackWithShadow(llvm::Value *Src, llvm::Type *T, bool isptr, llvm::Value *Dst, llvm::IRBuilder<> irbuilder);
-std::vector<llvm::Value*> ExtractTrackedValues(llvm::Value *Src, llvm::Type *STy, bool isptr, llvm::IRBuilder<> irbuilder, llvm::ArrayRef<unsigned> perm_offsets={});
-#endif
 
 static inline void llvm_dump(llvm::Value *v)
 {
diff --git a/src/common_symbols1.inc b/src/common_symbols1.inc
index d035ab76aa6ad8..80038837be0c46 100644
--- a/src/common_symbols1.inc
+++ b/src/common_symbols1.inc
@@ -1,100 +1,100 @@
-jl_symbol("getproperty"),
 jl_symbol("="),
-jl_symbol("Type"),
+jl_symbol("getproperty"),
+jl_symbol("apply_type"),
 jl_symbol("getfield"),
 jl_symbol("getindex"),
-jl_symbol("apply_type"),
 jl_symbol("convert"),
 jl_symbol("==="),
+jl_symbol("iterate"),
 jl_symbol("=="),
 jl_symbol("new"),
 jl_symbol("foreigncall"),
-jl_symbol("ccall"),
 jl_symbol("int.jl"),
-jl_symbol("+"),
-jl_symbol("boot.jl"),
-jl_symbol("not_int"),
+jl_symbol("throw"),
+jl_symbol("nothing"),
 jl_symbol("essentials.jl"),
-jl_symbol("sysimg.jl"),
-jl_symbol("<"),
+jl_symbol("+"),
 jl_symbol("unsafe_convert"),
+jl_symbol("not_int"),
 jl_symbol("-"),
-jl_symbol("iterate"),
+jl_symbol("boot.jl"),
 jl_symbol("number.jl"),
-jl_symbol("throw"),
-jl_symbol("promotion.jl"),
-jl_symbol("static_parameter"),
 jl_symbol("length"),
+jl_symbol("<"),
 jl_symbol("cconvert"),
+jl_symbol("Base.jl"),
+jl_symbol("promotion.jl"),
 jl_symbol("tuple.jl"),
+jl_symbol("static_parameter"),
+jl_symbol("isempty"),
+jl_symbol("<="),
 jl_symbol("array.jl"),
 jl_symbol("operators.jl"),
-jl_symbol("*"),
+jl_symbol("NamedTuple"),
 jl_symbol("bitcast"),
-jl_symbol("slt_int"),
-jl_symbol("isempty"),
-jl_symbol("indexed_iterate"),
-jl_symbol("size"),
 jl_symbol("!"),
-jl_symbol("nothing"),
-jl_symbol("NamedTuple"),
-jl_symbol("<="),
+jl_symbol("indexed_iterate"),
+jl_symbol("sle_int"),
 jl_symbol("bool.jl"),
-jl_symbol("string"),
-jl_symbol("!="),
-jl_symbol("deprecated.jl"),
-jl_symbol("_apply"),
-jl_symbol("none"),
-jl_symbol("meta"),
-jl_symbol("typeof"),
-jl_symbol("ifelse"),
-jl_symbol("name"),
+jl_symbol("Ptr"),
+jl_symbol("size"),
 jl_symbol("add_int"),
-jl_symbol("setindex!"),
+jl_symbol("slt_int"),
+jl_symbol("*"),
 jl_symbol("range.jl"),
+jl_symbol("abstractarray.jl"),
+jl_symbol("!="),
+jl_symbol("isa"),
+jl_symbol("setindex!"),
+jl_symbol("string"),
+jl_symbol("ifelse"),
 jl_symbol(":"),
-jl_symbol("depwarn"),
-jl_symbol("noinline"),
 jl_symbol(">"),
-jl_symbol("UInt8"),
-jl_symbol("abstractarray.jl"),
-jl_symbol("sub_int"),
-jl_symbol("max"),
-jl_symbol("sle_int"),
-jl_symbol("Typeof"),
-jl_symbol("mt"),
+jl_symbol("_apply_iterate"),
+jl_symbol("UInt64"),
 jl_symbol("&"),
-jl_symbol("Ptr"),
-jl_symbol("pointer.jl"),
+jl_symbol("max"),
 jl_symbol("rem"),
+jl_symbol("sub_int"),
 jl_symbol(">="),
-jl_symbol("typeassert"),
-jl_symbol("lshr_int"),
-jl_symbol("toInt64"),
-jl_symbol("trunc_int"),
+jl_symbol("UInt8"),
+jl_symbol("iterators.jl"),
+jl_symbol("Int64"),
 jl_symbol("pairs"),
 jl_symbol("and_int"),
 jl_symbol("last"),
-jl_symbol("iterators.jl"),
-jl_symbol("first"),
-jl_symbol("eq_int"),
-jl_symbol("throw_inexacterror"),
-jl_symbol("map"),
-jl_symbol("UInt64"),
+jl_symbol("typeof"),
 jl_symbol("arrayref"),
-jl_symbol("Int"),
-jl_symbol("reinterpret"),
-jl_symbol("Int64"),
-jl_symbol("setfield!"),
-jl_symbol("kwfunc"),
+jl_symbol("pointer.jl"),
+jl_symbol("toInt64"),
 jl_symbol("arraylen"),
-jl_symbol("axes"),
+jl_symbol("typeassert"),
+jl_symbol("map"),
+jl_symbol("kwfunc"),
 jl_symbol("ArgumentError"),
-jl_symbol("macro expansion"),
+jl_symbol("lshr_int"),
+jl_symbol("axes"),
+jl_symbol("reinterpret"),
+jl_symbol("Array"),
+jl_symbol("first"),
+jl_symbol("trunc_int"),
+jl_symbol("OneTo"),
+jl_symbol("haskey"),
+jl_symbol("Int"),
+jl_symbol("oneto"),
+jl_symbol("eq_int"),
+jl_symbol("throw_inexacterror"),
 jl_symbol("toUInt64"),
-jl_symbol("check_top_bit"),
-jl_symbol("is_top_bit_set"),
-jl_symbol("isa"),
+jl_symbol("arraysize"),
 jl_symbol("UInt"),
-jl_symbol("haskey"),
 jl_symbol("setproperty!"),
+jl_symbol("check_top_bit"),
+jl_symbol("promote"),
+jl_symbol("unsigned"),
+jl_symbol("is_top_bit_set"),
+jl_symbol("structdiff"),
+jl_symbol("undef"),
+jl_symbol("sizeof"),
+jl_symbol("String"),
+jl_symbol("namedtuple.jl"),
+jl_symbol("pop"),
diff --git a/src/common_symbols2.inc b/src/common_symbols2.inc
index d49528920c0e24..a28f1ef50af242 100644
--- a/src/common_symbols2.inc
+++ b/src/common_symbols2.inc
@@ -1,254 +1,254 @@
-jl_symbol("promote"),
-jl_symbol("undef"),
+jl_symbol("inbounds"),
+jl_symbol("strings/string.jl"),
+jl_symbol("Ref"),
 jl_symbol("Vector"),
-jl_symbol("parent"),
+jl_symbol("kwerr"),
 jl_symbol("_promote"),
-jl_symbol("Ref"),
-jl_symbol("push!"),
-jl_symbol("arraysize"),
-jl_symbol("jl_value_ptr"),
-jl_symbol("mutable"),
-jl_symbol("<<"),
-jl_symbol("pointer_from_objref"),
-jl_symbol("promote_typeof"),
-jl_symbol("unsigned"),
-jl_symbol("zext_int"),
-jl_symbol("strings/string.jl"),
+jl_symbol("sext_int"),
 jl_symbol("pointer"),
-jl_symbol("jl_alloc_array_1d"),
-jl_symbol("inbounds"),
+jl_symbol("similar"),
 jl_symbol("arrayset"),
-jl_symbol("data"),
+jl_symbol("axes1"),
+jl_symbol("eachindex"),
 jl_symbol("|"),
-jl_symbol(">>"),
-jl_symbol("pop"),
-jl_symbol("sizeof"),
-jl_symbol("strings/basic.jl"),
-jl_symbol("namedtuple.jl"),
-jl_symbol("structdiff"),
-jl_symbol("print"),
-jl_symbol("bitarray.jl"),
-jl_symbol("oftype"),
-jl_symbol("kwerr"),
-jl_symbol("adjoint"),
 jl_symbol("ult_int"),
-jl_symbol("isdefined"),
-jl_symbol("shl_int"),
 jl_symbol("lastindex"),
-jl_symbol("DimensionMismatch"),
-jl_symbol("abstractdict.jl"),
-jl_symbol("zero"),
+jl_symbol("setfield!"),
+jl_symbol("UnitRange"),
+jl_symbol("push!"),
 jl_symbol("Bool"),
 jl_symbol("Colon"),
-jl_symbol("copy"),
-jl_symbol("Cvoid"),
 jl_symbol("fieldtype"),
-jl_symbol("add_ptr"),
-jl_symbol("isdone"),
-jl_symbol("eachindex"),
-jl_symbol("eltype"),
-jl_symbol("float.jl"),
 jl_symbol("unitrange_last"),
-jl_symbol("strings/io.jl"),
+jl_symbol("bitarray.jl"),
+jl_symbol("<<"),
+jl_symbol("zext_int"),
+jl_symbol("Tuple"),
+jl_symbol("reflection.jl"),
+jl_symbol("TypeError"),
+jl_symbol("print"),
+jl_symbol("eltype"),
+jl_symbol(">>"),
+jl_symbol("strings/basic.jl"),
 jl_symbol("gc_preserve_begin"),
+jl_symbol("require_one_based_indexing"),
 jl_symbol("gc_preserve_end"),
-jl_symbol("tail"),
-jl_symbol("String"),
-jl_symbol("mul_int"),
+jl_symbol("DimensionMismatch"),
 jl_symbol("indices.jl"),
-jl_symbol("in"),
-jl_symbol("BlasInt"),
-jl_symbol("indices1"),
+jl_symbol("Cvoid"),
+jl_symbol("oftype"),
+jl_symbol("zero"),
+jl_symbol("float.jl"),
 jl_symbol("Any"),
-jl_symbol("min"),
-jl_symbol("Tuple"),
-jl_symbol("error"),
-jl_symbol("gcutils.jl"),
-jl_symbol("ptr"),
+jl_symbol("checkbounds"),
 jl_symbol("or_int"),
+jl_symbol("isdefined"),
 jl_symbol("dict.jl"),
+jl_symbol("strings/io.jl"),
+jl_symbol("shl_int"),
+jl_symbol("copy"),
+jl_symbol("macro expansion"),
+jl_symbol("abstractdict.jl"),
+jl_symbol("in"),
+jl_symbol("io.jl"),
+jl_symbol("BlasInt"),
 jl_symbol("Float64"),
-jl_symbol("Array"),
-jl_symbol("reflection.jl"),
-jl_symbol("transpose"),
-jl_symbol("copyto!"),
-jl_symbol("checkbounds"),
-jl_symbol("stride"),
-jl_symbol("unsafe_load"),
-jl_symbol("show"),
-jl_symbol("broadcasted"),
-jl_symbol("chkstride1"),
-jl_symbol("contents"),
-jl_symbol("_growend!"),
-jl_symbol("argtail"),
-jl_symbol("trunc"),
+jl_symbol("mul_int"),
 jl_symbol("UInt32"),
-jl_symbol("refvalue.jl"),
-jl_symbol("io.jl"),
-jl_symbol("jl_array_grow_end"),
-jl_symbol("multidimensional.jl"),
-jl_symbol("real"),
-jl_symbol("pointerref"),
-jl_symbol("jl_array_ptr"),
-jl_symbol("keys"),
-jl_symbol("Int32"),
-jl_symbol("get"),
-jl_symbol("stop"),
-jl_symbol("liblapack"),
-jl_symbol("Enums.jl"),
-jl_symbol("unsafe_length"),
-jl_symbol("one"),
-jl_symbol("broadcast.jl"),
-jl_symbol("BoundsError"),
-jl_symbol("char.jl"),
 jl_symbol("C_NULL"),
-jl_symbol("x"),
+jl_symbol("Integer"),
+jl_symbol("!=="),
+jl_symbol("merge"),
+jl_symbol("BoundsError"),
+jl_symbol("broadcasted"),
+jl_symbol("Cint"),
+jl_symbol("min"),
+jl_symbol("libblastrampoline"),
+jl_symbol("iszero"),
+jl_symbol("refvalue.jl"),
+jl_symbol("stride"),
+jl_symbol("error"),
 jl_symbol("ncodeunits"),
+jl_symbol("LinearIndices"),
+jl_symbol("Clong"),
+jl_symbol("pair.jl"),
+jl_symbol("_growend!"),
+jl_symbol("char.jl"),
+jl_symbol("copyto!"),
+jl_symbol("get"),
+jl_symbol("tail"),
+jl_symbol("real"),
+jl_symbol("Union"),
+jl_symbol("multidimensional.jl"),
 jl_symbol("enter"),
-jl_symbol("Float32"),
-jl_symbol("value"),
-jl_symbol("write"),
 jl_symbol("leave"),
-jl_symbol("isless"),
+jl_symbol("add_ptr"),
+jl_symbol("chkstride1"),
 jl_symbol("Expr"),
-jl_symbol("gmp.jl"),
-jl_symbol("AssertionError"),
+jl_symbol("write"),
+jl_symbol("broadcast.jl"),
+jl_symbol("show.jl"),
+jl_symbol("none"),
+jl_symbol("Generator"),
+jl_symbol("Int32"),
 jl_symbol("materialize"),
-jl_symbol("Union"),
-jl_symbol("Integer"),
-jl_symbol("neg_int"),
-jl_symbol("print_to_string"),
-jl_symbol("chklapackerror"),
-jl_symbol("prod"),
+jl_symbol("show"),
+jl_symbol("lock"),
+jl_symbol("unsafe_load"),
+jl_symbol("gmp.jl"),
+jl_symbol("mpfr.jl"),
+jl_symbol("Symbol"),
+jl_symbol("Pair"),
 jl_symbol("resize!"),
-jl_symbol("ldiv!"),
-jl_symbol("Cint"),
+jl_symbol("neg_int"),
+jl_symbol("strings/substring.jl"),
+jl_symbol("AssertionError"),
+jl_symbol("identity"),
+jl_symbol("one"),
+jl_symbol("reduce.jl"),
+jl_symbol("libcholmod"),
+jl_symbol("isless"),
+jl_symbol("reducedim.jl"),
 jl_symbol("checksquare"),
-jl_symbol("args"),
-jl_symbol("_length"),
-jl_symbol("!=="),
+jl_symbol("sort.jl"),
+jl_symbol("generator.jl"),
+jl_symbol("pointer_from_objref"),
+jl_symbol("Float32"),
+jl_symbol("chklapackerror"),
+jl_symbol("parent"),
+jl_symbol("task.jl"),
+jl_symbol("div"),
+jl_symbol("cholmod_common"),
+jl_symbol("ht_keyindex"),
+jl_symbol("pop_exception"),
+jl_symbol("c.jl"),
+jl_symbol("firstindex"),
+jl_symbol("some.jl"),
+jl_symbol("iobuffer.jl"),
+jl_symbol("sub_ptr"),
+jl_symbol("vect"),
+jl_symbol("unsafe_string"),
+jl_symbol("llvmcall"),
+jl_symbol("checkindex"),
+jl_symbol("_call_latest"),
+jl_symbol("rethrow"),
+jl_symbol("pointerref"),
+jl_symbol("println"),
+jl_symbol("keys"),
+jl_symbol("RefValue"),
 jl_symbol("_expr"),
-jl_symbol("merge"),
-jl_symbol("dims"),
-jl_symbol("the_exception"),
-jl_symbol("Base"),
 jl_symbol("toUInt32"),
-jl_symbol("mpfr.jl"),
-jl_symbol("<:"),
-jl_symbol("div"),
-jl_symbol("start"),
-jl_symbol("pair.jl"),
+jl_symbol("ismissing"),
+jl_symbol("throw_boundserror"),
+jl_symbol("IteratorSize"),
+jl_symbol("iddict.jl"),
+jl_symbol("to_shape"),
+jl_symbol("Csize_t"),
+jl_symbol("~"),
+jl_symbol("argtail"),
+jl_symbol("include"),
+jl_symbol("set.jl"),
+jl_symbol("isequal"),
 jl_symbol("refpointer.jl"),
-jl_symbol("chunks"),
+jl_symbol("=>"),
 jl_symbol("Val"),
-jl_symbol("show.jl"),
-jl_symbol("sort.jl"),
+jl_symbol("Base"),
+jl_symbol("%"),
+jl_symbol("collect"),
+jl_symbol("Type##kw"),
+jl_symbol("typemax"),
 jl_symbol("fill!"),
-jl_symbol("step"),
-jl_symbol("vals"),
-jl_symbol("toInt32"),
-jl_symbol("mul!"),
-jl_symbol("vect"),
-jl_symbol("len"),
-jl_symbol("ashr_int"),
-jl_symbol("~"),
-jl_symbol("count"),
-jl_symbol("ht_keyindex"),
-jl_symbol("iobuffer.jl"),
-jl_symbol("Generator"),
-jl_symbol("eval"),
-jl_symbol("f"),
-jl_symbol("throw_undef_if_not"),
 jl_symbol("ule_int"),
+jl_symbol("atomics.jl"),
 jl_symbol("libgit2"),
-jl_symbol("head"),
-jl_symbol("LinearIndices"),
-jl_symbol("collect"),
-jl_symbol("set.jl"),
-jl_symbol("lmul!"),
-jl_symbol("offset"),
-jl_symbol("abs"),
-jl_symbol("Symbol"),
-jl_symbol("identity"),
-jl_symbol("typemax"),
+jl_symbol("BigFloat"),
+jl_symbol("ashr_int"),
 jl_symbol("boundscheck"),
-jl_symbol("isequal"),
-jl_symbol("id"),
+jl_symbol("abs"),
 jl_symbol("^"),
-jl_symbol("generator.jl"),
-jl_symbol("=>"),
-jl_symbol("c.jl"),
-jl_symbol("fastmath.jl"),
-jl_symbol("copyast"),
-jl_symbol("IteratorSize"),
-jl_symbol("checkindex"),
-jl_symbol("strings/substring.jl"),
-jl_symbol("println"),
-jl_symbol("throw_boundserror"),
-jl_symbol("io"),
-jl_symbol("dict"),
-jl_symbol("Cstring"),
-jl_symbol("codeunit"),
-jl_symbol("unsafe_string"),
-jl_symbol("n"),
-jl_symbol("close"),
-jl_symbol("BigFloat"),
-jl_symbol("%"),
-jl_symbol("read"),
-jl_symbol("checked.jl"),
-jl_symbol("checked_trunc_sint"),
-jl_symbol("math.jl"),
-jl_symbol("round"),
-jl_symbol("iostream.jl"),
+jl_symbol("ensure_initialized"),
+jl_symbol("_array_for"),
+jl_symbol("strings/util.jl"),
+jl_symbol("Dict"),
 jl_symbol("Nothing"),
-jl_symbol("state"),
 jl_symbol("compiler/ssair/ir.jl"),
-jl_symbol("stream.jl"),
-jl_symbol("Box"),
-jl_symbol("missing.jl"),
-jl_symbol("rmul!"),
-jl_symbol("process.jl"),
-jl_symbol("Core"),
-jl_symbol("reduce.jl"),
-jl_symbol("SizeUnknown"),
-jl_symbol("diag"),
-jl_symbol("atomics.jl"),
-jl_symbol("promote_rule"),
-jl_symbol("_mod64"),
-jl_symbol("llvmcall"),
-jl_symbol("reducedim.jl"),
-jl_symbol("to_shape"),
-jl_symbol("ComplexF32"),
-jl_symbol("HasShape"),
-jl_symbol("block"),
-jl_symbol("checked_trunc_uint"),
-jl_symbol("float"),
-jl_symbol("unsafe_trunc"),
-jl_symbol("isnan"),
+jl_symbol("unsafe_write"),
+jl_symbol("util.jl"),
+jl_symbol("toInt32"),
+jl_symbol("loading.jl"),
+jl_symbol("value"),
+jl_symbol("expr.jl"),
+jl_symbol("print_to_string"),
+jl_symbol("the_exception"),
+jl_symbol("nonzeros"),
+jl_symbol("<:"),
+jl_symbol("KeyError"),
 jl_symbol("xor"),
-jl_symbol("task.jl"),
-jl_symbol("complex.jl"),
-jl_symbol(">>>"),
-jl_symbol("bitset.jl"),
 jl_symbol("logging.jl"),
-jl_symbol("s"),
-jl_symbol("libmpfr"),
+jl_symbol("stat.jl"),
+jl_symbol("close"),
+jl_symbol("adjoint"),
+jl_symbol("meta"),
+jl_symbol("path.jl"),
+jl_symbol("round"),
+jl_symbol("Cstring"),
+jl_symbol("SizeUnknown"),
+jl_symbol("esc"),
+jl_symbol("missing.jl"),
+jl_symbol("throw_undef_if_not"),
+jl_symbol("error.jl"),
+jl_symbol("Type"),
+jl_symbol("mul!"),
+jl_symbol("math.jl"),
+jl_symbol("unsafe_trunc"),
 jl_symbol("missing"),
-jl_symbol("nzval"),
-jl_symbol("special/trig.jl"),
-jl_symbol("loading.jl"),
-jl_symbol("KeyError"),
-jl_symbol("cmp"),
-jl_symbol("promote_type"),
 jl_symbol("subarray.jl"),
-jl_symbol("handle"),
+jl_symbol("noinline"),
+jl_symbol("isnan"),
+jl_symbol("ldiv!"),
+jl_symbol("DataType"),
+jl_symbol("codeunit"),
+jl_symbol("condition.jl"),
+jl_symbol("step"),
+jl_symbol("copyast"),
+jl_symbol("bitset.jl"),
+jl_symbol("float"),
+jl_symbol("fastmath.jl"),
+jl_symbol("_mod64"),
 jl_symbol("_div64"),
-jl_symbol("ht"),
-jl_symbol("UInt128"),
-jl_symbol("zeros"),
-jl_symbol("Dict"),
 jl_symbol("all"),
-jl_symbol("ComplexF64"),
+jl_symbol("parse"),
+jl_symbol("joinpath"),
+jl_symbol("nextind"),
+jl_symbol("regex.jl"),
+jl_symbol("Enums.jl"),
+jl_symbol("promote_type"),
+jl_symbol("Cdouble"),
+jl_symbol("ComplexF32"),
+jl_symbol("read"),
+jl_symbol("intfuncs.jl"),
 jl_symbol("Complex"),
-jl_symbol("checked_add"),
-jl_symbol("mod"),
+jl_symbol("_deleteend!"),
+jl_symbol("stat"),
+jl_symbol("UnionAll"),
+jl_symbol("special/trig.jl"),
+jl_symbol("UInt128"),
+jl_symbol("_copyto_impl!"),
+jl_symbol("stream.jl"),
+jl_symbol("lmul!"),
+jl_symbol("repr"),
+jl_symbol("promote_rule"),
+jl_symbol("xor_int"),
+jl_symbol("complex.jl"),
+jl_symbol("transpose"),
+jl_symbol(">>>"),
+jl_symbol("cholmod_sparse"),
+jl_symbol("filemode"),
+jl_symbol("ComplexF64"),
+jl_symbol("SparseMatrixCSC"),
+jl_symbol("view"),
+jl_symbol("GitError"),
+jl_symbol("zeros"),
+jl_symbol("InexactError"),
+jl_symbol("LogLevel"),
+jl_symbol("between"),
diff --git a/src/datatype.c b/src/datatype.c
index 86ad3170a701e3..8052719c6f55f7 100644
--- a/src/datatype.c
+++ b/src/datatype.c
@@ -42,9 +42,9 @@ static jl_sym_t *jl_demangle_typename(jl_sym_t *s) JL_NOTSAFEPOINT
 
 JL_DLLEXPORT jl_methtable_t *jl_new_method_table(jl_sym_t *name, jl_module_t *module)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     jl_methtable_t *mt =
-        (jl_methtable_t*)jl_gc_alloc(ptls, sizeof(jl_methtable_t),
+        (jl_methtable_t*)jl_gc_alloc(ct->ptls, sizeof(jl_methtable_t),
                                      jl_methtable_type);
     mt->name = jl_demangle_typename(name);
     mt->module = module;
@@ -60,11 +60,11 @@ JL_DLLEXPORT jl_methtable_t *jl_new_method_table(jl_sym_t *name, jl_module_t *mo
     return mt;
 }
 
-JL_DLLEXPORT jl_typename_t *jl_new_typename_in(jl_sym_t *name, jl_module_t *module)
+JL_DLLEXPORT jl_typename_t *jl_new_typename_in(jl_sym_t *name, jl_module_t *module, int abstract, int mutabl)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     jl_typename_t *tn =
-        (jl_typename_t*)jl_gc_alloc(ptls, sizeof(jl_typename_t),
+        (jl_typename_t*)jl_gc_alloc(ct->ptls, sizeof(jl_typename_t),
                                     jl_typename_type);
     tn->name = name;
     tn->module = module;
@@ -73,8 +73,12 @@ JL_DLLEXPORT jl_typename_t *jl_new_typename_in(jl_sym_t *name, jl_module_t *modu
     tn->linearcache = jl_emptysvec;
     tn->names = NULL;
     tn->hash = bitmix(bitmix(module ? module->build_id : 0, name->hash), 0xa1ada1da);
+    tn->abstract = abstract;
+    tn->mutabl = mutabl;
+    tn->mayinlinealloc = 0;
     tn->mt = NULL;
     tn->partial = NULL;
+    tn->atomicfields = NULL;
     return tn;
 }
 
@@ -82,26 +86,24 @@ JL_DLLEXPORT jl_typename_t *jl_new_typename_in(jl_sym_t *name, jl_module_t *modu
 
 jl_datatype_t *jl_new_abstracttype(jl_value_t *name, jl_module_t *module, jl_datatype_t *super, jl_svec_t *parameters)
 {
-    return jl_new_datatype((jl_sym_t*)name, module, super, parameters, jl_emptysvec, jl_emptysvec, 1, 0, 0);
+    return jl_new_datatype((jl_sym_t*)name, module, super, parameters, jl_emptysvec, jl_emptysvec, jl_emptysvec, 1, 0, 0);
 }
 
 jl_datatype_t *jl_new_uninitialized_datatype(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_datatype_t *t = (jl_datatype_t*)jl_gc_alloc(ptls, sizeof(jl_datatype_t), jl_datatype_type);
+    jl_task_t *ct = jl_current_task;
+    jl_datatype_t *t = (jl_datatype_t*)jl_gc_alloc(ct->ptls, sizeof(jl_datatype_t), jl_datatype_type);
     t->hash = 0;
     t->hasfreetypevars = 0;
     t->isdispatchtuple = 0;
     t->isbitstype = 0;
     t->zeroinit = 0;
-    t->isinlinealloc = 0;
     t->has_concrete_subtype = 1;
     t->cached_by_hash = 0;
     t->name = NULL;
     t->super = NULL;
     t->parameters = NULL;
     t->layout = NULL;
-    t->names = NULL;
     t->types = NULL;
     t->instance = NULL;
     return t;
@@ -218,21 +220,47 @@ unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *t)
     return next_power_of_two(size);
 }
 
-STATIC_INLINE int jl_is_datatype_make_singleton(jl_datatype_t *d)
+STATIC_INLINE int jl_is_datatype_make_singleton(jl_datatype_t *d) JL_NOTSAFEPOINT
 {
-    return (!d->abstract && jl_datatype_size(d) == 0 && d != jl_symbol_type && d->name != jl_array_typename &&
-            d->isconcretetype && !d->mutabl);
+    return (!d->name->abstract && jl_datatype_size(d) == 0 && d != jl_symbol_type && d->name != jl_array_typename &&
+            d->isconcretetype && !d->name->mutabl); // the abstract and mutabl flags are read from the typename (d->name)
 }
 
-STATIC_INLINE void jl_maybe_allocate_singleton_instance(jl_datatype_t *st)
+STATIC_INLINE void jl_maybe_allocate_singleton_instance(jl_datatype_t *st) JL_NOTSAFEPOINT
 {
     if (jl_is_datatype_make_singleton(st)) {
         // It's possible for st to already have an ->instance if it was redefined
-        if (!st->instance) {
-            st->instance = jl_gc_alloc(jl_get_ptls_states(), 0, st);
-            jl_gc_wb(st, st->instance);
+        if (!st->instance) // keep an existing instance; only create one when it is missing
+            st->instance = jl_gc_permobj(0, st); // zero-size object; NOTE(review): no jl_gc_wb here, presumably because permobj memory is permanent -- confirm
+    }
+}
+
+// returns 1 when all concrete subtypes of this type share one layout (computing dt->layout on demand), else 0
+int jl_struct_try_layout(jl_datatype_t *dt)
+{
+    if (dt->layout == NULL) {
+        if (!jl_has_fixed_layout(dt))
+            return 0;
+        jl_compute_field_offsets(dt);
+        assert(dt->layout);
+    }
+    return 1;
+}
+
+int jl_datatype_isinlinealloc(jl_datatype_t *ty, int pointerfree) JL_NOTSAFEPOINT
+{
+    if (ty->name->mayinlinealloc && (ty->isconcretetype || ((jl_datatype_t*)jl_unwrap_unionall(ty->name->wrapper))->layout)) { // TODO: use jl_struct_try_layout(dt) (but it is a safepoint)
+        if (ty->layout->npointers > 0) { // NOTE(review): ty->layout is dereferenced without a NULL check -- confirm it is always set when the guard above passes
+            if (pointerfree) // caller only accepts layouts without pointers
+                return 0;
+            if (ty->name->n_uninitialized != 0) // pointer-bearing type with fields that may be left uninitialized
+                return 0;
+            if (ty->layout->fielddesc_type > 1) // GC only implements support for 8 and 16 (not array32)
+                return 0;
         }
+        return 1; // every check passed: report 1 (inline-allocatable)
     }
+    return 0; // not flagged mayinlinealloc, or neither concrete nor backed by a wrapper layout
 }
 
 static unsigned union_isinlinable(jl_value_t *ty, int pointerfree, size_t *nbytes, size_t *align, int asfield) JL_NOTSAFEPOINT
@@ -246,7 +274,7 @@ static unsigned union_isinlinable(jl_value_t *ty, int pointerfree, size_t *nbyte
             return 0;
         return na + nb;
     }
-    if (jl_is_datatype(ty) && jl_datatype_isinlinealloc(ty) && (!pointerfree || ((jl_datatype_t*)ty)->layout->npointers == 0)) {
+    if (jl_is_datatype(ty) && jl_datatype_isinlinealloc((jl_datatype_t*)ty, pointerfree)) {
         size_t sz = jl_datatype_size(ty);
         size_t al = jl_datatype_align(ty);
         // primitive types in struct slots need their sizes aligned. issue #37974
@@ -314,31 +342,6 @@ int jl_pointer_egal(jl_value_t *t)
     return 0;
 }
 
-static int references_name(jl_value_t *p, jl_typename_t *name, int affects_layout) JL_NOTSAFEPOINT
-{
-    if (jl_is_uniontype(p))
-        return references_name(((jl_uniontype_t*)p)->a, name, affects_layout) ||
-               references_name(((jl_uniontype_t*)p)->b, name, affects_layout);
-    if (jl_is_unionall(p))
-        return references_name((jl_value_t*)((jl_unionall_t*)p)->var, name, 0) ||
-               references_name(((jl_unionall_t*)p)->body, name, affects_layout);
-    if (jl_is_typevar(p))
-        return references_name(((jl_tvar_t*)p)->ub, name, 0) ||
-               references_name(((jl_tvar_t*)p)->lb, name, 0);
-    if (jl_is_datatype(p)) {
-        jl_datatype_t *dp = (jl_datatype_t*)p;
-        if (affects_layout && dp->name == name)
-            return 1;
-        affects_layout = dp->types == NULL || jl_svec_len(dp->types) != 0;
-        size_t i, l = jl_nparams(p);
-        for (i = 0; i < l; i++) {
-            if (references_name(jl_tparam(p, i), name, affects_layout))
-                return 1;
-        }
-    }
-    return 0;
-}
-
 static void throw_ovf(int should_malloc, void *desc, jl_datatype_t* st, int offset)
 {
     if (should_malloc)
@@ -351,17 +354,9 @@ void jl_compute_field_offsets(jl_datatype_t *st)
     const uint64_t max_offset = (((uint64_t)1) << 32) - 1;
     const uint64_t max_size = max_offset >> 1;
 
-    if (st->types == NULL || st->name->wrapper == NULL)
-        return;
-    if ((jl_is_tuple_type(st) || jl_is_namedtuple_type(st)) && !jl_is_concrete_type((jl_value_t*)st))
-        return;
+    if (st->name->wrapper == NULL)
+        return; // we got called too early--we'll be back
     jl_datatype_t *w = (jl_datatype_t*)jl_unwrap_unionall(st->name->wrapper);
-    if (w->types == NULL) // we got called too early--we'll be back
-        return;
-    size_t i, nfields = jl_svec_len(st->types);
-    int isinlinealloc = st->isconcretetype && !st->mutabl;
-    int isbitstype = isinlinealloc;
-    assert(st->ninitialized <= nfields);
     if (st == w && st->layout) {
         // this check allows us to force re-computation of the layout for some types during init
         st->layout = NULL;
@@ -369,6 +364,7 @@ void jl_compute_field_offsets(jl_datatype_t *st)
         st->zeroinit = 0;
         st->has_concrete_subtype = 1;
     }
+    int isbitstype = st->isconcretetype && st->name->mayinlinealloc;
     // If layout doesn't depend on type parameters, it's stored in st->name->wrapper
     // and reused by all subtypes.
     if (w->layout) {
@@ -376,11 +372,16 @@ void jl_compute_field_offsets(jl_datatype_t *st)
         st->size = w->size;
         st->zeroinit = w->zeroinit;
         st->has_concrete_subtype = w->has_concrete_subtype;
-        if (jl_is_layout_opaque(st->layout)) { // e.g. jl_array_typename
-            return;
+        if (!jl_is_layout_opaque(st->layout)) { // e.g. jl_array_typename
+            st->isbitstype = isbitstype && st->layout->npointers == 0;
+            jl_maybe_allocate_singleton_instance(st);
         }
+        return;
     }
-    else if (nfields == 0) {
+    assert(st->types && w->types);
+    size_t i, nfields = jl_svec_len(st->types);
+    assert(st->name->n_uninitialized <= nfields);
+    if (nfields == 0) {
         // if we have no fields, we can trivially skip the rest
         if (st == jl_symbol_type || st == jl_string_type) {
             // opaque layout - heap-allocated blob
@@ -401,7 +402,7 @@ void jl_compute_field_offsets(jl_datatype_t *st)
     }
     else {
         // compute a conservative estimate of whether there could exist an instance of a subtype of this
-        for (i = 0; st->has_concrete_subtype && i < st->ninitialized; i++) {
+        for (i = 0; st->has_concrete_subtype && i < nfields - st->name->n_uninitialized; i++) {
             jl_value_t *fld = jl_svecref(st->types, i);
             if (fld == jl_bottom_type)
                 st->has_concrete_subtype = 0;
@@ -409,32 +410,15 @@ void jl_compute_field_offsets(jl_datatype_t *st)
                 st->has_concrete_subtype = !jl_is_datatype(fld) || ((jl_datatype_t *)fld)->has_concrete_subtype;
         }
         // compute layout for the wrapper object if the field types have no free variables
-        if (!st->isconcretetype) {
-            if (st != w)
-                return; // otherwise we would leak memory
-            for (i = 0; i < nfields; i++) {
-                if (jl_has_free_typevars(jl_field_type(st, i)))
-                    return; // not worthwhile computing the rest
-            }
+        if (!st->isconcretetype && !jl_has_fixed_layout(st)) {
+            assert(st == w); // otherwise caller should not have requested this layout
+            return;
         }
     }
 
-    // compute whether this type may ever be inlined
-    // based solely on whether its definition is self-referential
-    if (isinlinealloc) {
-        size_t i, nf = jl_svec_len(w->types);
-        for (i = 0; i < nf; i++) {
-            jl_value_t *fld = jl_svecref(w->types, i);
-            if (references_name(fld, w->name, 1)) {
-                isinlinealloc = 0;
-                isbitstype = 0;
-                break;
-            }
-        }
-        for (i = 0; isbitstype && i < nfields; i++) {
-            jl_value_t *fld = jl_field_type(st, i);
-            isbitstype = jl_isbits(fld);
-        }
+    for (i = 0; isbitstype && i < nfields; i++) {
+        jl_value_t *fld = jl_field_type(st, i);
+        isbitstype = jl_isbits(fld);
     }
 
     // if we didn't reuse the layout above, compute it now
@@ -452,12 +436,14 @@ void jl_compute_field_offsets(jl_datatype_t *st)
         int zeroinit = 0;
         int haspadding = 0;
         int homogeneous = 1;
+        int needlock = 0;
         uint32_t npointers = 0;
         jl_value_t *firstty = jl_field_type(st, 0);
         for (i = 0; i < nfields; i++) {
             jl_value_t *fld = jl_field_type(st, i);
+            int isatomic = jl_field_isatomic(st, i);
             size_t fsz = 0, al = 1;
-            if (jl_islayout_inline(fld, &fsz, &al)) { // aka jl_datatype_isinlinealloc
+            if (jl_islayout_inline(fld, &fsz, &al) && (!isatomic || jl_is_datatype(fld))) { // aka jl_datatype_isinlinealloc
                 if (__unlikely(fsz > max_size))
                     // Should never happen
                     throw_ovf(should_malloc, desc, st, fsz);
@@ -471,7 +457,7 @@ void jl_compute_field_offsets(jl_datatype_t *st)
                     uint32_t fld_npointers = ((jl_datatype_t*)fld)->layout->npointers;
                     if (((jl_datatype_t*)fld)->layout->haspadding)
                         haspadding = 1;
-                    if (i >= st->ninitialized && fld_npointers &&
+                    if (i >= nfields - st->name->n_uninitialized && fld_npointers &&
                         fld_npointers * sizeof(void*) != fsz) {
                         // field may be undef (may be uninitialized and contains pointer),
                         // and contains non-pointer fields of non-zero sizes.
@@ -495,9 +481,13 @@ void jl_compute_field_offsets(jl_datatype_t *st)
                     haspadding = 1;
                 }
             }
+            if (isatomic && fsz > MAX_ATOMIC_SIZE)
+                needlock = 1;
+            if (isatomic && fsz <= MAX_ATOMIC_SIZE)
+                al = fsz = next_power_of_two(fsz);
             if (al != 0) {
                 size_t alsz = LLT_ALIGN(sz, al);
-                if (sz & (al - 1))
+                if (alsz != sz)
                     haspadding = 1;
                 sz = alsz;
                 if (al > alignm)
@@ -510,6 +500,16 @@ void jl_compute_field_offsets(jl_datatype_t *st)
                 throw_ovf(should_malloc, desc, st, sz);
             sz += fsz;
         }
+        if (needlock) {
+            size_t offset = LLT_ALIGN(sizeof(jl_mutex_t), alignm);
+            for (i = 0; i < nfields; i++) {
+                desc[i].offset += offset;
+            }
+            if (__unlikely(max_offset - sz < offset))
+                throw_ovf(should_malloc, desc, st, sz);
+            sz += offset;
+            haspadding = 1;
+        }
         if (homogeneous && jl_is_tuple_type(st)) {
             // Some tuples become LLVM vectors with stronger alignment than what was calculated above.
             unsigned al = jl_special_vector_alignment(nfields, firstty);
@@ -548,14 +548,7 @@ void jl_compute_field_offsets(jl_datatype_t *st)
     }
     // now finish deciding if this instantiation qualifies for special properties
     assert(!isbitstype || st->layout->npointers == 0); // the definition of isbits
-    if (isinlinealloc && st->layout->npointers > 0) {
-        if (st->ninitialized != nfields)
-            isinlinealloc = 0;
-        else if (st->layout->fielddesc_type > 1) // GC only implements support for 8 and 16 (not array32)
-            isinlinealloc = 0;
-    }
     st->isbitstype = isbitstype;
-    st->isinlinealloc = isinlinealloc;
     jl_maybe_allocate_singleton_instance(st);
     return;
 }
@@ -575,6 +568,7 @@ JL_DLLEXPORT jl_datatype_t *jl_new_datatype(
         jl_svec_t *parameters,
         jl_svec_t *fnames,
         jl_svec_t *ftypes,
+        jl_svec_t *fattrs,
         int abstract, int mutabl,
         int ninitialized)
 {
@@ -592,18 +586,17 @@ JL_DLLEXPORT jl_datatype_t *jl_new_datatype(
     jl_gc_wb(t, t->parameters);
     t->types = ftypes;
     if (ftypes != NULL) jl_gc_wb(t, t->types);
-    t->abstract = abstract;
-    t->mutabl = mutabl;
-    t->ninitialized = ninitialized;
     t->size = 0;
 
     t->name = NULL;
     if (jl_is_typename(name)) {
         // This code-path is used by the Serialization module to by-pass normal expectations
         tn = (jl_typename_t*)name;
+        tn->abstract = abstract;
+        tn->mutabl = mutabl;
     }
     else {
-        tn = jl_new_typename_in((jl_sym_t*)name, module);
+        tn = jl_new_typename_in((jl_sym_t*)name, module, abstract, mutabl);
         if (super == jl_function_type || super == jl_builtin_type || is_anonfn_typename(jl_symbol_name(name))) {
             // Callable objects (including compiler-generated closures) get independent method tables
             // as an optimization
@@ -621,6 +614,41 @@ JL_DLLEXPORT jl_datatype_t *jl_new_datatype(
     jl_gc_wb(t, t->name);
     t->name->names = fnames;
     jl_gc_wb(t->name, t->name->names);
+    tn->n_uninitialized = jl_svec_len(fnames) - ninitialized;
+
+    uint32_t *volatile atomicfields = NULL; // one bit per field, allocated lazily; presumably volatile so JL_CATCH sees the current value
+    int i;
+    JL_TRY {
+        for (i = 0; i + 1 < jl_svec_len(fattrs); i += 2) { // fattrs holds (1-based field index, attribute symbol) pairs
+            jl_value_t *fldi = jl_svecref(fattrs, i);
+            jl_sym_t *attr = (jl_sym_t*)jl_svecref(fattrs, i + 1);
+            JL_TYPECHK(typeassert, long, fldi);
+            JL_TYPECHK(typeassert, symbol, (jl_value_t*)attr);
+            size_t fldn = jl_unbox_long(fldi);
+            if (fldn < 1 || fldn > jl_svec_len(fnames)) // negative indices wrap to huge size_t values and are rejected here too
+                jl_errorf("invalid field attribute %lld", (long long)fldn);
+            fldn--; // switch to a 0-based field index
+            if (attr == atomic_sym) {
+                if (!mutabl)
+                    jl_errorf("invalid field attribute atomic for immutable struct");
+                if (atomicfields == NULL) {
+                    size_t nb = (jl_svec_len(fnames) + 31) / 32 * sizeof(uint32_t); // round up to whole 32-bit words
+                    atomicfields = (uint32_t*)malloc_s(nb);
+                    memset(atomicfields, 0, nb);
+                }
+                atomicfields[fldn / 32] |= (uint32_t)1 << (fldn % 32); // unsigned operand: shifting a signed 1 by 31 is undefined behaviour
+            }
+            else {
+                jl_errorf("invalid field attribute %s", jl_symbol_name(attr));
+            }
+        }
+    }
+    JL_CATCH { // an attribute was rejected: release the bitmask before propagating the error
+        if (atomicfields)
+            free(atomicfields);
+        jl_rethrow();
+    }
+    tn->atomicfields = atomicfields; // stays NULL when no field was marked atomic
 
     if (t->name->wrapper == NULL) {
         t->name->wrapper = (jl_value_t*)t;
@@ -630,10 +658,12 @@ JL_DLLEXPORT jl_datatype_t *jl_new_datatype(
             t->name->wrapper = jl_new_struct(jl_unionall_type, jl_svecref(parameters, i), t->name->wrapper);
             jl_gc_wb(t->name, t->name->wrapper);
         }
+        if (!mutabl && !abstract && ftypes != NULL)
+            tn->mayinlinealloc = 1;
     }
     jl_precompute_memoized_dt(t, 0);
 
-    if (!abstract)
+    if (!abstract && t->types != NULL)
         jl_compute_field_offsets(t);
 
     JL_GC_POP();
@@ -645,12 +675,12 @@ JL_DLLEXPORT jl_datatype_t *jl_new_primitivetype(jl_value_t *name, jl_module_t *
                                                  jl_svec_t *parameters, size_t nbits)
 {
     jl_datatype_t *bt = jl_new_datatype((jl_sym_t*)name, module, super, parameters,
-                                        jl_emptysvec, jl_emptysvec, 0, 0, 0);
+                                        jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 0, 0);
     uint32_t nbytes = (nbits + 7) / 8;
     uint32_t alignm = next_power_of_two(nbytes);
     if (alignm > MAX_ALIGN)
         alignm = MAX_ALIGN;
-    bt->isbitstype = bt->isinlinealloc = (parameters == jl_emptysvec);
+    bt->isbitstype = (parameters == jl_emptysvec);
     bt->size = nbytes;
     bt->layout = jl_get_layout(0, 0, alignm, 0, NULL, NULL);
     bt->instance = NULL;
@@ -666,7 +696,7 @@ JL_DLLEXPORT jl_datatype_t * jl_new_foreign_type(jl_sym_t *name,
                                                  int large)
 {
     jl_datatype_t *bt = jl_new_datatype(name, module, super,
-      jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 1, 0);
+      jl_emptysvec, jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 1, 0);
     bt->size = large ? GC_MAX_SZCLASS+1 : 0;
     jl_datatype_layout_t *layout = (jl_datatype_layout_t *)
       jl_gc_perm_alloc(sizeof(jl_datatype_layout_t) + sizeof(jl_fielddescdyn_t),
@@ -685,13 +715,74 @@ JL_DLLEXPORT jl_datatype_t * jl_new_foreign_type(jl_sym_t *name,
     return bt;
 }
 
+JL_DLLEXPORT int jl_is_foreign_type(jl_datatype_t *dt)
+{
+    const jl_datatype_layout_t *ly = jl_is_datatype(dt) ? dt->layout : NULL; // layout is only inspected for datatypes
+    return ly != NULL && ly->fielddesc_type == 3;
+}
+
 // bits constructors ----------------------------------------------------------
 
-JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *dt, void *data)
+#if MAX_ATOMIC_SIZE > MAX_POINTERATOMIC_SIZE
+#error MAX_ATOMIC_SIZE too large
+#endif
+#if MAX_POINTERATOMIC_SIZE > 16
+#error MAX_POINTERATOMIC_SIZE too large
+#endif
+#if MAX_POINTERATOMIC_SIZE >= 16
+#ifndef _P64
+#error 12 byte GC pool size not implemented for 32-bit
+#endif
+typedef __uint128_t uint128_t;
+typedef uint128_t jl_uatomicmax_t;
+#else
+typedef uint64_t jl_uatomicmax_t;
+#endif
+
+#if BYTE_ORDER != LITTLE_ENDIAN
+#error using masks for atomics (instead of memcpy like nb == 16) assumes little endian
+#endif
+
+// Load 32 bits from `x`, clearing bits 24-31 unless `nb` is 4.
+static inline uint32_t zext_read32(const jl_value_t *x, size_t nb) JL_NOTSAFEPOINT
+{
+    const uint32_t word = *(uint32_t*)x;
+    // any nb other than 4 is handled as nb == 3
+    const uint32_t keep = (nb == 4) ? 0xffffffffu : 0xffffffu;
+    return word & keep;
+}
+
+#if MAX_POINTERATOMIC_SIZE >= 8
+// Load 64 bits from `x` and keep only the low 8, 7, 6 or 5 bytes.
+// Any `nb` other than 8, 7 or 6 is handled as nb == 5.
+static inline uint64_t zext_read64(const jl_value_t *x, size_t nb) JL_NOTSAFEPOINT
+{
+    const uint64_t word = *(uint64_t*)x;
+    switch (nb) {
+    case 8:  return word;
+    case 7:  return 0xffffffffffffffu & word;
+    case 6:  return 0xffffffffffffu & word;
+    default: return 0xffffffffffu & word; // nb == 5
+    }
+}
+#endif
+
+#if MAX_POINTERATOMIC_SIZE >= 16
+// Read `nb` bytes at `x` into a 128-bit integer whose remaining bytes are zero.
+static inline uint128_t zext_read128(const jl_value_t *x, size_t nb) JL_NOTSAFEPOINT
+{
+    if (nb == 16)
+        return *(uint128_t*)x; // full width: direct load
+    uint128_t padded = 0;
+    memcpy(&padded, x, nb); // copies nb bytes; the others stay zero
+    return padded;
+}
+#endif
+
+JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *dt, const void *data)
 {
     // data may not have the alignment required by the size
     // but will always have the alignment required by the datatype
-    jl_ptls_t ptls = jl_get_ptls_states();
     assert(jl_is_datatype(dt));
     jl_datatype_t *bt = (jl_datatype_t*)dt;
     size_t nb = jl_datatype_size(bt);
@@ -708,44 +799,275 @@ JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *dt, void *data)
     if (bt == jl_uint16_type)  return jl_box_uint16(*(uint16_t*)data);
     if (bt == jl_char_type)    return jl_box_char(*(uint32_t*)data);
 
-    jl_value_t *v = jl_gc_alloc(ptls, nb, bt);
-    switch (nb) {
-    case  1: *(uint8_t*) v = *(uint8_t*)data;    break;
-    case  2: *(uint16_t*)v = jl_load_unaligned_i16(data);   break;
-    case  4: *(uint32_t*)v = jl_load_unaligned_i32(data);   break;
-    case  8: *(uint64_t*)v = jl_load_unaligned_i64(data);   break;
-    case 16:
-        memcpy(jl_assume_aligned(v, 16), data, 16);
-        break;
-    default: memcpy(v, data, nb);
-    }
+    jl_task_t *ct = jl_current_task;
+    jl_value_t *v = jl_gc_alloc(ct->ptls, nb, bt);
+    memcpy(jl_assume_aligned(v, sizeof(void*)), data, nb);
     return v;
 }
 
-// used by boot.jl
-JL_DLLEXPORT jl_value_t *jl_typemax_uint(jl_value_t *bt)
+JL_DLLEXPORT jl_value_t *jl_atomic_new_bits(jl_value_t *dt, const char *data)
 {
-    uint64_t data = 0xffffffffffffffffULL;
-    jl_value_t *v = jl_gc_alloc(jl_get_ptls_states(), sizeof(size_t), bt);
-    memcpy(v, &data, sizeof(size_t));
+    // atomically load a value of type dt from data and box it; data must have the required alignment for an atomic of the given size
+    assert(jl_is_datatype(dt));
+    jl_datatype_t *bt = (jl_datatype_t*)dt;
+    size_t nb = jl_datatype_size(bt);
+    // some types have special pools to minimize allocations
+    if (nb == 0)               return jl_new_struct_uninit(bt); // returns bt->instance
+    if (bt == jl_bool_type)    return (1 & jl_atomic_load((int8_t*)data)) ? jl_true : jl_false;
+    if (bt == jl_uint8_type)   return jl_box_uint8(jl_atomic_load((uint8_t*)data));
+    if (bt == jl_int64_type)   return jl_box_int64(jl_atomic_load((int64_t*)data));
+    if (bt == jl_int32_type)   return jl_box_int32(jl_atomic_load((int32_t*)data));
+    if (bt == jl_int8_type)    return jl_box_int8(jl_atomic_load((int8_t*)data));
+    if (bt == jl_int16_type)   return jl_box_int16(jl_atomic_load((int16_t*)data));
+    if (bt == jl_uint64_type)  return jl_box_uint64(jl_atomic_load((uint64_t*)data));
+    if (bt == jl_uint32_type)  return jl_box_uint32(jl_atomic_load((uint32_t*)data));
+    if (bt == jl_uint16_type)  return jl_box_uint16(jl_atomic_load((uint16_t*)data));
+    if (bt == jl_char_type)    return jl_box_char(jl_atomic_load((uint32_t*)data));
+
+    jl_task_t *ct = jl_current_task;
+    jl_value_t *v = jl_gc_alloc(ct->ptls, nb, bt); // general case: fresh nb-byte box of type bt
+    // data is aligned to the power of two, so each branch loads one whole power-of-two width;
+    // we will write too much of v, but the padding should exist
+    if (nb == 1)
+        *(uint8_t*) v = jl_atomic_load((uint8_t*)data);
+    else if (nb <= 2)
+        *(uint16_t*)v = jl_atomic_load((uint16_t*)data);
+    else if (nb <= 4)
+        *(uint32_t*)v = jl_atomic_load((uint32_t*)data);
+#if MAX_POINTERATOMIC_SIZE >= 8
+    else if (nb <= 8)
+        *(uint64_t*)v = jl_atomic_load((uint64_t*)data);
+#endif
+#if MAX_POINTERATOMIC_SIZE >= 16
+    else if (nb <= 16)
+        *(uint128_t*)v = jl_atomic_load((uint128_t*)data);
+#endif
+    else
+        abort(); // nb is larger than every width handled above
+    return v;
+}
+
+JL_DLLEXPORT void jl_atomic_store_bits(char *dst, const jl_value_t *src, int nb)
+{
+    // Atomically store the nb-byte value held in src to dst.
+    // dst must have the required alignment for an atomic of the given size, and
+    // src must be aligned by the GC: we may therefore read too much from src, but
+    // will zero the excess bits before the store (so that we can get faster cmpswap later)
+    if (nb == 0)
+        ; // zero-size value: nothing to write
+    else if (nb == 1)
+        jl_atomic_store((uint8_t*)dst, *(uint8_t*)src);
+    else if (nb == 2)
+        jl_atomic_store((uint16_t*)dst, *(uint16_t*)src);
+    else if (nb <= 4)
+        jl_atomic_store((uint32_t*)dst, zext_read32(src, nb)); // 3- or 4-byte value widened to 32 bits
+#if MAX_POINTERATOMIC_SIZE >= 8
+    else if (nb <= 8)
+        jl_atomic_store((uint64_t*)dst, zext_read64(src, nb)); // 5- to 8-byte value widened to 64 bits
+#endif
+#if MAX_POINTERATOMIC_SIZE >= 16
+    else if (nb <= 16)
+        jl_atomic_store((uint128_t*)dst, zext_read128(src, nb)); // 9- to 16-byte value widened to 128 bits
+#endif
+    else
+        abort(); // nb is larger than every width handled above
+}
+
+JL_DLLEXPORT jl_value_t *jl_atomic_swap_bits(jl_value_t *dt, char *dst, const jl_value_t *src, int nb)
+{
+    // atomically exchange the nb-byte value at dst with src and return the old value boxed as dt; dst must have the required alignment for an atomic of the given size
+    assert(jl_is_datatype(dt));
+    jl_datatype_t *bt = (jl_datatype_t*)dt;
+    // some types have special pools to minimize allocations
+    if (nb == 0)               return jl_new_struct_uninit(bt); // returns bt->instance
+    if (bt == jl_bool_type)    return (1 & jl_atomic_exchange((int8_t*)dst, 1 & *(int8_t*)src)) ? jl_true : jl_false;
+    if (bt == jl_uint8_type)   return jl_box_uint8(jl_atomic_exchange((uint8_t*)dst, *(uint8_t*)src));
+    if (bt == jl_int64_type)   return jl_box_int64(jl_atomic_exchange((int64_t*)dst, *(int64_t*)src));
+    if (bt == jl_int32_type)   return jl_box_int32(jl_atomic_exchange((int32_t*)dst, *(int32_t*)src));
+    if (bt == jl_int8_type)    return jl_box_int8(jl_atomic_exchange((int8_t*)dst, *(int8_t*)src));
+    if (bt == jl_int16_type)   return jl_box_int16(jl_atomic_exchange((int16_t*)dst, *(int16_t*)src));
+    if (bt == jl_uint64_type)  return jl_box_uint64(jl_atomic_exchange((uint64_t*)dst, *(uint64_t*)src));
+    if (bt == jl_uint32_type)  return jl_box_uint32(jl_atomic_exchange((uint32_t*)dst, *(uint32_t*)src));
+    if (bt == jl_uint16_type)  return jl_box_uint16(jl_atomic_exchange((uint16_t*)dst, *(uint16_t*)src));
+    if (bt == jl_char_type)    return jl_box_char(jl_atomic_exchange((uint32_t*)dst, *(uint32_t*)src));
+
+    jl_task_t *ct = jl_current_task;
+    jl_value_t *v = jl_gc_alloc(ct->ptls, jl_datatype_size(bt), bt); // general case: box that receives the previous value
+    if (nb == 1)
+        *(uint8_t*)v = jl_atomic_exchange((uint8_t*)dst, *(uint8_t*)src);
+    else if (nb == 2)
+        *(uint16_t*)v = jl_atomic_exchange((uint16_t*)dst, *(uint16_t*)src);
+    else if (nb <= 4)
+        *(uint32_t*)v = jl_atomic_exchange((uint32_t*)dst, zext_read32(src, nb)); // new value is zero-extended before the swap
+#if MAX_POINTERATOMIC_SIZE >= 8
+    else if (nb <= 8)
+        *(uint64_t*)v = jl_atomic_exchange((uint64_t*)dst, zext_read64(src, nb));
+#endif
+#if MAX_POINTERATOMIC_SIZE >= 16
+    else if (nb <= 16)
+        *(uint128_t*)v = jl_atomic_exchange((uint128_t*)dst, zext_read128(src, nb));
+#endif
+    else
+        abort(); // nb is larger than every width handled above
     return v;
 }
 
-void jl_assign_bits(void *dest, jl_value_t *bits) JL_NOTSAFEPOINT
+JL_DLLEXPORT int jl_atomic_bool_cmpswap_bits(char *dst, const jl_value_t *expected, const jl_value_t *src, int nb)
 {
-    // bits must be a heap box.
-    size_t nb = jl_datatype_size(jl_typeof(bits));
-    if (nb == 0) return;
-    switch (nb) {
-    case  1: *(uint8_t*)dest    = *(uint8_t*)bits;    break;
-    case  2: jl_store_unaligned_i16(dest, *(uint16_t*)bits); break;
-    case  4: jl_store_unaligned_i32(dest, *(uint32_t*)bits); break;
-    case  8: jl_store_unaligned_i64(dest, *(uint64_t*)bits); break;
-    case 16:
-        memcpy(dest, jl_assume_aligned(bits, 16), 16);
-        break;
-    default: memcpy(dest, bits, nb);
+    // compare-and-swap the nb-byte value at dst against expected, storing src; returns the success flag. dst must have the required alignment for an atomic of the given size
+    // n.b.: this can spuriously fail if there are padding bits, the caller should deal with that
+    int success;
+    if (nb == 0) {
+        success = 1; // zero-size value: nothing to compare or store
+    }
+    else if (nb == 1) {
+        uint8_t y = *(uint8_t*)expected; // local copy of expected; it is what gets passed to cmpswap by address
+        success = jl_atomic_cmpswap((uint8_t*)dst, &y, *(uint8_t*)src);
+    }
+    else if (nb == 2) {
+        uint16_t y = *(uint16_t*)expected;
+        success = jl_atomic_cmpswap((uint16_t*)dst, &y, *(uint16_t*)src);
+    }
+    else if (nb <= 4) {
+        uint32_t y = zext_read32(expected, nb); // both operands are zero-extended to the full atomic width
+        uint32_t z = zext_read32(src, nb);
+        success = jl_atomic_cmpswap((uint32_t*)dst, &y, z);
+    }
+#if MAX_POINTERATOMIC_SIZE >= 8
+    else if (nb <= 8) {
+        uint64_t y = zext_read64(expected, nb);
+        uint64_t z = zext_read64(src, nb);
+        success = jl_atomic_cmpswap((uint64_t*)dst, &y, z);
+    }
+#endif
+#if MAX_POINTERATOMIC_SIZE >= 16
+    else if (nb <= 16) {
+        uint128_t y = zext_read128(expected, nb);
+        uint128_t z = zext_read128(src, nb);
+        success = jl_atomic_cmpswap((uint128_t*)dst, &y, z);
+    }
+#endif
+    else {
+        abort(); // nb is larger than every width handled above
+    }
+    return success;
+}
+
+JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t *rettyp, char *dst, const jl_value_t *expected, const jl_value_t *src, int nb)
+{
+    // compare-and-swap the nb-byte value of type dt at dst; returns a rettyp object holding the value read plus a success byte. dst must have the required alignment for an atomic of the given size
+    // n.b.: this does not spuriously fail if there are padding bits
+    jl_task_t *ct = jl_current_task;
+    int isptr = jl_field_isptr(rettyp, 0); // whether field 0 of rettyp is stored as a pointer
+    jl_value_t *y = jl_gc_alloc(ct->ptls, isptr ? nb : rettyp->size, isptr ? dt : rettyp); // isptr: a separate dt box; otherwise the rettyp object itself
+    int success;
+    jl_datatype_t *et = (jl_datatype_t*)jl_typeof(expected);
+    if (nb == 0) {
+        success = (dt == et); // zero-size value: only the types are compared
+    }
+    else if (nb == 1) {
+        uint8_t *y8 = (uint8_t*)y;
+        assert(!dt->layout->haspadding);
+        if (dt == et) {
+            *y8 = *(uint8_t*)expected;
+            uint8_t z8 = *(uint8_t*)src;
+            success = jl_atomic_cmpswap((uint8_t*)dst, y8, z8);
+        }
+        else {
+            *y8 = jl_atomic_load((uint8_t*)dst); // expected has a different type: just report the current value
+            success = 0;
+        }
+    }
+    else if (nb == 2) {
+        uint16_t *y16 = (uint16_t*)y;
+        assert(!dt->layout->haspadding);
+        if (dt == et) {
+            *y16 = *(uint16_t*)expected;
+            uint16_t z16 = *(uint16_t*)src;
+            success = jl_atomic_cmpswap((uint16_t*)dst, y16, z16);
+        }
+        else {
+            *y16 = jl_atomic_load((uint16_t*)dst);
+            success = 0;
+        }
+    }
+    else if (nb <= 4) {
+        uint32_t *y32 = (uint32_t*)y;
+        if (dt == et) {
+            *y32 = zext_read32(expected, nb);
+            uint32_t z32 = zext_read32(src, nb);
+            while (1) { // retry only while a failure could be caused by padding bits alone
+                success = jl_atomic_cmpswap((uint32_t*)dst, y32, z32);
+                if (success || !dt->layout->haspadding || !jl_egal__bits(y, expected, dt))
+                    break;
+            }
+        }
+        else {
+            *y32 = jl_atomic_load((uint32_t*)dst);
+            success = 0;
+        }
+    }
+#if MAX_POINTERATOMIC_SIZE >= 8
+    else if (nb <= 8) {
+        uint64_t *y64 = (uint64_t*)y;
+        if (dt == et) {
+            *y64 = zext_read64(expected, nb);
+            uint64_t z64 = zext_read64(src, nb);
+            while (1) { // same padding-aware retry as the 32-bit case
+                success = jl_atomic_cmpswap((uint64_t*)dst, y64, z64);
+                if (success || !dt->layout->haspadding || !jl_egal__bits(y, expected, dt))
+                    break;
+            }
+        }
+        else {
+            *y64 = jl_atomic_load((uint64_t*)dst);
+            success = 0;
+        }
+    }
+#endif
+#if MAX_POINTERATOMIC_SIZE >= 16
+    else if (nb <= 16) {
+        uint128_t *y128 = (uint128_t*)y;
+        if (dt == et) {
+            *y128 = zext_read128(expected, nb);
+            uint128_t z128 = zext_read128(src, nb);
+            while (1) { // same padding-aware retry as the 32-bit case
+                success = jl_atomic_cmpswap((uint128_t*)dst, y128, z128);
+                if (success || !dt->layout->haspadding || !jl_egal__bits(y, expected, dt))
+                    break;
+            }
+        }
+        else {
+            *y128 = jl_atomic_load((uint128_t*)dst);
+            success = 0;
+        }
+    }
+#endif
+    else {
+        abort(); // nb is larger than every width handled above
+    }
+    if (isptr) { // wrap the boxed value in a rettyp object whose first word points at it
+        JL_GC_PUSH1(&y); // y is rooted across the allocation below
+        jl_value_t *z = jl_gc_alloc(ct->ptls, rettyp->size, rettyp);
+        *(jl_value_t**)z = y;
+        JL_GC_POP();
+        y = z;
+        nb = sizeof(jl_value_t*); // the success byte now goes right after the pointer
     }
+    *((uint8_t*)y + nb) = success ? 1 : 0; // success flag is written at byte offset nb of the result
+    return y;
+}
+
+
+
+// used by boot.jl
+JL_DLLEXPORT jl_value_t *jl_typemax_uint(jl_value_t *bt)
+{
+    const uint64_t allones = ~(uint64_t)0; // every bit set; only sizeof(size_t) bytes get copied
+    jl_task_t *task = jl_current_task;
+    jl_value_t *box = jl_gc_alloc(task->ptls, sizeof(size_t), bt);
+    memcpy(box, &allones, sizeof(size_t));
+    return box;
 }
 
 #define PERMBOXN_FUNC(nb,nw)                                            \
@@ -786,14 +1108,14 @@ UNBOX_FUNC(float64, double)
 UNBOX_FUNC(voidpointer, void*)
 UNBOX_FUNC(uint8pointer, uint8_t*)
 
-#define BOX_FUNC(typ,c_type,pfx,nw)                             \
-    JL_DLLEXPORT jl_value_t *pfx##_##typ(c_type x)              \
-    {                                                           \
-        jl_ptls_t ptls = jl_get_ptls_states();                  \
-        jl_value_t *v = jl_gc_alloc(ptls, nw * sizeof(void*),   \
-                                    jl_##typ##_type);           \
-        *(c_type*)jl_data_ptr(v) = x;                           \
-        return v;                                               \
+#define BOX_FUNC(typ,c_type,pfx,nw)                                     \
+    JL_DLLEXPORT jl_value_t *pfx##_##typ(c_type x)                      \
+    {                                                                   \
+        jl_task_t *ct = jl_current_task;                                \
+        jl_value_t *v = jl_gc_alloc(ct->ptls, nw * sizeof(void*),       \
+                                    jl_##typ##_type);                   \
+        *(c_type*)jl_data_ptr(v) = x;                                   \
+        return v;                                                       \
     }
 BOX_FUNC(float32, float,  jl_box, 1)
 BOX_FUNC(voidpointer, void*,  jl_box, 1)
@@ -807,29 +1129,29 @@ BOX_FUNC(float64, double, jl_box, 2)
 #define NBOX_C 1024
 
 #define SIBOX_FUNC(typ,c_type,nw)\
-    static jl_value_t *boxed_##typ##_cache[NBOX_C];             \
-    JL_DLLEXPORT jl_value_t *jl_box_##typ(c_type x)             \
-    {                                                           \
-        jl_ptls_t ptls = jl_get_ptls_states();                  \
-        c_type idx = x+NBOX_C/2;                                \
-        if ((u##c_type)idx < (u##c_type)NBOX_C)                 \
-            return boxed_##typ##_cache[idx];                    \
-        jl_value_t *v = jl_gc_alloc(ptls, nw * sizeof(void*),   \
-                                    jl_##typ##_type);           \
-        *(c_type*)jl_data_ptr(v) = x;                           \
-        return v;                                               \
-    }
-#define UIBOX_FUNC(typ,c_type,nw)                               \
-    static jl_value_t *boxed_##typ##_cache[NBOX_C];             \
-    JL_DLLEXPORT jl_value_t *jl_box_##typ(c_type x)             \
-    {                                                           \
-        jl_ptls_t ptls = jl_get_ptls_states();                  \
-        if (x < NBOX_C)                                         \
-            return boxed_##typ##_cache[x];                      \
-        jl_value_t *v = jl_gc_alloc(ptls, nw * sizeof(void*),   \
-                                    jl_##typ##_type);           \
-        *(c_type*)jl_data_ptr(v) = x;                           \
-        return v;                                               \
+    static jl_value_t *boxed_##typ##_cache[NBOX_C];                     \
+    JL_DLLEXPORT jl_value_t *jl_box_##typ(c_type x)                     \
+    {                                                                   \
+        jl_task_t *ct = jl_current_task;                                \
+        c_type idx = x+NBOX_C/2;                                        \
+        if ((u##c_type)idx < (u##c_type)NBOX_C)                         \
+            return boxed_##typ##_cache[idx];                            \
+        jl_value_t *v = jl_gc_alloc(ct->ptls, nw * sizeof(void*),       \
+                                    jl_##typ##_type);                   \
+        *(c_type*)jl_data_ptr(v) = x;                                   \
+        return v;                                                       \
+    }
+#define UIBOX_FUNC(typ,c_type,nw)                                       \
+    static jl_value_t *boxed_##typ##_cache[NBOX_C];                     \
+    JL_DLLEXPORT jl_value_t *jl_box_##typ(c_type x)                     \
+    {                                                                   \
+        jl_task_t *ct = jl_current_task;                                \
+        if (x < NBOX_C)                                                 \
+            return boxed_##typ##_cache[x];                              \
+        jl_value_t *v = jl_gc_alloc(ct->ptls, nw * sizeof(void*),       \
+                                    jl_##typ##_type);                   \
+        *(c_type*)jl_data_ptr(v) = x;                                   \
+        return v;                                                       \
     }
 SIBOX_FUNC(int16,  int16_t, 1)
 SIBOX_FUNC(int32,  int32_t, 1)
@@ -848,11 +1170,11 @@ UIBOX_FUNC(uint64, uint64_t, 2)
 static jl_value_t *boxed_char_cache[128];
 JL_DLLEXPORT jl_value_t *jl_box_char(uint32_t x)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     uint32_t u = bswap_32(x);
     if (u < 128)
         return boxed_char_cache[(uint8_t)u];
-    jl_value_t *v = jl_gc_alloc(ptls, sizeof(void*), jl_char_type);
+    jl_value_t *v = jl_gc_alloc(ct->ptls, sizeof(void*), jl_char_type);
     *(uint32_t*)jl_data_ptr(v) = x;
     return v;
 }
@@ -915,35 +1237,30 @@ JL_DLLEXPORT jl_value_t *jl_box_bool(int8_t x)
 
 JL_DLLEXPORT jl_value_t *jl_new_struct(jl_datatype_t *type, ...)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     if (type->instance != NULL) return type->instance;
     va_list args;
-    size_t nf = jl_datatype_nfields(type);
+    size_t i, nf = jl_datatype_nfields(type);
     va_start(args, type);
-    jl_value_t *jv = jl_gc_alloc(ptls, jl_datatype_size(type), type);
-    for (size_t i = 0; i < nf; i++) {
-        set_nth_field(type, (void*)jv, i, va_arg(args, jl_value_t*));
+    jl_value_t *jv = jl_gc_alloc(ct->ptls, jl_datatype_size(type), type);
+    if (nf > 0 && jl_field_offset(type, 0) != 0) {
+        memset(jv, 0, jl_field_offset(type, 0));
+    }
+    for (i = 0; i < nf; i++) {
+        set_nth_field(type, jv, i, va_arg(args, jl_value_t*), 0);
     }
     va_end(args);
     return jv;
 }
 
-static void init_struct_tail(jl_datatype_t *type, jl_value_t *jv, size_t na) JL_NOTSAFEPOINT
-{
-    if (na < jl_datatype_nfields(type)) {
-        char *data = (char*)jl_data_ptr(jv);
-        size_t offs = jl_field_offset(type, na);
-        memset(data + offs, 0, jl_datatype_size(type) - offs);
-    }
-}
-
 JL_DLLEXPORT jl_value_t *jl_new_structv(jl_datatype_t *type, jl_value_t **args, uint32_t na)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     if (!jl_is_datatype(type) || type->layout == NULL) {
         jl_type_error("new", (jl_value_t*)jl_datatype_type, (jl_value_t*)type);
     }
-    if (type->ninitialized > na || na > jl_datatype_nfields(type))
+    size_t nf = jl_datatype_nfields(type);
+    if (nf - type->name->n_uninitialized > na || na > nf)
         jl_error("invalid struct allocation");
     for (size_t i = 0; i < na; i++) {
         jl_value_t *ft = jl_field_type_concrete(type, i);
@@ -952,17 +1269,28 @@ JL_DLLEXPORT jl_value_t *jl_new_structv(jl_datatype_t *type, jl_value_t **args,
     }
     if (type->instance != NULL)
         return type->instance;
-    jl_value_t *jv = jl_gc_alloc(ptls, jl_datatype_size(type), type);
-    for (size_t i = 0; i < na; i++) {
-        set_nth_field(type, (void*)jv, i, args[i]);
+    jl_value_t *jv = jl_gc_alloc(ct->ptls, jl_datatype_size(type), type);
+    if (jl_datatype_nfields(type) > 0) {
+        if (jl_field_offset(type, 0) != 0) {
+            memset(jl_data_ptr(jv), 0, jl_field_offset(type, 0));
+        }
+        JL_GC_PUSH1(&jv);
+        for (size_t i = 0; i < na; i++) {
+            set_nth_field(type, jv, i, args[i], 0);
+        }
+        if (na < jl_datatype_nfields(type)) {
+            char *data = (char*)jl_data_ptr(jv);
+            size_t offs = jl_field_offset(type, na);
+            memset(data + offs, 0, jl_datatype_size(type) - offs);
+        }
+        JL_GC_POP();
     }
-    init_struct_tail(type, jv, na);
     return jv;
 }
 
 JL_DLLEXPORT jl_value_t *jl_new_structt(jl_datatype_t *type, jl_value_t *tup)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     if (!jl_is_tuple(tup))
         jl_type_error("new", (jl_value_t*)jl_tuple_type, tup);
     if (!jl_is_datatype(type) || type->layout == NULL)
@@ -981,13 +1309,19 @@ JL_DLLEXPORT jl_value_t *jl_new_structt(jl_datatype_t *type, jl_value_t *tup)
         }
         return type->instance;
     }
-    jl_value_t *jv = jl_gc_alloc(ptls, jl_datatype_size(type), type);
+    size_t size = jl_datatype_size(type);
+    jl_value_t *jv = jl_gc_alloc(ct->ptls, size, type);
+    if (nf == 0)
+        return jv;
     jl_value_t *fi = NULL;
-    if (type->layout->npointers > 0) {
+    if (type->zeroinit) {
         // if there are references, zero the space first to prevent the GC
         // from seeing uninitialized references during jl_get_nth_field and jl_isa,
         // which can allocate.
-        memset(jl_data_ptr(jv), 0, jl_datatype_size(type));
+        memset(jl_data_ptr(jv), 0, size);
+    }
+    else if (jl_field_offset(type, 0) != 0) {
+        memset(jl_data_ptr(jv), 0, jl_field_offset(type, 0));
     }
     JL_GC_PUSH2(&jv, &fi);
     for (size_t i = 0; i < nargs; i++) {
@@ -995,7 +1329,7 @@ JL_DLLEXPORT jl_value_t *jl_new_structt(jl_datatype_t *type, jl_value_t *tup)
         fi = jl_get_nth_field(tup, i);
         if (!jl_isa(fi, ft))
             jl_type_error("new", ft, fi);
-        set_nth_field(type, (void*)jv, i, fi);
+        set_nth_field(type, jv, i, fi, 0);
     }
     JL_GC_POP();
     return jv;
@@ -1003,10 +1337,10 @@ JL_DLLEXPORT jl_value_t *jl_new_structt(jl_datatype_t *type, jl_value_t *tup)
 
 JL_DLLEXPORT jl_value_t *jl_new_struct_uninit(jl_datatype_t *type)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     if (type->instance != NULL) return type->instance;
     size_t size = jl_datatype_size(type);
-    jl_value_t *jv = jl_gc_alloc(ptls, size, type);
+    jl_value_t *jv = jl_gc_alloc(ct->ptls, size, type);
     if (size > 0)
         memset(jl_data_ptr(jv), 0, size);
     return jv;
@@ -1014,26 +1348,34 @@ JL_DLLEXPORT jl_value_t *jl_new_struct_uninit(jl_datatype_t *type)
 
 // field access ---------------------------------------------------------------
 
+JL_DLLEXPORT void jl_lock_value(jl_value_t *v) JL_NOTSAFEPOINT // lock `v` via JL_LOCK_NOGC; pair with jl_unlock_value
+{
+    JL_LOCK_NOGC((jl_mutex_t*)v); // `v` itself is reinterpreted as a jl_mutex_t; presumably the object must start with one -- TODO confirm
+}
+
+JL_DLLEXPORT void jl_unlock_value(jl_value_t *v) JL_NOTSAFEPOINT // release the lock taken by jl_lock_value on the same `v`
+{
+    JL_UNLOCK_NOGC((jl_mutex_t*)v); // same reinterpretation of `v` as a jl_mutex_t as in jl_lock_value
+}
+
 JL_DLLEXPORT int jl_field_index(jl_datatype_t *t, jl_sym_t *fld, int err)
 {
-    jl_svec_t *fn = jl_field_names(t);
-    size_t n = jl_svec_len(fn);
-    if (n == 0) {
-        if (jl_is_namedtuple_type(t)) {
-            jl_value_t *ns = jl_tparam0(t);
-            if (jl_is_tuple(ns)) {
-                n = jl_nfields(ns);
-                for(size_t i=0; i < n; i++) {
-                    if (jl_get_nth_field(ns, i) == (jl_value_t*)fld) {
-                        return (int)i;
-                    }
+    if (jl_is_namedtuple_type(t)) {
+        jl_value_t *ns = jl_tparam0(t);
+        if (jl_is_tuple(ns)) {
+            size_t i, n = jl_nfields(ns);
+            for (i = 0; i < n; i++) {
+                if (jl_get_nth_field(ns, i) == (jl_value_t*)fld) {
+                    return (int)i;
                 }
             }
         }
     }
     else {
-        for(size_t i=0; i < n; i++) {
-            if (jl_svecref(fn,i) == (jl_value_t*)fld) {
+        jl_svec_t *fn = jl_field_names(t);
+        size_t i, n = jl_svec_len(fn);
+        for (i = 0; i < n; i++) {
+            if (jl_svecref(fn, i) == (jl_value_t*)fld) {
                 return (int)i;
             }
         }
@@ -1047,19 +1389,39 @@ JL_DLLEXPORT int jl_field_index(jl_datatype_t *t, jl_sym_t *fld, int err)
 JL_DLLEXPORT jl_value_t *jl_get_nth_field(jl_value_t *v, size_t i)
 {
     jl_datatype_t *st = (jl_datatype_t*)jl_typeof(v);
-    assert(i < jl_datatype_nfields(st));
+    if (i >= jl_datatype_nfields(st))
+        jl_bounds_error_int(v, i + 1);
     size_t offs = jl_field_offset(st, i);
     if (jl_field_isptr(st, i)) {
         return jl_atomic_load_relaxed((jl_value_t**)((char*)v + offs));
     }
     jl_value_t *ty = jl_field_type_concrete(st, i);
+    int isatomic = jl_field_isatomic(st, i);
     if (jl_is_uniontype(ty)) {
-        uint8_t sel = ((uint8_t*)v)[offs + jl_field_size(st, i) - 1];
+        assert(!isatomic);
+        size_t fsz = jl_field_size(st, i);
+        uint8_t sel = ((uint8_t*)v)[offs + fsz - 1];
         ty = jl_nth_union_component(ty, sel);
         if (jl_is_datatype_singleton((jl_datatype_t*)ty))
             return ((jl_datatype_t*)ty)->instance;
     }
-    return jl_new_bits(ty, (char*)v + offs);
+    jl_value_t *r;
+    size_t fsz = jl_datatype_size(ty);
+    int needlock = (isatomic && fsz > MAX_ATOMIC_SIZE);
+    if (isatomic && !needlock) {
+        r = jl_atomic_new_bits(ty, (char*)v + offs);
+    }
+    else if (needlock) {
+        jl_task_t *ct = jl_current_task;
+        r = jl_gc_alloc(ct->ptls, fsz, ty);
+        jl_lock_value(v);
+        memcpy((char*)r, (char*)v + offs, fsz);
+        jl_unlock_value(v);
+    }
+    else {
+        r = jl_new_bits(ty, (char*)v + offs);
+    }
+    return undefref_check((jl_datatype_t*)ty, r);
 }
 
 JL_DLLEXPORT jl_value_t *jl_get_nth_field_noalloc(jl_value_t *v JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT
@@ -1073,28 +1435,35 @@ JL_DLLEXPORT jl_value_t *jl_get_nth_field_noalloc(jl_value_t *v JL_PROPAGATES_RO
 
 JL_DLLEXPORT jl_value_t *jl_get_nth_field_checked(jl_value_t *v, size_t i)
 {
-    jl_datatype_t *st = (jl_datatype_t*)jl_typeof(v);
-    if (i >= jl_datatype_nfields(st))
-        jl_bounds_error_int(v, i + 1);
-    size_t offs = jl_field_offset(st, i);
-    if (jl_field_isptr(st, i)) {
-        jl_value_t *fval = jl_atomic_load_relaxed((jl_value_t**)((char*)v + offs));
-        if (__unlikely(fval == NULL))
-            jl_throw(jl_undefref_exception);
-        return fval;
+    jl_value_t *r = jl_get_nth_field(v, i);
+    if (__unlikely(r == NULL))
+        jl_throw(jl_undefref_exception);
+    return r;
+}
+
+// Copy the `nb`-byte payload of the heap box `src` into `dst`, which lies inside `parent`.
+// With `hasptr`, the pointer-sized words go through memmove_refs plus a GC write barrier first.
+static inline void memassign_safe(int hasptr, jl_value_t *parent, char *dst, const jl_value_t *src, size_t nb) JL_NOTSAFEPOINT
+{
+    size_t done = 0; // bytes already stored as references; the tail copy must skip them in src AND dst
+    if (hasptr) {
+        // assert that although dst might have some undefined bits, the src heap box should be okay with that
+        assert(LLT_ALIGN(nb, sizeof(void*)) == LLT_ALIGN(jl_datatype_size(jl_typeof(src)), sizeof(void*)));
+        done = (nb / sizeof(void*)) * sizeof(void*);
+        memmove_refs((void**)dst, (void**)src, done / sizeof(void*));
+        jl_gc_multi_wb(parent, src);
     }
-    jl_value_t *ty = jl_field_type_concrete(st, i);
-    if (jl_is_uniontype(ty)) {
-        size_t fsz = jl_field_size(st, i);
-        uint8_t sel = ((uint8_t*)v)[offs + fsz - 1];
-        ty = jl_nth_union_component(ty, sel);
-        if (jl_is_datatype_singleton((jl_datatype_t*)ty))
-            return ((jl_datatype_t*)ty)->instance;
+    else {
+        assert(nb == jl_datatype_size(jl_typeof(src))); // src must be a heap box.
+        if (nb >= 16) {
+            memcpy(dst, jl_assume_aligned(src, 16), nb);
+            return;
+        }
     }
-    return undefref_check((jl_datatype_t*)ty, jl_new_bits(ty, (char*)v + offs));
+    memcpy(dst + done, jl_assume_aligned((const char*)src + done, sizeof(void*)), nb - done);
 }
 
-void set_nth_field(jl_datatype_t *st, void *v, size_t i, jl_value_t *rhs) JL_NOTSAFEPOINT
+void set_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *rhs, int isatomic) JL_NOTSAFEPOINT
 {
     size_t offs = jl_field_offset(st, i);
     if (rhs == NULL) { // TODO: this should be invalid, but it happens frequently in ircode.c
@@ -1107,34 +1476,297 @@ void set_nth_field(jl_datatype_t *st, void *v, size_t i, jl_value_t *rhs) JL_NOT
     }
     else {
         jl_value_t *ty = jl_field_type_concrete(st, i);
-        if (jl_is_uniontype(ty)) {
-            uint8_t *psel = &((uint8_t*)v)[offs + jl_field_size(st, i) - 1];
+        jl_value_t *rty = jl_typeof(rhs);
+        int hasptr;
+        int isunion = jl_is_uniontype(ty);
+        if (isunion) {
+            assert(!isatomic);
+            size_t fsz = jl_field_size(st, i);
+            uint8_t *psel = &((uint8_t*)v)[offs + fsz - 1];
             unsigned nth = 0;
-            if (!jl_find_union_component(ty, jl_typeof(rhs), &nth))
+            if (!jl_find_union_component(ty, rty, &nth))
                 assert(0 && "invalid field assignment to isbits union");
             *psel = nth;
-            if (jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(rhs)))
+            if (jl_is_datatype_singleton((jl_datatype_t*)rty))
                 return;
+            hasptr = 0;
+        }
+        else {
+            hasptr = ((jl_datatype_t*)ty)->layout->npointers > 0;
+        }
+        size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy
+        int needlock = (isatomic && fsz > MAX_ATOMIC_SIZE);
+        if (isatomic && !needlock) {
+            jl_atomic_store_bits((char*)v + offs, rhs, fsz);
+            if (hasptr)
+                jl_gc_multi_wb(v, rhs); // rhs is immutable
+        }
+        else if (needlock) {
+            jl_lock_value(v);
+            memcpy((char*)v + offs, (char*)rhs, fsz);
+            jl_unlock_value(v);
+        }
+        else {
+            memassign_safe(hasptr, v, (char*)v + offs, rhs, fsz);
         }
-        jl_assign_bits((char*)v + offs, rhs);
-        jl_gc_multi_wb(v, rhs);
     }
 }
 
-JL_DLLEXPORT int jl_field_isdefined(jl_value_t *v, size_t i) JL_NOTSAFEPOINT
+jl_value_t *swap_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *rhs, int isatomic) // store rhs into field i of v and return the previous value
 {
-    jl_datatype_t *st = (jl_datatype_t*)jl_typeof(v);
+    jl_value_t *ty = jl_field_type_concrete(st, i);
+    if (!jl_isa(rhs, ty))
+       jl_type_error("swapfield!", ty, rhs);
     size_t offs = jl_field_offset(st, i);
-    char *fld = (char*)v + offs;
+    jl_value_t *r; // receives the old field value on every path below
     if (jl_field_isptr(st, i)) {
-        jl_value_t *fval = jl_atomic_load_relaxed((jl_value_t**)fld);
-        return fval != NULL;
+        if (isatomic)
+            r = jl_atomic_exchange((jl_value_t**)((char*)v + offs), rhs);
+        else
+            r = jl_atomic_exchange_relaxed((jl_value_t**)((char*)v + offs), rhs);
+        jl_gc_wb(v, rhs); // v now references rhs
     }
-    jl_datatype_t *ft = (jl_datatype_t*)jl_field_type_concrete(st, i);
-    if (jl_is_datatype(ft) && ft->layout->first_ptr >= 0) {
-         return ((jl_value_t**)fld)[ft->layout->first_ptr] != NULL;
+    else {
+        jl_value_t *rty = jl_typeof(rhs);
+        int hasptr;
+        int isunion = jl_is_uniontype(ty);
+        if (isunion) {
+            assert(!isatomic);
+            r = jl_get_nth_field(v, i); // read the old value before the selector byte is overwritten
+            size_t fsz = jl_field_size(st, i);
+            uint8_t *psel = &((uint8_t*)v)[offs + fsz - 1]; // selector is the last byte of the field slot
+            unsigned nth = 0;
+            if (!jl_find_union_component(ty, rty, &nth))
+                assert(0 && "invalid field assignment to isbits union");
+            *psel = nth;
+            if (jl_is_datatype_singleton((jl_datatype_t*)rty))
+                return r; // singleton rhs: nothing further is copied
+            hasptr = 0;
+        }
+        else {
+            hasptr = ((jl_datatype_t*)ty)->layout->npointers > 0;
+        }
+        size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy
+        int needlock = (isatomic && fsz > MAX_ATOMIC_SIZE);
+        if (isatomic && !needlock) {
+            r = jl_atomic_swap_bits(rty, (char*)v + offs, rhs, fsz);
+            if (hasptr)
+                jl_gc_multi_wb(v, rhs); // rhs is immutable
+        }
+        else {
+            if (needlock) {
+                jl_task_t *ct = jl_current_task;
+                r = jl_gc_alloc(ct->ptls, fsz, ty); // box for the old value, allocated before the lock is taken
+                jl_lock_value(v);
+                memcpy((char*)r, (char*)v + offs, fsz);
+                memcpy((char*)v + offs, (char*)rhs, fsz);
+                jl_unlock_value(v);
+            }
+            else {
+                if (!isunion)
+                    r = jl_new_bits(ty, (char*)v + offs);
+                memassign_safe(hasptr, v, (char*)v + offs, rhs, fsz);
+            }
+            if (needlock || !isunion) // in the union case r already came from jl_get_nth_field
+                r = undefref_check((jl_datatype_t*)ty, r);
+        }
     }
-    return 1;
+    if (__unlikely(r == NULL))
+        jl_throw(jl_undefref_exception); // the previous value was undefined
+    return r;
+}
+
+// Read-modify-write of field `i` of `v`: computes `op(old, rhs)` and stores the result with a
+// compare-and-swap retry loop, calling `op` again whenever the field changed in the meantime.
+// Returns a new `jl_apply_modify_type(ty)` struct built from (old, new); raises a
+// "modifyfield!" type error when the result of `op` is not an instance of the field type.
+jl_value_t *modify_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *op, jl_value_t *rhs, int isatomic)
+{
+    size_t offs = jl_field_offset(st, i);
+    jl_value_t *ty = jl_field_type_concrete(st, i);
+    jl_value_t *r = jl_get_nth_field_checked(v, i);
+    if (isatomic && jl_field_isptr(st, i))
+        jl_fence(); // load was previously only relaxed
+    jl_value_t **args;
+    JL_GC_PUSHARGS(args, 2);
+    args[0] = r;
+    while (1) {
+        args[1] = rhs;
+        jl_value_t *y = jl_apply_generic(op, args, 2);
+        args[1] = y;
+        if (!jl_isa(y, ty))
+            jl_type_error("modifyfield!", ty, y);
+        if (jl_field_isptr(st, i)) {
+            jl_value_t **p = (jl_value_t**)((char*)v + offs);
+            if (isatomic ? jl_atomic_cmpswap(p, &r, y) : jl_atomic_cmpswap_relaxed(p, &r, y))
+                break;
+        }
+        else {
+            jl_value_t *yty = jl_typeof(y);
+            jl_value_t *rty = jl_typeof(r);
+            int isunion = jl_is_uniontype(ty);
+            assert(!isunion || !isatomic);
+            int hasptr = !isunion && ((jl_datatype_t*)ty)->layout->npointers > 0;
+            size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy
+            int needlock = (isatomic && fsz > MAX_ATOMIC_SIZE);
+            if (isatomic && !needlock) {
+                if (jl_atomic_bool_cmpswap_bits((char*)v + offs, r, y, fsz)) {
+                    if (hasptr)
+                        jl_gc_multi_wb(v, y); // y is immutable
+                    break;
+                }
+                r = jl_atomic_new_bits(ty, (char*)v + offs);
+            }
+            else {
+                if (needlock)
+                    jl_lock_value(v);
+                int success = memcmp((char*)v + offs, r, fsz) == 0;
+                if (success) {
+                    if (isunion) {
+                        // the selector is the last byte of the whole union slot; keep `fsz` unshadowed
+                        size_t slotsz = jl_field_size(st, i);
+                        uint8_t *psel = &((uint8_t*)v)[offs + slotsz - 1];
+                        success = (jl_typeof(r) == jl_nth_union_component(ty, *psel));
+                        if (success) {
+                            unsigned nth = 0;
+                            if (!jl_find_union_component(ty, yty, &nth))
+                                assert(0 && "invalid field assignment to isbits union");
+                            *psel = nth;
+                            if (jl_is_datatype_singleton((jl_datatype_t*)yty))
+                                break;
+                        }
+                        fsz = jl_datatype_size((jl_datatype_t*)yty); // need to shrink-wrap the final copy
+                    }
+                    else {
+                        assert(yty == ty && rty == ty);
+                    }
+                    // a selector mismatch above is a failed compare: leave the field untouched
+                    if (success)
+                        memassign_safe(hasptr, v, (char*)v + offs, y, fsz);
+                }
+                if (needlock)
+                    jl_unlock_value(v);
+                if (success)
+                    break;
+                r = jl_get_nth_field(v, i);
+            }
+        }
+        args[0] = r;
+        jl_gc_safepoint();
+    }
+    // args[0] == r (old), args[1] == y (new)
+    jl_datatype_t *rettyp = jl_apply_modify_type(ty);
+    JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE)
+    args[0] = jl_new_struct(rettyp, args[0], args[1]);
+    JL_GC_POP();
+    return args[0];
+}
+
+jl_value_t *replace_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *expected, jl_value_t *rhs, int isatomic) // compare-and-swap on field i of v; result is a jl_apply_cmpswap_type(ty) value (old field, success flag)
+{
+    jl_value_t *ty = jl_field_type_concrete(st, i);
+    if (!jl_isa(rhs, ty))
+        jl_type_error("replacefield!", ty, rhs); // rhs must be an instance of the field type
+    size_t offs = jl_field_offset(st, i);
+    jl_value_t *r = expected; // starts as the comparand; later holds the observed value / result box
+    jl_datatype_t *rettyp = jl_apply_cmpswap_type(ty);
+    JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE)
+    if (jl_field_isptr(st, i)) {
+        jl_value_t **p = (jl_value_t**)((char*)v + offs);
+        int success;
+        while (1) {
+            success = isatomic ? jl_atomic_cmpswap(p, &r, rhs) : jl_atomic_cmpswap_relaxed(p, &r, rhs);
+            if (success)
+                jl_gc_wb(v, rhs);
+            if (__unlikely(r == NULL))
+                jl_throw(jl_undefref_exception);
+            if (success || !jl_egal(r, expected)) // loop again only if the swap failed yet r is still egal to expected
+                break;
+        }
+        JL_GC_PUSH1(&r);
+        r = jl_new_struct(rettyp, r, success ? jl_true : jl_false);
+        JL_GC_POP();
+    }
+    else {
+        int hasptr;
+        int isunion = jl_is_uniontype(ty);
+        int needlock;
+        jl_value_t *rty = ty;
+        size_t fsz = jl_field_size(st, i); // full slot size; replaced by the datatype size below for non-unions
+        if (isunion) {
+            assert(!isatomic);
+            hasptr = 0;
+            needlock = 0;
+            isatomic = 0; // this makes GCC happy
+        }
+        else {
+            hasptr = ((jl_datatype_t*)ty)->layout->npointers > 0;
+            fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy
+            needlock = (isatomic && fsz > MAX_ATOMIC_SIZE);
+        }
+        if (isatomic && !needlock) {
+            r = jl_atomic_cmpswap_bits((jl_datatype_t*)ty, rettyp, (char*)v + offs, r, rhs, fsz);
+            int success = *((uint8_t*)r + fsz); // flag byte sits right after the fsz payload bytes
+            if (success && hasptr)
+                jl_gc_multi_wb(v, rhs); // rhs is immutable
+        }
+        else {
+            jl_task_t *ct = jl_current_task;
+            uint8_t *psel = NULL;
+            if (isunion) {
+                psel = &((uint8_t*)v)[offs + fsz - 1]; // selector is the last byte of the union slot
+                rty = jl_nth_union_component(rty, *psel); // rty becomes the type currently stored
+            }
+            assert(!jl_field_isptr(rettyp, 0));
+            r = jl_gc_alloc(ct->ptls, rettyp->size, (jl_value_t*)rettyp); // result box, allocated before the lock is taken
+            int success = (rty == jl_typeof(expected)); // differing types are treated as a failed compare
+            if (needlock)
+                jl_lock_value(v);
+            memcpy((char*)r, (char*)v + offs, fsz); // copy field, including union bits
+            if (success) {
+                size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy
+                if (((jl_datatype_t*)rty)->layout->haspadding)
+                    success = jl_egal__bits(r, expected, (jl_datatype_t*)rty);
+                else
+                    success = memcmp((char*)r, (char*)expected, fsz) == 0;
+            }
+            *((uint8_t*)r + fsz) = success ? 1 : 0; // outer fsz again: flag goes right after the copied field bytes
+            if (success) {
+                jl_value_t *rty = jl_typeof(rhs);
+                size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy
+                if (isunion) {
+                    unsigned nth = 0;
+                    if (!jl_find_union_component(ty, rty, &nth))
+                        assert(0 && "invalid field assignment to isbits union");
+                    *psel = nth;
+                    if (jl_is_datatype_singleton((jl_datatype_t*)rty))
+                        return r; // singleton rhs: the selector update is the whole store (needlock is 0 for unions)
+                }
+                memassign_safe(hasptr, v, (char*)v + offs, rhs, fsz);
+            }
+            if (needlock)
+                jl_unlock_value(v);
+        }
+        r = undefref_check((jl_datatype_t*)rty, r);
+        if (__unlikely(r == NULL))
+            jl_throw(jl_undefref_exception);
+    }
+    return r;
+}
+
+JL_DLLEXPORT int jl_field_isdefined(jl_value_t *v, size_t i) JL_NOTSAFEPOINT // returns 0 = undefined, 1 = defined, 2 = inline field with no pointer to test
+{
+    jl_datatype_t *st = (jl_datatype_t*)jl_typeof(v);
+    size_t offs = jl_field_offset(st, i);
+    jl_value_t **fld = (jl_value_t**)((char*)v + offs);
+    if (!jl_field_isptr(st, i)) {
+        jl_datatype_t *ft = (jl_datatype_t*)jl_field_type_concrete(st, i);
+        if (!jl_is_datatype(ft) || ft->layout->first_ptr < 0)
+            return 2; // isbits are always defined
+        fld += ft->layout->first_ptr; // inline field: test its first pointer slot instead
+    }
+    jl_value_t *fval = jl_atomic_load_relaxed(fld);
+    return fval != NULL ? 1 : 0;
 }
 
 JL_DLLEXPORT size_t jl_get_field_offset(jl_datatype_t *ty, int field) JL_NOTSAFEPOINT
diff --git a/src/debuginfo.cpp b/src/debuginfo.cpp
index b0243b2f3f4ff6..ad9ed659cbe0dc 100644
--- a/src/debuginfo.cpp
+++ b/src/debuginfo.cpp
@@ -214,7 +214,7 @@ class JuliaJITEventListener: public JITEventListener
                                       const RuntimeDyld::LoadedObjectInfo &L,
                                       RTDyldMemoryManager *memmgr)
     {
-        jl_ptls_t ptls = jl_get_ptls_states();
+        jl_ptls_t ptls = jl_current_task->ptls;
         // This function modify codeinst->fptr in GC safe region.
         // This should be fine since the GC won't scan this field.
         int8_t gc_state = jl_gc_safe_enter(ptls);
@@ -236,15 +236,9 @@ class JuliaJITEventListener: public JITEventListener
         object::section_iterator EndSection = debugObj.section_end();
         std::map<StringRef, object::SectionRef, strrefcomp> loadedSections;
         for (const object::SectionRef &lSection: Object.sections()) {
-#if JL_LLVM_VERSION >= 100000
             auto sName = lSection.getName();
             if (sName)
                 loadedSections[*sName] = lSection;
-#else
-            StringRef sName;
-            if (!lSection.getName(sName))
-                loadedSections[sName] = lSection;
-#endif
         }
         auto getLoadAddress = [&] (const StringRef &sName) -> uint64_t {
             auto search = loadedSections.find(sName);
@@ -265,21 +259,12 @@ class JuliaJITEventListener: public JITEventListener
                 istext = true;
             }
             else {
-#if JL_LLVM_VERSION >= 100000
                 auto sName = section.getName();
                 if (!sName)
                     continue;
                 if (sName.get() != ".ARM.exidx") {
                     continue;
                 }
-#else
-                StringRef sName;
-                if (section.getName(sName))
-                    continue;
-                if (sName != ".ARM.exidx") {
-                    continue;
-                }
-#endif
             }
             uint64_t loadaddr = L.getSectionLoadAddress(section);
             size_t seclen = section.getSize();
@@ -333,11 +318,7 @@ class JuliaJITEventListener: public JITEventListener
                 auto Section = cantFail(sym_iter.getSection());
                 assert(Section != EndSection && Section->isText());
                 uint64_t SectionAddr = Section->getAddress();
-#if JL_LLVM_VERSION >= 100000
                 sName = cantFail(Section->getName());
-#else
-                Section->getName(sName);
-#endif
                 uint64_t SectionLoadAddr = getLoadAddress(sName);
                 assert(SectionLoadAddr);
                 if (SectionAddrCheck) // assert that all of the Sections are at the same location
@@ -387,12 +368,7 @@ class JuliaJITEventListener: public JITEventListener
             if (Section == EndSection) continue;
             if (!Section->isText()) continue;
             uint64_t SectionAddr = Section->getAddress();
-#if JL_LLVM_VERSION >= 100000
             StringRef secName = cantFail(Section->getName());
-#else
-            StringRef secName;
-            Section->getName(secName);
-#endif
             uint64_t SectionLoadAddr = getLoadAddress(secName);
             Addr -= SectionAddr - SectionLoadAddr;
             StringRef sName = cantFail(sym_iter.getName());
@@ -622,13 +598,8 @@ static debug_link_info getDebuglink(const object::ObjectFile &Obj) JL_NOTSAFEPOI
 {
     debug_link_info info = {};
     for (const object::SectionRef &Section: Obj.sections()) {
-#if JL_LLVM_VERSION >= 100000
         Expected<StringRef> sName = Section.getName();
         if (sName && *sName == ".gnu_debuglink")
-#else
-        StringRef sName;
-        if (!Section.getName(sName) && sName == ".gnu_debuglink")
-#endif
         {
             auto found = Section.getContents();
             if (found) {
diff --git a/src/disasm.cpp b/src/disasm.cpp
index 000f6916ec2bb0..6157873ad71c5f 100644
--- a/src/disasm.cpp
+++ b/src/disasm.cpp
@@ -5,11 +5,45 @@
 //
 // Original copyright:
 //
-//                     The LLVM Compiler Infrastructure
+// University of Illinois/NCSA
+// Open Source License
+// Copyright (c) 2003-2016 University of Illinois at Urbana-Champaign.
+// All rights reserved.
 //
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+//  Developed by:
 //
+//    LLVM Team
+//
+//    University of Illinois at Urbana-Champaign
+//
+//    http://llvm.org
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of
+// this software and associated documentation files (the "Software"), to deal with
+// the Software without restriction, including without limitation the rights to
+// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+// of the Software, and to permit persons to whom the Software is furnished to do
+// so, subject to the following conditions:
+//
+//    * Redistributions of source code must retain the above copyright notice,
+//      this list of conditions and the following disclaimers.
+//
+//    * Redistributions in binary form must reproduce the above copyright notice,
+//      this list of conditions and the following disclaimers in the
+//      documentation and/or other materials provided with the distribution.
+//
+//    * Neither the names of the LLVM Team, University of Illinois at
+//      Urbana-Champaign, nor the names of its contributors may be used to
+//      endorse or promote products derived from this Software without specific
+//      prior written permission.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+// SOFTWARE.
 //===----------------------------------------------------------------------===//
 //
 // This class implements a disassembler of a memory block, given a function
@@ -22,43 +56,53 @@
 #include <string>
 
 #include "llvm-version.h"
-#include <llvm/Object/ObjectFile.h>
-#include <llvm/BinaryFormat/MachO.h>
+
+// for outputting disassembly
+#include <llvm/ADT/Triple.h>
+#include <llvm/AsmParser/Parser.h>
 #include <llvm/BinaryFormat/COFF.h>
-#include <llvm/MC/MCInst.h>
-#include <llvm/MC/MCStreamer.h>
-#include <llvm/MC/MCSubtargetInfo.h>
-#include <llvm/MC/MCObjectFileInfo.h>
-#include <llvm/MC/MCRegisterInfo.h>
-#include <llvm/MC/MCAsmInfo.h>
+#include <llvm/BinaryFormat/MachO.h>
+#include <llvm/DebugInfo/DIContext.h>
+#include <llvm/DebugInfo/DWARF/DWARFContext.h>
+#include <llvm/ExecutionEngine/JITEventListener.h>
+#include <llvm/IR/AssemblyAnnotationWriter.h>
+#include <llvm/IR/DebugInfo.h>
+#include <llvm/IR/Function.h>
+#include <llvm/IR/IntrinsicInst.h>
+#include <llvm/IR/LLVMContext.h>
+#include <llvm/IR/Module.h>
 #include <llvm/MC/MCAsmBackend.h>
+#include <llvm/MC/MCAsmInfo.h>
 #include <llvm/MC/MCCodeEmitter.h>
-#include <llvm/MC/MCInstPrinter.h>
-#include <llvm/MC/MCInstrInfo.h>
 #include <llvm/MC/MCContext.h>
+#include <llvm/MC/MCDisassembler/MCDisassembler.h>
+#include <llvm/MC/MCDisassembler/MCExternalSymbolizer.h>
 #include <llvm/MC/MCExpr.h>
+#include <llvm/MC/MCInst.h>
+#include <llvm/MC/MCInstPrinter.h>
 #include <llvm/MC/MCInstrAnalysis.h>
+#include <llvm/MC/MCInstrInfo.h>
+#include <llvm/MC/MCObjectFileInfo.h>
+#include <llvm/MC/MCRegisterInfo.h>
+#include <llvm/MC/MCStreamer.h>
+#include <llvm/MC/MCSubtargetInfo.h>
 #include <llvm/MC/MCSymbol.h>
-#include <llvm/AsmParser/Parser.h>
-#include <llvm/MC/MCDisassembler/MCDisassembler.h>
-#include <llvm/MC/MCDisassembler/MCExternalSymbolizer.h>
-#include <llvm/ADT/Triple.h>
+#include <llvm/Object/ObjectFile.h>
+#include <llvm/Support/FormattedStream.h>
 #include <llvm/Support/MemoryBuffer.h>
+#include <llvm/Support/NativeFormatting.h>
 #include <llvm/Support/SourceMgr.h>
 #include <llvm/Support/TargetRegistry.h>
 #include <llvm/Support/TargetSelect.h>
 #include <llvm/Support/raw_ostream.h>
-#include <llvm/Support/FormattedStream.h>
-#include <llvm/Support/NativeFormatting.h>
-#include <llvm/ExecutionEngine/JITEventListener.h>
-#include <llvm/IR/LLVMContext.h>
-#include <llvm/DebugInfo/DIContext.h>
-#include <llvm/DebugInfo/DWARF/DWARFContext.h>
-#include <llvm/IR/DebugInfo.h>
-#include <llvm/IR/Function.h>
-#include <llvm/IR/Module.h>
-#include <llvm/IR/IntrinsicInst.h>
-#include <llvm/IR/AssemblyAnnotationWriter.h>
+
+// for outputting assembly
+#include <llvm/CodeGen/AsmPrinter.h>
+#include <llvm/CodeGen/AsmPrinterHandler.h>
+#include <llvm/CodeGen/MachineModuleInfo.h>
+#include <llvm/CodeGen/Passes.h>
+#include <llvm/CodeGen/TargetPassConfig.h>
+#include <llvm/Support/CodeGen.h>
 #include <llvm/IR/LegacyPassManager.h>
 
 #include "julia.h"
@@ -249,7 +293,7 @@ void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, std::vector<DILineInfo>
         if (frame.Line != UINT_MAX && frame.Line != 0)
             Out << ":" << frame.Line;
         StringRef method = StringRef(frame.FunctionName).rtrim(';');
-        Out << " within `" << method << "'";
+        Out << " within `" << method << "`";
         if (collapse_recursive) {
             while (nctx < nframes) {
                 const DILineInfo &frame = DI.at(nframes - 1 - nctx);
@@ -279,20 +323,26 @@ void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, std::vector<DILineInfo>
 
 // adaptor class for printing line numbers before llvm IR lines
 class LineNumberAnnotatedWriter : public AssemblyAnnotationWriter {
-    DILocation *InstrLoc = nullptr;
-    DILineInfoPrinter LinePrinter{"; ", false};
+    const DILocation *InstrLoc = nullptr;
+    DILineInfoPrinter LinePrinter;
     DenseMap<const Instruction *, DILocation *> DebugLoc;
     DenseMap<const Function *, DISubprogram *> Subprogram;
 public:
-    LineNumberAnnotatedWriter(const char *debuginfo)
-    {
+    LineNumberAnnotatedWriter(const char *LineStart, bool bracket_outer, const char *debuginfo)
+      : LinePrinter(LineStart, bracket_outer) {
         LinePrinter.SetVerbosity(debuginfo);
     }
     virtual void emitFunctionAnnot(const Function *, formatted_raw_ostream &);
     virtual void emitInstructionAnnot(const Instruction *, formatted_raw_ostream &);
+    virtual void emitInstructionAnnot(const DILocation *, formatted_raw_ostream &);
     virtual void emitBasicBlockEndAnnot(const BasicBlock *, formatted_raw_ostream &);
     // virtual void printInfoComment(const Value &, formatted_raw_ostream &) {}
 
+    void emitEnd(formatted_raw_ostream &Out) {
+        LinePrinter.emit_finish(Out);
+        InstrLoc = nullptr;
+    }
+
     void addSubprogram(const Function *F, DISubprogram *SP)
     {
         Subprogram[F] = SP;
@@ -327,12 +377,19 @@ void LineNumberAnnotatedWriter::emitFunctionAnnot(
 void LineNumberAnnotatedWriter::emitInstructionAnnot(
       const Instruction *I, formatted_raw_ostream &Out)
 {
-    DILocation *NewInstrLoc = I->getDebugLoc();
+    const DILocation *NewInstrLoc = I->getDebugLoc(); // may be null; the DebugLoc map below is the fallback
     if (!NewInstrLoc) {
         auto Loc = DebugLoc.find(I);
         if (Loc != DebugLoc.end())
             NewInstrLoc = Loc->second;
     }
+    emitInstructionAnnot(NewInstrLoc, Out); // DILocation overload: prints line info when the location changed
+    Out << LinePrinter.inlining_indent(" "); // indent for the IR instruction; not done by the DILocation overload
+}
+
+void LineNumberAnnotatedWriter::emitInstructionAnnot(
+      const DILocation *NewInstrLoc, formatted_raw_ostream &Out)
+{
     if (NewInstrLoc && NewInstrLoc != InstrLoc) {
         InstrLoc = NewInstrLoc;
         std::vector<DILineInfo> DIvec;
@@ -348,14 +405,13 @@ void LineNumberAnnotatedWriter::emitInstructionAnnot(
         } while (NewInstrLoc);
         LinePrinter.emit_lineinfo(Out, DIvec);
     }
-    Out << LinePrinter.inlining_indent(" ");
 }
 
 void LineNumberAnnotatedWriter::emitBasicBlockEndAnnot(
         const BasicBlock *BB, formatted_raw_ostream &Out)
 {
     if (BB == &BB->getParent()->back())
-        LinePrinter.emit_finish(Out);
+        emitEnd(Out); // last block of the function: emit_finish the line info and clear InstrLoc
 }
 
 static void jl_strip_llvm_debug(Module *m, bool all_meta, LineNumberAnnotatedWriter *AAW)
@@ -435,7 +491,7 @@ jl_value_t *jl_dump_function_ir(void *f, char strip_ir_metadata, char dump_modul
             jl_error("jl_dump_function_ir: Expected Function* in a temporary Module");
 
         JL_LOCK(&codegen_lock); // Might GC
-        LineNumberAnnotatedWriter AAW{debuginfo};
+        LineNumberAnnotatedWriter AAW{"; ", false, debuginfo};
         if (!llvmf->getParent()) {
             // print the function declaration as-is
             llvmf->print(stream, &AAW);
@@ -470,7 +526,8 @@ static void jl_dump_asm_internal(
         DIContext *di_ctx,
         raw_ostream &rstream,
         const char* asm_variant,
-        const char* debuginfo);
+        const char* debuginfo,
+        bool binary);
 
 // This isn't particularly fast, but neither is printing assembly, and they're only used for interactive mode
 static uint64_t compute_obj_symsize(object::SectionRef Section, uint64_t offset)
@@ -506,10 +563,9 @@ static uint64_t compute_obj_symsize(object::SectionRef Section, uint64_t offset)
 
 // print a native disassembly for the function starting at fptr
 extern "C" JL_DLLEXPORT
-jl_value_t *jl_dump_fptr_asm(uint64_t fptr, int raw_mc, const char* asm_variant, const char *debuginfo)
+jl_value_t *jl_dump_fptr_asm(uint64_t fptr, char raw_mc, const char* asm_variant, const char *debuginfo, char binary)
 {
     assert(fptr != 0);
-    jl_ptls_t ptls = jl_get_ptls_states();
     std::string code;
     raw_string_ostream stream(code);
 
@@ -537,13 +593,15 @@ jl_value_t *jl_dump_fptr_asm(uint64_t fptr, int raw_mc, const char* asm_variant,
     }
 
     // Dump assembly code
+    jl_ptls_t ptls = jl_current_task->ptls;
     int8_t gc_state = jl_gc_safe_enter(ptls);
     jl_dump_asm_internal(
             fptr, symsize, slide,
             Section, context,
             stream,
             asm_variant,
-            debuginfo);
+            debuginfo,
+            binary);
     jl_gc_safe_leave(ptls, gc_state);
 
     return jl_pchar_to_string(stream.str().data(), stream.str().size());
@@ -739,6 +797,33 @@ static int OpInfoLookup(void *DisInfo, uint64_t PC, uint64_t Offset, uint64_t Si
 }
 } // namespace
 
+// Format the bytes in Memory as a "; <abbreviated address>: <hex bytes>" comment string.
+std::string rawCodeComment(const llvm::ArrayRef<uint8_t>& Memory, const llvm::Triple& Triple)
+{
+    std::string Text{"; "};
+    llvm::raw_string_ostream OS{Text};
+    const auto Start = reinterpret_cast<uintptr_t>(Memory.data());
+    llvm::write_hex(OS, Start & 0xffff, HexPrintStyle::Lower, 4); // low 16 bits of the address only
+    OS << ":";
+    const auto Arch = Triple.getArch();
+    const bool IsX86 = Arch == Triple::x86 || Arch == Triple::x86_64;
+    if (IsX86) { // variable-length format: space-separated bytes in memory order
+        for (auto Byte : Memory) {
+            OS << " ";
+            llvm::write_hex(OS, Byte, HexPrintStyle::Lower, 2);
+        }
+        return OS.str();
+    }
+    // fixed-length format: one space, then the bytes as a single contiguous hex word
+    OS << " ";
+    if (Triple.isLittleEndian()) // little-endian: print in reverse memory order
+        for (auto Iter = Memory.rbegin(); Iter != Memory.rend(); ++Iter)
+            llvm::write_hex(OS, *Iter, HexPrintStyle::Lower, 2);
+    else // big-endian: print in memory order
+        for (auto Byte : Memory)
+            llvm::write_hex(OS, Byte, HexPrintStyle::Lower, 2);
+    return OS.str();
+}
 
 static void jl_dump_asm_internal(
         uintptr_t Fptr, size_t Fsize, int64_t slide,
@@ -746,7 +831,8 @@ static void jl_dump_asm_internal(
         DIContext *di_ctx,
         raw_ostream &rstream,
         const char* asm_variant,
-        const char* debuginfo)
+        const char* debuginfo,
+        bool binary)
 {
     // GC safe
     // Get the host information
@@ -763,11 +849,8 @@ static void jl_dump_asm_internal(
     SourceMgr SrcMgr;
 
     MCTargetOptions Options;
-    std::unique_ptr<MCAsmInfo> MAI(TheTarget->createMCAsmInfo(*TheTarget->createMCRegInfo(TheTriple.str()), TheTriple.str()
-#if JL_LLVM_VERSION >= 100000
-            , Options
-#endif
-        ));
+    std::unique_ptr<MCAsmInfo> MAI(
+        TheTarget->createMCAsmInfo(*TheTarget->createMCRegInfo(TheTriple.str()), TheTriple.str(), Options));
     assert(MAI && "Unable to create target asm info!");
 
     std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TheTriple.str()));
@@ -845,6 +928,16 @@ static void jl_dump_asm_internal(
         }
     }
 
+    if (binary) {
+        // Print the complete address and the size at the top (instruction addresses are abbreviated)
+        std::string Buffer{"; code origin: "};
+        llvm::raw_string_ostream Stream{Buffer};
+        auto Address = reinterpret_cast<uintptr_t>(memoryObject.data());
+        llvm::write_hex(Stream, Address, HexPrintStyle::Lower, 16);
+        Stream << ", code size: " << memoryObject.size();
+        Streamer->emitRawText(Stream.str());
+    }
+
     // Take two passes: In the first pass we record all branch labels,
     // in the second we actually perform the output
     for (int pass = 0; pass < 2; ++ pass) {
@@ -878,11 +971,7 @@ static void jl_dump_asm_internal(
                     std::string buf;
                     dbgctx.emit_lineinfo(buf, di_lineIter->second);
                     if (!buf.empty()) {
-#if JL_LLVM_VERSION >= 110000
                         Streamer->emitRawText(buf);
-#else
-                        Streamer->EmitRawText(buf);
-#endif
                     }
                 }
             }
@@ -898,11 +987,7 @@ static void jl_dump_asm_internal(
                 if (di_ctx) {
                     std::string buf;
                     DILineInfoSpecifier infoSpec(
-#if JL_LLVM_VERSION >= 110000
                         DILineInfoSpecifier::FileLineInfoKind::RawValue,
-#else
-                        DILineInfoSpecifier::FileLineInfoKind::Default,
-#endif
                         DILineInfoSpecifier::FunctionNameKind::ShortName);
                     DIInliningInfo dbg = di_ctx->getInliningInfoForAddress(makeAddress(Section, Index + Fptr + slide), infoSpec);
                     if (dbg.getNumberOfFrames()) {
@@ -912,11 +997,7 @@ static void jl_dump_asm_internal(
                         dbgctx.emit_lineinfo(buf, di_lineIter->second);
                     }
                     if (!buf.empty()) {
-#if JL_LLVM_VERSION >= 110000
                         Streamer->emitRawText(buf);
-#else
-                        Streamer->EmitRawText(buf);
-#endif
                     }
                     nextLineAddr = (++di_lineIter)->first;
                 }
@@ -928,11 +1009,7 @@ static void jl_dump_asm_internal(
                 // stream << Index << ": ";
                 MCSymbol *symbol = DisInfo.lookupSymbol(Fptr+Index);
                 if (symbol) {
-#if JL_LLVM_VERSION >= 110000
                     Streamer->emitLabel(symbol);
-#else
-                    Streamer->EmitLabel(symbol);
-#endif
                 }
             }
 
@@ -940,9 +1017,6 @@ static void jl_dump_asm_internal(
             MCDisassembler::DecodeStatus S;
             FuncMCView view = memoryObject.slice(Index);
             S = DisAsm->getInstruction(Inst, insSize, view, 0,
-#if JL_LLVM_VERSION < 100000
-                                      /*VStream*/ nulls(),
-#endif
                                       /*CStream*/ pass != 0 ? Streamer->GetCommentOS() : nulls());
             if (pass != 0 && Streamer->GetCommentOS().tell() > 0)
                 Streamer->GetCommentOS() << '\n';
@@ -967,21 +1041,13 @@ static void jl_dump_asm_internal(
                             llvm::write_hex(buf, *(uint8_t*)(Fptr + Index + i), HexPrintStyle::PrefixLower, 2);
                         }
                     }
-#if JL_LLVM_VERSION >= 110000
                     Streamer->emitRawText(StringRef(buf.str()));
-#else
-                    Streamer->EmitRawText(StringRef(buf.str()));
-#endif
                 }
                 break;
 
             case MCDisassembler::SoftFail:
                 if (pass != 0) {
-#if JL_LLVM_VERSION >= 110000
                     Streamer->emitRawText(StringRef("potentially undefined instruction encoding:"));
-#else
-                    Streamer->EmitRawText(StringRef("potentially undefined instruction encoding:"));
-#endif
                 }
                 // Fall through
 
@@ -1014,11 +1080,9 @@ static void jl_dump_asm_internal(
                             }
                         }
                     }
-#if JL_LLVM_VERSION >= 110000
+                    if (binary)
+                        Streamer->emitRawText(rawCodeComment(memoryObject.slice(Index, insSize), TheTriple));
                     Streamer->emitInstruction(Inst, *STI);
-#else
-                    Streamer->EmitInstruction(Inst, *STI);
-#endif
                 }
                 break;
             }
@@ -1032,16 +1096,144 @@ static void jl_dump_asm_internal(
             std::string buf;
             dbgctx.emit_finish(buf);
             if (!buf.empty()) {
-#if JL_LLVM_VERSION >= 110000
                 Streamer->emitRawText(buf);
-#else
-                Streamer->EmitRawText(buf);
-#endif
             }
         }
     }
 }
 
+/// Adds TM's TargetPassConfig and codegen passes to PM; returns the MCContext, or NULL if addISelPasses() returns true.
+static MCContext *
+addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM) {
+    auto *Config = TM->createPassConfig(PM);
+    Config->setDisableVerify(false);
+    PM.add(Config);
+    auto *ModuleInfo = new MachineModuleInfoWrapperPass(TM);
+    PM.add(ModuleInfo);
+    const bool Failed = Config->addISelPasses();
+    if (Failed)
+        return NULL;
+    Config->addMachinePasses();
+    Config->setInitialized();
+    return &ModuleInfo->getMMI().getContext();
+}
+
+class LineNumberPrinterHandler : public AsmPrinterHandler { // AsmPrinter hook that emits line-number annotations
+    MCStreamer &S; // Printer.OutStreamer; receives the buffered text through emitRawText
+    LineNumberAnnotatedWriter LinePrinter;
+    std::string Buffer; // backing store of RawStream, so it must be declared (constructed) before it
+    llvm::raw_string_ostream RawStream;
+    llvm::formatted_raw_ostream Stream; // wraps RawStream; the stream type LinePrinter's emit* methods take
+
+public:
+    LineNumberPrinterHandler(AsmPrinter &Printer, const char *debuginfo)
+        : S(*Printer.OutStreamer),
+          LinePrinter("; ", true, debuginfo),
+          RawStream(Buffer),
+          Stream(RawStream) {}
+
+    void emitAndReset() { // hand whatever was written to Stream over to S, then empty Buffer
+        Stream.flush(); // outer stream first, so its text reaches RawStream and then Buffer
+        RawStream.flush();
+        if (Buffer.empty())
+            return;
+        S.emitRawText(Buffer);
+        Buffer.clear();
+    }
+
+    virtual void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {} // no-op
+    //virtual void beginModule(Module *M) override {}
+    virtual void endModule() override {} // no-op
+    /// note that some AsmPrinter implementations may not call beginFunction at all
+    virtual void beginFunction(const MachineFunction *MF) override {
+        LinePrinter.emitFunctionAnnot(&MF->getFunction(), Stream);
+        emitAndReset();
+    }
+    //virtual void markFunctionEnd() override {}
+    virtual void endFunction(const MachineFunction *MF) override {
+        LinePrinter.emitEnd(Stream); // MF is unused here
+        emitAndReset();
+    }
+    //virtual void beginFragment(const MachineBasicBlock *MBB,
+    //                           ExceptionSymbolProvider ESP) override {}
+    //virtual void endFragment() override {}
+    //virtual void beginFunclet(const MachineBasicBlock &MBB,
+    //                          MCSymbol *Sym = nullptr) override {}
+    //virtual void endFunclet() override {}
+    virtual void beginInstruction(const MachineInstr *MI) override {
+        LinePrinter.emitInstructionAnnot(MI->getDebugLoc(), Stream); // annotate from MI's debug location
+        emitAndReset();
+    }
+    virtual void endInstruction() override {} // no-op
+};
+
+// Generate native code for the llvm::Function `F` and return it as a Julia string: object-file
+// bytes if `raw_mc` is set, otherwise assembly text in the `asm_variant` dialect ("att"/"intel",
+// else the target default) with line annotations per `debuginfo` and, if `binary`, encodings.
+// Takes ownership of F's parent Module; returns jl_an_empty_string if pass/printer setup fails.
+extern "C" JL_DLLEXPORT
+jl_value_t *jl_dump_function_asm(void *F, char raw_mc, const char* asm_variant, const char *debuginfo, char binary)
+{
+    // precise printing via IR assembler
+    SmallVector<char, 4096> ObjBufferSV;
+    { // scope block
+        Function *f = (Function*)F;
+        llvm::raw_svector_ostream asmfile(ObjBufferSV);
+        assert(!f->isDeclaration());
+        std::unique_ptr<Module> m(f->getParent());
+        for (auto &f2 : m->functions()) {
+            if (f != &f2 && !f2.isDeclaration()) // test f2, not f: f always has a body (asserted above)
+                f2.deleteBody();
+        }
+        LLVMTargetMachine *TM = static_cast<LLVMTargetMachine*>(jl_TargetMachine);
+        legacy::PassManager PM;
+        addTargetPasses(&PM, TM);
+        if (raw_mc) {
+            raw_svector_ostream obj_OS(ObjBufferSV);
+            if (TM->addPassesToEmitFile(PM, obj_OS, nullptr, CGFT_ObjectFile, false, nullptr))
+                return jl_an_empty_string;
+            PM.run(*m);
+        }
+        else {
+            MCContext *Context = addPassesToGenerateCode(TM, PM);
+            if (!Context)
+                return jl_an_empty_string;
+            Context->setGenDwarfForAssembly(false);
+            // Duplicate LLVMTargetMachine::addAsmPrinter here so we can set the asm dialect and add the custom annotation printer
+            const MCSubtargetInfo &STI = *TM->getMCSubtargetInfo();
+            const MCAsmInfo &MAI = *TM->getMCAsmInfo();
+            const MCRegisterInfo &MRI = *TM->getMCRegisterInfo();
+            const MCInstrInfo &MII = *TM->getMCInstrInfo();
+            unsigned OutputAsmDialect = MAI.getAssemblerDialect();
+            if (!strcmp(asm_variant, "att"))
+                OutputAsmDialect = 0;
+            if (!strcmp(asm_variant, "intel"))
+                OutputAsmDialect = 1;
+            MCInstPrinter *InstPrinter = TM->getTarget().createMCInstPrinter(
+                TM->getTargetTriple(), OutputAsmDialect, MAI, MII, MRI);
+            std::unique_ptr<MCAsmBackend> MAB(TM->getTarget().createMCAsmBackend(
+                STI, MRI, TM->Options.MCOptions));
+            std::unique_ptr<MCCodeEmitter> MCE;
+            if (binary) // enable MCAsmStreamer::AddEncodingComment printing
+                MCE.reset(TM->getTarget().createMCCodeEmitter(MII, MRI, *Context));
+            auto FOut = std::make_unique<formatted_raw_ostream>(asmfile);
+            std::unique_ptr<MCStreamer> S(TM->getTarget().createAsmStreamer(
+                *Context, std::move(FOut), true, true, InstPrinter,
+                std::move(MCE), std::move(MAB), false));
+            std::unique_ptr<AsmPrinter> Printer(TM->getTarget().createAsmPrinter(*TM, std::move(S)));
+            if (!Printer) // check before the handler registration below dereferences Printer
+                return jl_an_empty_string;
+            Printer->addAsmPrinterHandler(AsmPrinter::HandlerInfo(
+                        std::unique_ptr<AsmPrinterHandler>(new LineNumberPrinterHandler(*Printer, debuginfo)),
+                        "emit", "Debug Info Emission", "Julia", "Julia::LineNumberPrinterHandler Markup"));
+            PM.add(Printer.release());
+            PM.add(createFreeMachineFunctionPass());
+            PM.run(*m);
+        }
+    }
+    return jl_pchar_to_string(ObjBufferSV.data(), ObjBufferSV.size());
+}
+
 extern "C" JL_DLLEXPORT
 LLVMDisasmContextRef jl_LLVMCreateDisasm(
         const char *TripleName, void *DisInfo, int TagType,
diff --git a/src/dlload.c b/src/dlload.c
index ecb34d2be57e33..0f7914050e8b4e 100644
--- a/src/dlload.c
+++ b/src/dlload.c
@@ -120,7 +120,6 @@ JL_DLLEXPORT void *jl_dlopen(const char *filename, unsigned flags) JL_NOTSAFEPOI
         needsSymRefreshModuleList = 1;
     return lib;
 #else
-    dlerror(); /* Reset error status. */
     return dlopen(filename,
                   (flags & JL_RTLD_NOW ? RTLD_NOW : RTLD_LAZY)
                   | JL_RTLD(flags, LOCAL)
@@ -131,7 +130,7 @@ JL_DLLEXPORT void *jl_dlopen(const char *filename, unsigned flags) JL_NOTSAFEPOI
 #ifdef RTLD_NOLOAD
                   | JL_RTLD(flags, NOLOAD)
 #endif
-#if defined(RTLD_DEEPBIND) && !(defined(JL_ASAN_ENABLED) || defined(JL_TSAN_ENABLED) || defined(JL_MSAN_ENABLED))
+#if defined(RTLD_DEEPBIND) && !(defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_TSAN_ENABLED_) || defined(_COMPILER_MSAN_ENABLED_))
                   | JL_RTLD(flags, DEEPBIND)
 #endif
 #ifdef RTLD_FIRST
@@ -144,16 +143,20 @@ JL_DLLEXPORT void *jl_dlopen(const char *filename, unsigned flags) JL_NOTSAFEPOI
 JL_DLLEXPORT int jl_dlclose(void *handle) JL_NOTSAFEPOINT
 {
 #ifdef _OS_WINDOWS_
-    if (!handle) return -1;
+    if (!handle) {
+        return -1; /* NULL handle: FreeLibrary is not called */
+    }
     return !FreeLibrary((HMODULE) handle);
 #else
-    dlerror(); /* Reset error status. */
-    if (!handle) return -1;
+    if (!handle) {
+        dlerror(); /* Reset error status (only on this path, where dlclose is not called). */
+        return -1;
+    }
     return dlclose(handle);
 #endif
 }
 
-JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags, int throw_err) JL_NOTSAFEPOINT // (or throw)
+JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags, int throw_err)
 {
     char path[PATHBUF], relocated[PATHBUF];
     int i;
@@ -175,20 +178,12 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags,
         if (!GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
                                 (LPCWSTR)(uintptr_t)(&jl_load_dynamic_library),
                                 (HMODULE*)&handle)) {
-#ifndef __clang_analyzer__
-            // Hide the error throwing from the analyser since there isn't a way to express
-            // "safepoint only when throwing error" currently.
             jl_error("could not load base module");
-#endif
         }
 #else
         Dl_info info;
         if (!dladdr((void*)(uintptr_t)&jl_load_dynamic_library, &info) || !info.dli_fname) {
-#ifndef __clang_analyzer__
-            // Hide the error throwing from the analyser since there isn't a way to express
-            // "safepoint only when throwing error" currently.
             jl_error("could not load base module");
-#endif
         }
         handle = dlopen(info.dli_fname, RTLD_NOW);
 #endif
@@ -271,11 +266,7 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags,
 #else
         const char *reason = dlerror();
 #endif
-#ifndef __clang_analyzer__
-        // Hide the error throwing from the analyser since there isn't a way to express
-        // "safepoint only when throwing error" currently.
         jl_errorf("could not load library \"%s\"\n%s", modname, reason);
-#endif
     }
     handle = NULL;
 
@@ -291,19 +282,26 @@ JL_DLLEXPORT int jl_dlsym(void *handle, const char *symbol, void ** value, int t
 #ifdef _OS_WINDOWS_
     *value = GetProcAddress((HMODULE) handle, symbol);
 #else
-    dlerror(); /* Reset error status. */
     *value = dlsym(handle, symbol);
 #endif
 
-    /* Next, check for errors.  On Windows, a NULL pointer means the symbol
-     * was not found.  On everything else, we can have NULL symbols, so we check
-     * for non-NULL returns from dlerror().  Note that means we unconditionally
-     * call dlerror() on POSIX systems.*/
-#ifdef _OS_WINDOWS_
+    /* Next, check for errors. On Windows, a NULL pointer means the symbol was
+     * not found. On everything else, we can have NULL symbols, so we check for
+     * non-NULL returns from dlerror(). Since POSIX doesn't require `dlerror`
+     * to be implemented safely, FreeBSD doesn't (unlike everyone else, who
+     * realized decades ago that threads are here to stay), so we avoid calling
+     * `dlerror` unless we need to get the error message.
+     * https://github.com/freebsd/freebsd-src/blob/12db51d20823a5e3b9e5f8a2ea73156fe1cbfc28/libexec/rtld-elf/rtld.c#L198
+     */
     symbol_found = *value != NULL;
-#else
-    const char *err = dlerror();
-    symbol_found = err == NULL;
+#ifndef _OS_WINDOWS_
+    const char *err;
+    if (!symbol_found) {
+        dlerror(); /* Reset error status. */
+        *value = dlsym(handle, symbol);
+        err = dlerror();
+        symbol_found = *value != NULL || err == NULL;
+    }
 #endif
 
     if (!symbol_found && throw_err) {
diff --git a/src/dump.c b/src/dump.c
index 484c31a93bac61..afadca3edad2ad 100644
--- a/src/dump.c
+++ b/src/dump.c
@@ -8,6 +8,7 @@
 
 #include "julia.h"
 #include "julia_internal.h"
+#include "julia_gcext.h"
 #include "builtin_proto.h"
 #include "serialize.h"
 
@@ -45,6 +46,7 @@ static jl_value_t *deser_symbols[256];
 static htable_t backref_table;
 static int backref_table_numel;
 static arraylist_t backref_list;
+static htable_t new_code_instance_validate;
 
 // list of (jl_value_t **loc, size_t pos) entries
 // for anything that was flagged by the deserializer for later
@@ -270,18 +272,14 @@ static void jl_serialize_datatype(jl_serializer_state *s, jl_datatype_t *dt) JL_
     write_int32(s->s, dt->size);
     int has_instance = (dt->instance != NULL);
     int has_layout = (dt->layout != NULL);
-    write_uint8(s->s, dt->abstract | (dt->mutabl << 1) | (has_layout << 2) | (has_instance << 3));
+    write_uint8(s->s, has_layout | (has_instance << 1));
     write_uint8(s->s, dt->hasfreetypevars
             | (dt->isconcretetype << 1)
             | (dt->isdispatchtuple << 2)
             | (dt->isbitstype << 3)
             | (dt->zeroinit << 4)
-            | (dt->isinlinealloc << 5)
-            | (dt->has_concrete_subtype << 6)
-            | (dt->cached_by_hash << 7));
-    if (!dt->abstract) {
-        write_uint16(s->s, dt->ninitialized);
-    }
+            | (dt->has_concrete_subtype << 5)
+            | (dt->cached_by_hash << 6));
     write_int32(s->s, dt->hash);
 
     if (has_layout) {
@@ -311,7 +309,6 @@ static void jl_serialize_datatype(jl_serializer_state *s, jl_datatype_t *dt) JL_
     if (has_instance)
         jl_serialize_value(s, dt->instance);
     jl_serialize_value(s, dt->name);
-    jl_serialize_value(s, dt->names);
     jl_serialize_value(s, dt->parameters);
     jl_serialize_value(s, dt->super);
     jl_serialize_value(s, dt->types);
@@ -504,6 +501,11 @@ static void jl_serialize_code_instance(jl_serializer_state *s, jl_code_instance_
     jl_serialize_code_instance(s, codeinst->next, skip_partial_opaque);
 }
 
+enum METHOD_SERIALIZATION_MODE { // bit flags, OR-ed into the one-byte mode written for a method
+    METHOD_INTERNAL = 1 << 0,    // opaque-closure method, or its module is in the worklist
+    METHOD_EXTERNAL_MT = 1 << 1, // external_mt's module is outside the worklist
+};
+
 static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_literal) JL_GC_DISABLED
 {
     if (jl_serialize_generic(s, v)) {
@@ -627,9 +629,10 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_li
     else if (jl_is_method(v)) {
         write_uint8(s->s, TAG_METHOD);
         jl_method_t *m = (jl_method_t*)v;
-        int internal = 1;
-        internal = m->is_for_opaque_closure || module_in_worklist(m->module);
-        if (!internal) {
+        int serialization_mode = 0;
+        if (m->is_for_opaque_closure || module_in_worklist(m->module))
+            serialization_mode |= METHOD_INTERNAL;
+        if (!(serialization_mode & METHOD_INTERNAL)) {
             // flag this in the backref table as special
             uintptr_t *bp = (uintptr_t*)ptrhash_bp(&backref_table, v);
             assert(*bp != (uintptr_t)HT_NOTFOUND);
@@ -637,8 +640,24 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_li
         }
         jl_serialize_value(s, (jl_value_t*)m->sig);
         jl_serialize_value(s, (jl_value_t*)m->module);
-        write_uint8(s->s, internal);
-        if (!internal)
+        if (m->external_mt != NULL) {
+            assert(jl_typeis(m->external_mt, jl_methtable_type));
+            jl_methtable_t *mt = (jl_methtable_t*)m->external_mt;
+            if (!module_in_worklist(mt->module)) {
+                serialization_mode |= METHOD_EXTERNAL_MT;
+            }
+        }
+        write_uint8(s->s, serialization_mode);
+        if (serialization_mode & METHOD_EXTERNAL_MT) {
+            // We reference this method table by module and binding
+            jl_methtable_t *mt = (jl_methtable_t*)m->external_mt;
+            jl_serialize_value(s, mt->module);
+            jl_serialize_value(s, mt->name);
+        }
+        else {
+            jl_serialize_value(s, (jl_value_t*)m->external_mt);
+        }
+        if (!(serialization_mode & METHOD_INTERNAL))
             return;
         jl_serialize_value(s, m->specializations);
         jl_serialize_value(s, m->speckeyset);
@@ -815,10 +834,21 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_li
                 jl_serialize_value(s, tn->wrapper);
                 jl_serialize_value(s, tn->mt);
                 ios_write(s->s, (char*)&tn->hash, sizeof(tn->hash));
+                write_uint8(s->s, tn->abstract | (tn->mutabl << 1) | (tn->mayinlinealloc << 2));
+                if (!tn->abstract)
+                    write_uint16(s->s, tn->n_uninitialized);
+                size_t nb = tn->atomicfields ? (jl_svec_len(tn->names) + 31) / 32 * sizeof(uint32_t) : 0;
+                write_int32(s->s, nb);
+                if (nb)
+                    ios_write(s->s, (char*)tn->atomicfields, nb);
             }
             return;
         }
 
+        if (jl_is_foreign_type(t)) {
+            jl_error("Cannot serialize instances of foreign datatypes");
+        }
+
         char *data = (char*)jl_data_ptr(v);
         size_t i, j, np = t->layout->npointers;
         uint32_t nf = t->layout->nfields;
@@ -833,7 +863,7 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_li
                         ios_write(s->s, last, prevptr - last);
                     jl_value_t *e = *(jl_value_t**)prevptr;
                     JL_GC_PROMISE_ROOTED(e);
-                    if (t->mutabl && e && jl_field_isptr(t, i - 1) && jl_is_cpointer(e) &&
+                    if (t->name->mutabl && e && jl_field_isptr(t, i - 1) && jl_is_cpointer(e) &&
                         jl_unbox_voidpointer(e) != (void*)-1 && jl_unbox_voidpointer(e) != NULL)
                         // reset Ptr fields to C_NULL (but keep MAP_FAILED / INVALID_HANDLE)
                         jl_serialize_cnull(s, jl_typeof(e));
@@ -849,7 +879,7 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_li
             }
             if (i == nf)
                 break;
-            if (t->mutabl && jl_is_cpointer_type(jl_field_type(t, i)) && *(void**)ptr != (void*)-1) {
+            if (t->name->mutabl && jl_is_cpointer_type(jl_field_type(t, i)) && *(void**)ptr != (void*)-1) {
                 if (ptr > last)
                     ios_write(s->s, last, ptr - last);
                 char *n = NULL;
@@ -910,7 +940,7 @@ static int jl_collect_methcache_from_mod(jl_typemap_entry_t *ml, void *closure)
         size_t i, l = jl_svec_len(specializations);
         for (i = 0; i < l; i++) {
             jl_method_instance_t *callee = (jl_method_instance_t*)jl_svecref(specializations, i);
-            if (callee != NULL)
+            if ((jl_value_t*)callee != jl_nothing)
                 collect_backedges(callee);
         }
     }
@@ -952,6 +982,16 @@ static void jl_collect_lambdas_from_mod(jl_array_t *s, jl_module_t *m) JL_GC_DIS
                         jl_collect_lambdas_from_mod(s, (jl_module_t*)b->value);
                     }
                 }
+                else if (jl_is_mtable(b->value)) {
+                    jl_methtable_t *mt = (jl_methtable_t*)b->value;
+                    if (mt->module == m && mt->name == b->name) {
+                        // this is probably an external method table, so let's assume so
+                        // as there is no way to precisely distinguish them,
+                        // and the rest of this serializer does not bother
+                        // to handle any method tables specially
+                        jl_collect_methtable_from_mod(s, (jl_methtable_t*)bv);
+                    }
+                }
             }
         }
     }
@@ -1015,7 +1055,7 @@ static void jl_collect_backedges(jl_array_t *s, jl_array_t *t)
                         size_t min_valid = 0;
                         size_t max_valid = ~(size_t)0;
                         int ambig = 0;
-                        jl_value_t *matches = jl_matching_methods((jl_tupletype_t*)sig, -1, 0, jl_world_counter, &min_valid, &max_valid, &ambig);
+                        jl_value_t *matches = jl_matching_methods((jl_tupletype_t*)sig, jl_nothing, -1, 0, jl_world_counter, &min_valid, &max_valid, &ambig);
                         if (matches == jl_false) {
                             valid = 0;
                             break;
@@ -1129,10 +1169,11 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t **udepsp, jl_array_t *
     if (!unique_func)
         unique_func = jl_get_global(jl_base_module, jl_symbol("unique"));
     jl_value_t *uniqargs[2] = {unique_func, (jl_value_t*)deps};
-    size_t last_age = jl_get_ptls_states()->world_age;
-    jl_get_ptls_states()->world_age = jl_world_counter;
+    jl_task_t *ct = jl_current_task;
+    size_t last_age = ct->world_age;
+    ct->world_age = jl_world_counter;
     jl_array_t *udeps = (*udepsp = deps && unique_func ? (jl_array_t*)jl_apply(uniqargs, 2) : NULL);
-    jl_get_ptls_states()->world_age = last_age;
+    ct->world_age = last_age;
 
     // write a placeholder for total size so that we can quickly seek past all of the
     // dependencies if we don't need them
@@ -1180,8 +1221,8 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t **udepsp, jl_array_t *
 
             if (toplevel && prefs_hash_func && get_compiletime_prefs_func) {
                 // Temporary invoke in newest world age
-                size_t last_age = jl_get_ptls_states()->world_age;
-                jl_get_ptls_states()->world_age = jl_world_counter;
+                size_t last_age = ct->world_age;
+                ct->world_age = jl_world_counter;
 
                 // call get_compiletime_prefs(__toplevel__)
                 jl_value_t *args[3] = {get_compiletime_prefs_func, (jl_value_t*)toplevel, NULL};
@@ -1193,7 +1234,7 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t **udepsp, jl_array_t *
                 prefs_hash = (jl_value_t*)jl_apply(args, 3);
 
                 // Reset world age to normal
-                jl_get_ptls_states()->world_age = last_age;
+                ct->world_age = last_age;
             }
         }
 
@@ -1263,22 +1304,15 @@ static jl_value_t *jl_deserialize_datatype(jl_serializer_state *s, int pos, jl_v
     uint8_t flags = read_uint8(s->s);
     uint8_t memflags = read_uint8(s->s);
     dt->size = size;
-    dt->abstract = flags & 1;
-    dt->mutabl = (flags >> 1) & 1;
-    int has_layout = (flags >> 2) & 1;
-    int has_instance = (flags >> 3) & 1;
+    int has_layout = flags & 1;
+    int has_instance = (flags >> 1) & 1;
     dt->hasfreetypevars = memflags & 1;
     dt->isconcretetype = (memflags >> 1) & 1;
     dt->isdispatchtuple = (memflags >> 2) & 1;
     dt->isbitstype = (memflags >> 3) & 1;
     dt->zeroinit = (memflags >> 4) & 1;
-    dt->isinlinealloc = (memflags >> 5) & 1;
-    dt->has_concrete_subtype = (memflags >> 6) & 1;
-    dt->cached_by_hash = (memflags >> 7) & 1;
-    if (!dt->abstract)
-        dt->ninitialized = read_uint16(s->s);
-    else
-        dt->ninitialized = 0;
+    dt->has_concrete_subtype = (memflags >> 5) & 1;
+    dt->cached_by_hash = (memflags >> 6) & 1;
     dt->hash = read_int32(s->s);
 
     if (has_layout) {
@@ -1326,8 +1360,6 @@ static jl_value_t *jl_deserialize_datatype(jl_serializer_state *s, int pos, jl_v
     }
     dt->name = (jl_typename_t*)jl_deserialize_value(s, (jl_value_t**)&dt->name);
     jl_gc_wb(dt, dt->name);
-    dt->names = (jl_svec_t*)jl_deserialize_value(s, (jl_value_t**)&dt->names);
-    jl_gc_wb(dt, dt->names);
     dt->parameters = (jl_svec_t*)jl_deserialize_value(s, (jl_value_t**)&dt->parameters);
     jl_gc_wb(dt, dt->parameters);
     dt->super = (jl_datatype_t*)jl_deserialize_value(s, (jl_value_t**)&dt->super);
@@ -1458,8 +1490,19 @@ static jl_value_t *jl_deserialize_value_method(jl_serializer_state *s, jl_value_
     jl_gc_wb(m, m->sig);
     m->module = (jl_module_t*)jl_deserialize_value(s, (jl_value_t**)&m->module);
     jl_gc_wb(m, m->module);
-    int internal = read_uint8(s->s);
-    if (!internal) {
+    int serialization_mode = read_uint8(s->s);
+    if (serialization_mode & METHOD_EXTERNAL_MT) {
+        jl_module_t *mt_mod = (jl_module_t*)jl_deserialize_value(s, NULL);
+        jl_sym_t *mt_name = (jl_sym_t*)jl_deserialize_value(s, NULL);
+        m->external_mt = jl_get_global(mt_mod, mt_name);
+        jl_gc_wb(m, m->external_mt);
+        assert(jl_typeis(m->external_mt, jl_methtable_type));
+    }
+    else {
+        m->external_mt = jl_deserialize_value(s, &m->external_mt);
+        jl_gc_wb(m, m->external_mt);
+    }
+    if (!(serialization_mode & METHOD_INTERNAL)) {
         assert(loc != NULL && loc != HT_NOTFOUND);
         arraylist_push(&flagref_list, loc);
         arraylist_push(&flagref_list, (void*)pos);
@@ -1574,8 +1617,10 @@ static jl_value_t *jl_deserialize_value_code_instance(jl_serializer_state *s, jl
         codeinst->precompile = 1;
     codeinst->next = (jl_code_instance_t*)jl_deserialize_value(s, (jl_value_t**)&codeinst->next);
     jl_gc_wb(codeinst, codeinst->next);
-    if (validate)
+    if (validate) {
         codeinst->min_world = jl_world_counter;
+        ptrhash_put(&new_code_instance_validate, codeinst, (void*)(~(uintptr_t)HT_NOTFOUND));   // "HT_FOUND"
+    }
     return (jl_value_t*)codeinst;
 }
 
@@ -1713,7 +1758,6 @@ static jl_value_t *jl_deserialize_value_any(jl_serializer_state *s, uint8_t tag,
             memset(tn, 0, sizeof(jl_typename_t));
             tn->cache = jl_emptysvec; // the cache is refilled later (tag 5)
             tn->linearcache = jl_emptysvec; // the cache is refilled later (tag 5)
-            tn->partial = NULL;
             backref_list.items[pos] = tn;
         }
         jl_module_t *m = (jl_module_t*)jl_deserialize_value(s, NULL);
@@ -1728,6 +1772,19 @@ static jl_value_t *jl_deserialize_value_any(jl_serializer_state *s, uint8_t tag,
             tn->mt = (jl_methtable_t*)jl_deserialize_value(s, (jl_value_t**)&tn->mt);
             jl_gc_wb(tn, tn->mt);
             ios_read(s->s, (char*)&tn->hash, sizeof(tn->hash));
+            int8_t flags = read_int8(s->s);
+            tn->abstract = flags & 1;
+            tn->mutabl = (flags>>1) & 1;
+            tn->mayinlinealloc = (flags>>2) & 1;
+            if (tn->abstract)
+                tn->n_uninitialized = 0;
+            else
+                tn->n_uninitialized = read_uint16(s->s);
+            size_t nfields = read_int32(s->s);
+            if (nfields) {
+                tn->atomicfields = (uint32_t*)malloc(nfields);
+                ios_read(s->s, (char*)tn->atomicfields, nfields);
+            }
         }
         else {
             jl_datatype_t *dt = (jl_datatype_t*)jl_unwrap_unionall(jl_get_global(m, sym));
@@ -1897,7 +1954,7 @@ static void jl_insert_methods(jl_array_t *list)
         assert(!meth->is_for_opaque_closure);
         jl_tupletype_t *simpletype = (jl_tupletype_t*)jl_array_ptr_ref(list, i + 1);
         assert(jl_is_method(meth));
-        jl_methtable_t *mt = jl_method_table_for((jl_value_t*)meth->sig);
+        jl_methtable_t *mt = jl_method_get_table(meth);
         assert((jl_value_t*)mt != jl_nothing);
         jl_method_table_insert(mt, meth, simpletype);
     }
@@ -1909,6 +1966,8 @@ static void jl_verify_edges(jl_array_t *targets, jl_array_t **pvalids)
     size_t i, l = jl_array_len(targets) / 2;
     jl_array_t *valids = jl_alloc_array_1d(jl_array_uint8_type, l);
     memset(jl_array_data(valids), 1, l);
+    jl_value_t *loctag = NULL;
+    JL_GC_PUSH1(&loctag);
     *pvalids = valids;
     for (i = 0; i < l; i++) {
         jl_value_t *callee = jl_array_ptr_ref(targets, i * 2);
@@ -1927,7 +1986,7 @@ static void jl_verify_edges(jl_array_t *targets, jl_array_t **pvalids)
         size_t max_valid = ~(size_t)0;
         int ambig = 0;
         // TODO: possibly need to included ambiguities too (for the optimizer correctness)?
-        jl_value_t *matches = jl_matching_methods((jl_tupletype_t*)sig, -1, 0, jl_world_counter, &min_valid, &max_valid, &ambig);
+        jl_value_t *matches = jl_matching_methods((jl_tupletype_t*)sig, jl_nothing, -1, 0, jl_world_counter, &min_valid, &max_valid, &ambig);
         if (matches == jl_false || jl_array_len(matches) != jl_array_len(expected)) {
             valid = 0;
         }
@@ -1950,7 +2009,13 @@ static void jl_verify_edges(jl_array_t *targets, jl_array_t **pvalids)
             }
         }
         jl_array_uint8_set(valids, i, valid);
+        if (!valid && _jl_debug_method_invalidation) {
+            jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)callee);
+            loctag = jl_cstr_to_string("insert_backedges_callee");
+            jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag);
+        }
     }
+    JL_GC_POP();
 }
 
 static void jl_insert_backedges(jl_array_t *list, jl_array_t *targets)
@@ -1993,10 +2058,16 @@ static void jl_insert_backedges(jl_array_t *list, jl_array_t *targets)
             while (codeinst) {
                 if (codeinst->min_world > 0)
                     codeinst->max_world = ~(size_t)0;
+                ptrhash_remove(&new_code_instance_validate, codeinst);  // mark it as handled
                 codeinst = jl_atomic_load_relaxed(&codeinst->next);
             }
         }
         else {
+            jl_code_instance_t *codeinst = caller->cache;
+            while (codeinst) {
+                ptrhash_remove(&new_code_instance_validate, codeinst);  // should be left invalid
+                codeinst = jl_atomic_load_relaxed(&codeinst->next);
+            }
             if (_jl_debug_method_invalidation) {
                 jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)caller);
                 loctag = jl_cstr_to_string("insert_backedges");
@@ -2007,6 +2078,15 @@ static void jl_insert_backedges(jl_array_t *list, jl_array_t *targets)
     JL_GC_POP();
 }
 
+static void validate_new_code_instances(void)
+{
+    void **slots = new_code_instance_validate.table;  // walked in pairs: slots[k] is the CodeInstance, slots[k+1] its marker
+    size_t k, nslots = new_code_instance_validate.size;
+    for (k = 0; k < nslots; k += 2) {
+        if (slots[k + 1] != HT_NOTFOUND)  // still present: jl_insert_backedges never removed this entry
+            ((jl_code_instance_t*)slots[k])->max_world = ~(size_t)0;
+    }
+}
 
 static jl_value_t *read_verify_mod_list(ios_t *s, jl_array_t *mod_list)
 {
@@ -2226,7 +2306,7 @@ JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist)
 
     jl_serializer_state s = {
         &f,
-        jl_get_ptls_states(),
+        jl_current_task->ptls,
         mod_array
     };
     jl_serialize_value(&s, worklist);
@@ -2465,7 +2545,7 @@ static jl_method_t *jl_recache_method(jl_method_t *m)
 {
     assert(!m->is_for_opaque_closure);
     jl_datatype_t *sig = (jl_datatype_t*)m->sig;
-    jl_methtable_t *mt = jl_method_table_for((jl_value_t*)m->sig);
+    jl_methtable_t *mt = jl_method_get_table(m);
     assert((jl_value_t*)mt != jl_nothing);
     jl_set_typeof(m, (void*)(intptr_t)0x30); // invalidate the old value to help catch errors
     jl_method_t *_new = jl_lookup_method(mt, sig, m->module->primary_world);
@@ -2537,7 +2617,7 @@ static int trace_method(jl_typemap_entry_t *entry, void *closure)
 static jl_value_t *_jl_restore_incremental(ios_t *f, jl_array_t *mod_array)
 {
     JL_TIMING(LOAD_MODULE);
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     if (ios_eof(f) || !jl_read_verify_header(f)) {
         ios_close(f);
         return jl_get_exceptionf(jl_errorexception_type,
@@ -2568,18 +2648,19 @@ static jl_value_t *_jl_restore_incremental(ios_t *f, jl_array_t *mod_array)
 
     // prepare to deserialize
     int en = jl_gc_enable(0);
-    jl_gc_enable_finalizers(ptls, 0);
+    jl_gc_enable_finalizers(ct, 0);
     ++jl_world_counter; // reserve a world age for the deserialization
 
     arraylist_new(&backref_list, 4000);
     arraylist_push(&backref_list, jl_main_module);
     arraylist_new(&flagref_list, 0);
+    htable_new(&new_code_instance_validate, 0);
     arraylist_new(&ccallable_list, 0);
     htable_new(&uniquing_table, 0);
 
     jl_serializer_state s = {
         f,
-        ptls,
+        ct->ptls,
         mod_array
     };
     jl_array_t *restored = (jl_array_t*)jl_deserialize_value(&s, (jl_value_t**)&restored);
@@ -2609,12 +2690,16 @@ static jl_value_t *_jl_restore_incremental(ios_t *f, jl_array_t *mod_array)
 
     jl_insert_backedges((jl_array_t*)external_backedges, (jl_array_t*)external_edges); // restore external backedges (needs to be last)
 
+    // check new CodeInstances and validate any that lack external backedges
+    validate_new_code_instances();
+
     serializer_worklist = NULL;
+    htable_free(&new_code_instance_validate);
     arraylist_free(&flagref_list);
     arraylist_free(&backref_list);
     ios_close(f);
 
-    jl_gc_enable_finalizers(ptls, 1); // make sure we don't run any Julia code concurrently before this point
+    jl_gc_enable_finalizers(ct, 1); // make sure we don't run any Julia code concurrently before this point
     if (tracee_list) {
         jl_methtable_t *mt;
         while ((mt = (jl_methtable_t*)arraylist_pop(tracee_list)) != NULL) {
@@ -2659,13 +2744,13 @@ JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *m
 
 void jl_init_serializer(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     htable_new(&ser_tag, 0);
     htable_new(&common_symbol_tag, 0);
     htable_new(&backref_table, 0);
 
     void *vals[] = { jl_emptysvec, jl_emptytuple, jl_false, jl_true, jl_nothing, jl_any_type,
-                     call_sym, invoke_sym, goto_ifnot_sym, return_sym, jl_symbol("tuple"),
+                     call_sym, invoke_sym, invoke_modify_sym, goto_ifnot_sym, return_sym, jl_symbol("tuple"),
                      jl_an_empty_string, jl_an_empty_vec_any,
 
                      // empirical list of very common symbols
@@ -2704,7 +2789,7 @@ void jl_init_serializer(void)
                      jl_type_type_mt, jl_nonfunction_mt,
                      jl_opaque_closure_type,
 
-                     ptls->root_task,
+                     ct->ptls->root_task,
 
                      NULL };
 
diff --git a/src/features_aarch64.h b/src/features_aarch64.h
index 6a834610f0cde0..88d988a35f03f3 100644
--- a/src/features_aarch64.h
+++ b/src/features_aarch64.h
@@ -35,11 +35,11 @@ JL_FEATURE_DEF(fp16fml, 23, 0) // HWCAP_ASIMDFHM, ARMv8.2-FHM
 JL_FEATURE_DEF(dit, 24, 0) // HWCAP_DIT, ARMv8.4-DIT. Required in ARMv8.4
 // JL_FEATURE_DEF(uscat, 25, UINT32_MAX) // HWCAP_USCAT, ARMv8.4-LSE
 JL_FEATURE_DEF_NAME(rcpc_immo, 26, 0, "rcpc-immo") // HWCAP_ILRCPC, ARMv8.4-RCPC. Required in ARMv8.4
-JL_FEATURE_DEF(fmi, 27, 0) // HWCAP_FLAGM, ARMv8.4-CondM. Requird in ARMv8.4
+JL_FEATURE_DEF(flagm, 27, 120000) // HWCAP_FLAGM, ARMv8.4-CondM. Required in ARMv8.4
 JL_FEATURE_DEF(ssbs, 28, 0) // HWCAP_SSBS
 JL_FEATURE_DEF(sb, 29, 0) // HWCAP_SB. Required in ARMv8.5
-JL_FEATURE_DEF(pa, 30, 0) // HWCAP_PACA
-// JL_FEATURE_DEF(pa, 31, 0) // HWCAP_PACG. Merged with `pa`.
+JL_FEATURE_DEF(pauth, 30, 120000) // HWCAP_PACA
+// JL_FEATURE_DEF(pa, 31, 0) // HWCAP_PACG. Merged with `pauth`.
 
 // hwcap2
 JL_FEATURE_DEF(ccdp, 32 + 0, 0) // HWCAP2_DCPODP, ARMv8.2-DCCVADP. Required in ARMv8.5
diff --git a/src/features_x86.h b/src/features_x86.h
index ad6a5eb1e515ae..3ef71fb217db69 100644
--- a/src/features_x86.h
+++ b/src/features_x86.h
@@ -33,7 +33,7 @@ JL_FEATURE_DEF(bmi, 32 * 2 + 3, 0)
 // JL_FEATURE_DEF(hle, 32 * 2 + 4, 0) // Not used and gone in LLVM 5.0
 JL_FEATURE_DEF(avx2, 32 * 2 + 5, 0)
 JL_FEATURE_DEF(bmi2, 32 * 2 + 8, 0)
-// JL_FEATURE_DEF(invpcid, 32 * 2 + 10, 0) // Priviledged instruction
+// JL_FEATURE_DEF(invpcid, 32 * 2 + 10, 0) // Privileged instruction
 JL_FEATURE_DEF(rtm, 32 * 2 + 11, 0)
 // JL_FEATURE_DEF(mpx, 32 * 2 + 14, 0) // Deprecated in LLVM 10.0
 JL_FEATURE_DEF(avx512f, 32 * 2 + 16, 0)
diff --git a/src/flisp/LICENSE b/src/flisp/LICENSE
new file mode 100644
index 00000000000000..34860f4ba63d4f
--- /dev/null
+++ b/src/flisp/LICENSE
@@ -0,0 +1,26 @@
+Copyright (c) 2009 Jeff Bezanson
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimer in the documentation
+      and/or other materials provided with the distribution.
+    * Neither the author nor the names of any contributors may be used to
+      endorse or promote products derived from this software without specific
+      prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/src/flisp/julia_charmap.h b/src/flisp/julia_charmap.h
index bed88a9ace4cd9..59f408ce012c91 100644
--- a/src/flisp/julia_charmap.h
+++ b/src/flisp/julia_charmap.h
@@ -4,4 +4,7 @@
 static const uint32_t charmap[][2] = {
     { 0x025B, 0x03B5 }, // latin small letter open e -> greek small letter epsilon
     { 0x00B5, 0x03BC }, // micro sign -> greek small letter mu
+    { 0x00B7, 0x22C5 }, // middle dot -> dot operator (#25098)
+    { 0x0387, 0x22C5 }, // greek ano teleia (interpunct) -> dot operator (#25098)
+    { 0x2212, 0x002D }, // minus sign -> hyphen-minus (#26193)
 };
diff --git a/src/flisp/julia_extensions.c b/src/flisp/julia_extensions.c
index e6ffcfcde131ce..dbe94e1388069e 100644
--- a/src/flisp/julia_extensions.c
+++ b/src/flisp/julia_extensions.c
@@ -351,6 +351,15 @@ value_t fl_accum_julia_symbol(fl_context_t *fl_ctx, value_t *args, uint32_t narg
     return symbol(fl_ctx, allascii ? str.buf : normalize(fl_ctx, str.buf));
 }
 
+/* flisp builtin `string->normsymbol`: takes exactly one string argument and returns the symbol named by its normalize()d contents */
+value_t fl_string2normsymbol(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
+{
+    argcount(fl_ctx, "string->normsymbol", nargs, 1);
+    if (!fl_isstring(fl_ctx, args[0]))
+        type_error(fl_ctx, "string->normsymbol", "string", args[0]); // non-string arguments are reported as a type error
+    return symbol(fl_ctx, normalize(fl_ctx, (char*)cvalue_data(args[0])));
+}
+
 static const builtinspec_t julia_flisp_func_info[] = {
     { "skip-ws", fl_skipws },
     { "accum-julia-symbol", fl_accum_julia_symbol },
@@ -360,6 +369,7 @@ static const builtinspec_t julia_flisp_func_info[] = {
     { "op-suffix-char?", fl_julia_op_suffix_char },
     { "strip-op-suffix", fl_julia_strip_op_suffix },
     { "underscore-symbol?", fl_julia_underscore_symbolp },
+    { "string->normsymbol", fl_string2normsymbol },
     { NULL, NULL }
 };
 
diff --git a/src/gc-debug.c b/src/gc-debug.c
index febab095a409c9..ed09cff212ff80 100644
--- a/src/gc-debug.c
+++ b/src/gc-debug.c
@@ -315,7 +315,6 @@ static void gc_verify_tags_page(jl_gc_pagemeta_t *pg)
         char *cur_page = gc_page_data((char*)halfpages - 1);
         if (cur_page == data) {
             lim = (char*)halfpages - 1;
-            break;
         }
     }
     // compute the freelist_map
@@ -539,14 +538,14 @@ void gc_scrub_record_task(jl_task_t *t)
 
 static void gc_scrub_range(char *low, char *high)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_jmp_buf *old_buf = ptls->safe_restore;
+    jl_ptls_t ptls = jl_current_task->ptls;
+    jl_jmp_buf *old_buf = jl_get_safe_restore();
     jl_jmp_buf buf;
     if (jl_setjmp(buf, 0)) {
-        ptls->safe_restore = old_buf;
+        jl_set_safe_restore(old_buf);
         return;
     }
-    ptls->safe_restore = &buf;
+    jl_set_safe_restore(&buf);
     low = (char*)((uintptr_t)low & ~(uintptr_t)15);
     for (char **stack_p = ((char**)high) - 1; stack_p > (char**)low; stack_p--) {
         char *p = *stack_p;
@@ -570,13 +569,13 @@ static void gc_scrub_range(char *low, char *high)
         // set mark to GC_MARKED (young and marked)
         tag->bits.gc = GC_MARKED;
     }
-    ptls->safe_restore = old_buf;
+    jl_set_safe_restore(old_buf);
 }
 
 static void gc_scrub_task(jl_task_t *ta)
 {
     int16_t tid = ta->tid;
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     jl_ptls_t ptls2 = NULL;
     if (tid != -1)
         ptls2 = jl_all_tls_states[tid];
@@ -1252,12 +1251,12 @@ int gc_slot_to_arrayidx(void *obj, void *_slot)
 // `pc_offset` will be added to `sp` for convenience in the debugger.
 NOINLINE void gc_mark_loop_unwind(jl_ptls_t ptls, jl_gc_mark_sp_t sp, int pc_offset)
 {
-    jl_jmp_buf *old_buf = ptls->safe_restore;
+    jl_jmp_buf *old_buf = jl_get_safe_restore();
     jl_jmp_buf buf;
-    ptls->safe_restore = &buf;
+    jl_set_safe_restore(&buf);
     if (jl_setjmp(buf, 0) != 0) {
         jl_safe_printf("\n!!! ERROR when unwinding gc mark loop -- ABORTING !!!\n");
-        ptls->safe_restore = old_buf;
+        jl_set_safe_restore(old_buf);
         return;
     }
     void **top = sp.pc + pc_offset;
@@ -1378,7 +1377,7 @@ NOINLINE void gc_mark_loop_unwind(jl_ptls_t ptls, jl_gc_mark_sp_t sp, int pc_off
             break;
         }
     }
-    ptls->safe_restore = old_buf;
+    jl_set_safe_restore(old_buf);
 }
 
 #ifdef __cplusplus
diff --git a/src/gc-stacks.c b/src/gc-stacks.c
index 934dac2d7d6c6d..fb43affe53b0d2 100644
--- a/src/gc-stacks.c
+++ b/src/gc-stacks.c
@@ -119,7 +119,8 @@ static void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz)
 
 JL_DLLEXPORT void jl_free_stack(void *stkbuf, size_t bufsz)
 {
-    _jl_free_stack(jl_get_ptls_states(), stkbuf, bufsz);
+    jl_task_t *ct = jl_current_task;
+    _jl_free_stack(ct->ptls, stkbuf, bufsz); // exported wrapper: forwards to _jl_free_stack with the current task's ptls
 }
 
 
@@ -142,7 +143,8 @@ void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task)
 
 JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPOINT
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
     size_t ssize = *bufsz;
     void *stk = NULL;
     if (ssize <= pool_sizes[JL_N_STACK_POOLS - 1]) {
@@ -231,7 +233,7 @@ void sweep_stack_pools(void)
                     t->stkbuf = NULL;
                     _jl_free_stack(ptls2, stkbuf, bufsz);
                 }
-#ifdef JL_TSAN_ENABLED
+#ifdef _COMPILER_TSAN_ENABLED_
                 if (t->ctx.tsan_state) {
                     __tsan_destroy_fiber(t->ctx.tsan_state);
                     t->ctx.tsan_state = NULL;
@@ -250,13 +252,14 @@ void sweep_stack_pools(void)
 
 JL_DLLEXPORT jl_array_t *jl_live_tasks(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
     arraylist_t *live_tasks = &ptls->heap.live_tasks;
     size_t i, j, l;
     jl_array_t *a;
     do {
         l = live_tasks->len;
-        a = jl_alloc_vec_any(l + 1); // may gc
+        a = jl_alloc_vec_any(l + 1); // may gc, changing the number of tasks
     } while (l + 1 < live_tasks->len);
     l = live_tasks->len;
     void **lst = live_tasks->items;
diff --git a/src/gc.c b/src/gc.c
index 94f5a80fd0cbea..8fb0e00e8f17bf 100644
--- a/src/gc.c
+++ b/src/gc.c
@@ -265,7 +265,7 @@ static void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT
     jl_gc_have_pending_finalizers = 1;
 }
 
-static void run_finalizer(jl_ptls_t ptls, jl_value_t *o, jl_value_t *ff)
+static void run_finalizer(jl_task_t *ct, jl_value_t *o, jl_value_t *ff)
 {
     if (gc_ptr_tag(o, 1)) {
         ((void (*)(void*))ff)(gc_ptr_clear_tag(o, 1));
@@ -273,10 +273,10 @@ static void run_finalizer(jl_ptls_t ptls, jl_value_t *o, jl_value_t *ff)
     }
     jl_value_t *args[2] = {ff,o};
     JL_TRY {
-        size_t last_age = jl_get_ptls_states()->world_age;
-        jl_get_ptls_states()->world_age = jl_world_counter;
+        size_t last_age = ct->world_age;
+        ct->world_age = jl_world_counter;
         jl_apply(args, 2);
-        jl_get_ptls_states()->world_age = last_age;
+        ct->world_age = last_age;
     }
     JL_CATCH {
         jl_printf((JL_STREAM*)STDERR_FILENO, "error in running finalizer: ");
@@ -331,7 +331,7 @@ static void finalize_object(arraylist_t *list, jl_value_t *o,
         // The `memset` (like any other content mutation) has to be done
         // **before** the `cmpxchg` which publishes the length.
         memset(&items[len], 0, (oldlen - len) * sizeof(void*));
-        jl_atomic_compare_exchange(&list->len, oldlen, len);
+        jl_atomic_cmpswap(&list->len, &oldlen, len);
     }
     else {
         list->len = len;
@@ -340,36 +340,36 @@ static void finalize_object(arraylist_t *list, jl_value_t *o,
 
 // The first two entries are assumed to be empty and the rest are assumed to
 // be pointers to `jl_value_t` objects
-static void jl_gc_push_arraylist(jl_ptls_t ptls, arraylist_t *list)
+static void jl_gc_push_arraylist(jl_task_t *ct, arraylist_t *list)
 {
     void **items = list->items;
     items[0] = (void*)JL_GC_ENCODE_PUSHARGS(list->len - 2);
-    items[1] = ptls->pgcstack;
-    ptls->pgcstack = (jl_gcframe_t*)items;
+    items[1] = ct->gcstack; // slot 1 remembers the task's previous gcstack head
+    ct->gcstack = (jl_gcframe_t*)items; // the list's own storage becomes the new head of the task's gcstack
 }
 
 // Same assumption as `jl_gc_push_arraylist`. Requires the finalizers lock
 // to be hold for the current thread and will release the lock when the
 // function returns.
-static void jl_gc_run_finalizers_in_list(jl_ptls_t ptls, arraylist_t *list)
+static void jl_gc_run_finalizers_in_list(jl_task_t *ct, arraylist_t *list)
 {
     // empty out the first two entries for the GC frame
     arraylist_push(list, list->items[0]);
     arraylist_push(list, list->items[1]);
-    jl_gc_push_arraylist(ptls, list);
+    jl_gc_push_arraylist(ct, list); // links the list into ct's gcstack; undone by JL_GC_POP below
     jl_value_t **items = (jl_value_t**)list->items;
     size_t len = list->len;
     JL_UNLOCK_NOGC(&finalizers_lock);
     // run finalizers in reverse order they were added, so lower-level finalizers run last
     for (size_t i = len-4; i >= 2; i -= 2)
-        run_finalizer(ptls, items[i], items[i + 1]);
+        run_finalizer(ct, items[i], items[i + 1]); // entries are stored as (object, finalizer) pairs
     // first entries were moved last to make room for GC frame metadata
-    run_finalizer(ptls, items[len-2], items[len-1]);
+    run_finalizer(ct, items[len-2], items[len-1]); // the pair that originally sat in slots 0 and 1
     // matches the jl_gc_push_arraylist above
     JL_GC_POP();
 }
 
-static void run_finalizers(jl_ptls_t ptls)
+static void run_finalizers(jl_task_t *ct)
 {
     // Racy fast path:
     // The race here should be OK since the race can only happen if
@@ -391,17 +391,18 @@ static void run_finalizers(jl_ptls_t ptls)
     jl_gc_have_pending_finalizers = 0;
     arraylist_new(&to_finalize, 0);
     // This releases the finalizers lock.
-    jl_gc_run_finalizers_in_list(ptls, &copied_list);
+    jl_gc_run_finalizers_in_list(ct, &copied_list);
     arraylist_free(&copied_list);
 }
 
-JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_ptls_t ptls)
+JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct)
 {
-    if (ptls == NULL)
-        ptls = jl_get_ptls_states();
+    if (ct == NULL)
+        ct = jl_current_task; // a NULL argument selects the current task
+    jl_ptls_t ptls = ct->ptls; // the guard state checked below is read from ct->ptls
     if (!ptls->in_finalizer && ptls->locks.len == 0 && ptls->finalizers_inhibited == 0) {
         ptls->in_finalizer = 1;
-        run_finalizers(ptls);
+        run_finalizers(ct); // in_finalizer is set around this call, so a nested call fails the check above
         ptls->in_finalizer = 0;
     }
 }
@@ -409,30 +410,31 @@ JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_ptls_t ptls)
 JL_DLLEXPORT int jl_gc_get_finalizers_inhibited(jl_ptls_t ptls)
 {
     if (ptls == NULL)
-        ptls = jl_get_ptls_states();
+        ptls = jl_current_task->ptls;
     return ptls->finalizers_inhibited;
 }
 
 JL_DLLEXPORT void jl_gc_disable_finalizers_internal(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     ptls->finalizers_inhibited++;
 }
 
 JL_DLLEXPORT void jl_gc_enable_finalizers_internal(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
 #ifdef NDEBUG
-    ptls->finalizers_inhibited--;
+    ct->ptls->finalizers_inhibited--;
 #else
-    jl_gc_enable_finalizers(ptls, 1);
+    jl_gc_enable_finalizers(ct, 1);
 #endif
 }
 
-JL_DLLEXPORT void jl_gc_enable_finalizers(jl_ptls_t ptls, int on)
+JL_DLLEXPORT void jl_gc_enable_finalizers(jl_task_t *ct, int on)
 {
-    if (ptls == NULL)
-        ptls = jl_get_ptls_states();
+    if (ct == NULL)
+        ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
     int old_val = ptls->finalizers_inhibited;
     int new_val = old_val + (on ? -1 : 1);
     if (new_val < 0) {
@@ -452,7 +454,7 @@ JL_DLLEXPORT void jl_gc_enable_finalizers(jl_ptls_t ptls, int on)
     }
     ptls->finalizers_inhibited = new_val;
     if (jl_gc_have_pending_finalizers) {
-        jl_gc_run_pending_finalizers(ptls);
+        jl_gc_run_pending_finalizers(ct);
     }
 }
 
@@ -470,19 +472,19 @@ static void schedule_all_finalizers(arraylist_t *flist) JL_NOTSAFEPOINT
     flist->len = 0;
 }
 
-void jl_gc_run_all_finalizers(jl_ptls_t ptls)
+void jl_gc_run_all_finalizers(jl_task_t *ct)
 {
     schedule_all_finalizers(&finalizer_list_marked);
     for (int i = 0;i < jl_n_threads;i++) {
         jl_ptls_t ptls2 = jl_all_tls_states[i];
         schedule_all_finalizers(&ptls2->finalizers);
     }
-    run_finalizers(ptls);
+    run_finalizers(ct);
 }
 
-static void gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f)
+static void gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT
 {
-    int8_t gc_state = jl_gc_unsafe_enter(ptls);
+    assert(ptls->gc_state == 0);
     arraylist_t *a = &ptls->finalizers;
     // This acquire load and the release store at the end are used to
     // synchronize with `finalize_object` on another thread. Apart from the GC,
@@ -506,15 +508,14 @@ static void gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f)
     items[oldlen] = v;
     items[oldlen + 1] = f;
     jl_atomic_store_release(&a->len, oldlen + 2);
-    jl_gc_unsafe_leave(ptls, gc_state);
 }
 
-JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f)
+JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT
 {
     gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 1), f);
 }
 
-JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f)
+JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT
 {
     if (__unlikely(jl_typeis(f, jl_voidpointer_type))) {
         jl_gc_add_ptr_finalizer(ptls, v, jl_unbox_voidpointer(f));
@@ -524,7 +525,7 @@ JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_funct
     }
 }
 
-JL_DLLEXPORT void jl_finalize_th(jl_ptls_t ptls, jl_value_t *o)
+JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o)
 {
     JL_LOCK_NOGC(&finalizers_lock);
     // Copy the finalizers into a temporary list so that code in the finalizer
@@ -536,12 +537,12 @@ JL_DLLEXPORT void jl_finalize_th(jl_ptls_t ptls, jl_value_t *o)
     // still holding a reference to the object
     for (int i = 0; i < jl_n_threads; i++) {
         jl_ptls_t ptls2 = jl_all_tls_states[i];
-        finalize_object(&ptls2->finalizers, o, &copied_list, ptls != ptls2);
+        finalize_object(&ptls2->finalizers, o, &copied_list, ct->tid != i);
     }
     finalize_object(&finalizer_list_marked, o, &copied_list, 0);
     if (copied_list.len > 0) {
         // This releases the finalizers lock.
-        jl_gc_run_finalizers_in_list(ptls, &copied_list);
+        jl_gc_run_finalizers_in_list(ct, &copied_list);
     }
     else {
         JL_UNLOCK_NOGC(&finalizers_lock);
@@ -1048,7 +1049,7 @@ void jl_gc_track_malloced_array(jl_ptls_t ptls, jl_array_t *a) JL_NOTSAFEPOINT
 
 void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     ptls->gc_num.allocd += sz;
 }
 
@@ -1179,7 +1180,7 @@ static NOINLINE jl_taggedvalue_t *add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT
 {
     // Do not pass in `ptls` as argument. This slows down the fast path
     // in pool_alloc significantly
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     jl_gc_pagemeta_t *pg = jl_gc_alloc_page();
     pg->osize = p->osize;
     pg->ages = (uint8_t*)malloc_s(GC_PAGE_SZ / 8 / p->osize + 1);
@@ -1548,20 +1549,20 @@ static void gc_sweep_perm_alloc(void)
 
 // mark phase
 
-JL_DLLEXPORT void jl_gc_queue_root(jl_value_t *ptr)
+JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *ptr)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     jl_taggedvalue_t *o = jl_astaggedvalue(ptr);
     // The modification of the `gc_bits` is not atomic but it
     // should be safe here since GC is not allowed to run here and we only
     // write GC_OLD to the GC bits outside GC. This could cause
     // duplicated objects in the remset but that shouldn't be a problem.
     o->bits.gc = GC_MARKED;
-    arraylist_push(ptls->heap.remset, ptr);
+    arraylist_push(ptls->heap.remset, (jl_value_t*)ptr);
     ptls->heap.remset_nptr++; // conservative
 }
 
-void jl_gc_queue_multiroot(jl_value_t *parent, jl_value_t *ptr) JL_NOTSAFEPOINT
+void jl_gc_queue_multiroot(const jl_value_t *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT
 {
     // first check if this is really necessary
     // TODO: should we store this info in one of the extra gc bits?
@@ -1602,7 +1603,7 @@ void jl_gc_queue_multiroot(jl_value_t *parent, jl_value_t *ptr) JL_NOTSAFEPOINT
 
 void gc_queue_binding(jl_binding_t *bnd)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     jl_taggedvalue_t *buf = jl_astaggedvalue(bnd);
     buf->bits.gc = GC_MARKED;
     arraylist_push(&ptls->heap.rem_bindings, bnd);
@@ -2563,6 +2564,8 @@ mark: {
             if (a->data == NULL || jl_array_len(a) == 0)
                 goto pop;
             if (flags.ptrarray) {
+                if ((jl_datatype_t*)jl_tparam0(vt) == jl_symbol_type)
+                    goto pop;
                 size_t l = jl_array_len(a);
                 uintptr_t nptr = (l << 2) | (bits & GC_OLD);
                 objary_begin = (jl_value_t**)a->data;
@@ -2637,40 +2640,31 @@ mark: {
             jl_task_t *ta = (jl_task_t*)new_obj;
             gc_scrub_record_task(ta);
             void *stkbuf = ta->stkbuf;
-            int16_t tid = ta->tid;
-            jl_ptls_t ptls2 = NULL;
-            if (tid != -1)
-                ptls2 = jl_all_tls_states[tid];
             if (gc_cblist_task_scanner) {
                 export_gc_state(ptls, &sp);
                 gc_invoke_callbacks(jl_gc_cb_task_scanner_t,
                     gc_cblist_task_scanner,
-                    (ta, ptls2 != NULL && ta == ptls2->root_task));
+                    (ta, ta->tid != -1 && ta == jl_all_tls_states[ta->tid]->root_task));
                 import_gc_state(ptls, &sp);
             }
 #ifdef COPY_STACKS
             if (stkbuf && ta->copy_stack)
                 gc_setmark_buf_(ptls, stkbuf, bits, ta->bufsz);
 #endif
-            jl_gcframe_t *s = NULL;
+            jl_gcframe_t *s = ta->gcstack;
             size_t nroots;
             uintptr_t offset = 0;
             uintptr_t lb = 0;
             uintptr_t ub = (uintptr_t)-1;
-            if (ptls2 && ta == ptls2->current_task) {
-                s = ptls2->pgcstack;
-            }
-            else if (stkbuf) {
-                s = ta->gcstack;
 #ifdef COPY_STACKS
-                if (ta->copy_stack) {
-                    assert(tid != -1 && ptls2 != NULL);
-                    ub = (uintptr_t)ptls2->stackbase;
-                    lb = ub - ta->copy_stack;
-                    offset = (uintptr_t)stkbuf - lb;
-                }
-#endif
+            if (stkbuf && ta->copy_stack && ta->ptls == NULL) {
+                assert(ta->tid >= 0);
+                jl_ptls_t ptls2 = jl_all_tls_states[ta->tid];
+                ub = (uintptr_t)ptls2->stackbase;
+                lb = ub - ta->copy_stack;
+                offset = (uintptr_t)stkbuf - lb;
             }
+#endif
             if (s) {
                 nroots = gc_read_stack(&s->nroots, offset, lb, ub);
                 assert(nroots <= UINT32_MAX);
@@ -2779,11 +2773,14 @@ static void jl_gc_queue_thread_local(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp
     gc_mark_queue_obj(gc_cache, sp, ptls2->root_task);
     if (ptls2->next_task)
         gc_mark_queue_obj(gc_cache, sp, ptls2->next_task);
+    if (ptls2->previous_task) // shouldn't be necessary, but no reason not to
+        gc_mark_queue_obj(gc_cache, sp, ptls2->previous_task);
     if (ptls2->previous_exception)
         gc_mark_queue_obj(gc_cache, sp, ptls2->previous_exception);
 }
 
 void jl_gc_mark_enqueued_tasks(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp);
+extern jl_value_t *cmpswap_names JL_GLOBALLY_ROOTED;
 
 // mark the initial root set
 static void mark_roots(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp)
@@ -2815,6 +2812,8 @@ static void mark_roots(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp)
 
     // constants
     gc_mark_queue_obj(gc_cache, sp, jl_emptytuple_type);
+    if (cmpswap_names != NULL)
+        gc_mark_queue_obj(gc_cache, sp, cmpswap_names);
 }
 
 // find unmarked objects that need to be finalized from the finalizer list "list".
@@ -2861,11 +2860,11 @@ static void sweep_finalizer_list(arraylist_t *list)
 }
 
 // collector entry point and control
-static volatile uint32_t jl_gc_disable_counter = 0;
+static volatile uint32_t jl_gc_disable_counter = 1;
 
 JL_DLLEXPORT int jl_gc_enable(int on)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     int prev = !ptls->disable_gc;
     ptls->disable_gc = (on == 0);
     if (on && !prev) {
@@ -2886,7 +2885,7 @@ JL_DLLEXPORT int jl_gc_enable(int on)
 
 JL_DLLEXPORT int jl_gc_is_enabled(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     return !ptls->disable_gc;
 }
 
@@ -3008,6 +3007,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
 
     uint64_t t0 = jl_hrtime();
     int64_t last_perm_scanned_bytes = perm_scanned_bytes;
+    JL_PROBE_GC_MARK_BEGIN();
 
     // 1. fix GC bits of objects in the remset.
     for (int t_i = 0; t_i < jl_n_threads; t_i++)
@@ -3034,6 +3034,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     gc_mark_loop(ptls, sp);
     gc_mark_sp_init(gc_cache, &sp);
     gc_num.since_sweep += gc_num.allocd;
+    JL_PROBE_GC_MARK_END(scanned_bytes, perm_scanned_bytes);
     gc_settime_premark_end();
     gc_time_mark_pause(t0, scanned_bytes, perm_scanned_bytes);
     int64_t actual_allocd = gc_num.since_sweep;
@@ -3139,6 +3140,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
         perm_scanned_bytes = 0;
     scanned_bytes = 0;
     // 5. start sweeping
+    JL_PROBE_GC_SWEEP_BEGIN(sweep_full);
     sweep_weak_refs();
     sweep_stack_pools();
     gc_sweep_foreign_objs();
@@ -3148,6 +3150,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     gc_sweep_pool(sweep_full);
     if (sweep_full)
         gc_sweep_perm_alloc();
+    JL_PROBE_GC_SWEEP_END();
     // sweeping is over
     // 6. if it is a quick sweep, put back the remembered objects in queued state
     // so that we don't trigger the barrier again on them.
@@ -3205,7 +3208,10 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
 
 JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    JL_PROBE_GC_BEGIN(collection);
+
+    jl_task_t *ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
     if (jl_gc_disable_counter) {
         size_t localbytes = ptls->gc_num.allocd + gc_num.interval;
         ptls->gc_num.allocd = -(int64_t)gc_num.interval;
@@ -3233,6 +3239,8 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
     // TODO (concurrently queue objects)
     // no-op for non-threading
     jl_gc_wait_for_the_world();
+    JL_PROBE_GC_STOP_THE_WORLD();
+
     gc_invoke_callbacks(jl_gc_cb_pre_gc_t,
         gc_cblist_pre_gc, (collection));
 
@@ -3250,6 +3258,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
     // no-op for non-threading
     jl_safepoint_end_gc();
     jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING);
+    JL_PROBE_GC_END();
 
     // Only disable finalizers on current thread
     // Doing this on all threads is racy (it's impossible to check
@@ -3257,9 +3266,11 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
     if (!ptls->finalizers_inhibited && ptls->locks.len == 0) {
         int8_t was_in_finalizer = ptls->in_finalizer;
         ptls->in_finalizer = 1;
-        run_finalizers(ptls);
+        run_finalizers(ct);
         ptls->in_finalizer = was_in_finalizer;
     }
+    JL_PROBE_GC_FINALIZER();
+
     gc_invoke_callbacks(jl_gc_cb_post_gc_t,
         gc_cblist_post_gc, (collection));
 #ifdef _OS_WINDOWS_
@@ -3286,12 +3297,11 @@ JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty)
 // Per-thread initialization
 void jl_init_thread_heap(jl_ptls_t ptls)
 {
+    if (ptls->tid == 0)
+        ptls->disable_gc = 1;
     jl_thread_heap_t *heap = &ptls->heap;
     jl_gc_pool_t *p = heap->norm_pools;
-    for(int i=0; i < JL_GC_N_POOLS; i++) {
-        assert((jl_gc_sizeclasses[i] < 16 &&
-                jl_gc_sizeclasses[i] % sizeof(void*) == 0) ||
-               (jl_gc_sizeclasses[i] % 16 == 0));
+    for (int i = 0; i < JL_GC_N_POOLS; i++) {
         p[i].osize = jl_gc_sizeclasses[i];
         p[i].freelist = NULL;
         p[i].newpages = NULL;
@@ -3361,8 +3371,10 @@ JL_DLLEXPORT void jl_throw_out_of_memory_error(void)
 
 JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    if (ptls && ptls->world_age) {
+    jl_gcframe_t **pgcstack = jl_get_pgcstack();
+    jl_task_t *ct = jl_current_task;
+    if (pgcstack && ct->world_age) {
+        jl_ptls_t ptls = ct->ptls;
         maybe_collect(ptls);
         ptls->gc_num.allocd += sz;
         ptls->gc_num.malloc++;
@@ -3372,8 +3384,10 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
 
 JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    if (ptls && ptls->world_age) {
+    jl_gcframe_t **pgcstack = jl_get_pgcstack();
+    jl_task_t *ct = jl_current_task;
+    if (pgcstack && ct->world_age) {
+        jl_ptls_t ptls = ct->ptls;
         maybe_collect(ptls);
         ptls->gc_num.allocd += nm*sz;
         ptls->gc_num.malloc++;
@@ -3383,9 +3397,11 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz)
 
 JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_gcframe_t **pgcstack = jl_get_pgcstack();
+    jl_task_t *ct = jl_current_task;
     free(p);
-    if (ptls && ptls->world_age) {
+    if (pgcstack && ct->world_age) {
+        jl_ptls_t ptls = ct->ptls;
         ptls->gc_num.freed += sz;
         ptls->gc_num.freecall++;
     }
@@ -3393,8 +3409,10 @@ JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz)
 
 JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    if (ptls && ptls->world_age) {
+    jl_gcframe_t **pgcstack = jl_get_pgcstack();
+    jl_task_t *ct = jl_current_task;
+    if (pgcstack && ct->world_age) {
+        jl_ptls_t ptls = ct->ptls;
         maybe_collect(ptls);
         if (sz < old)
             ptls->gc_num.freed += (old - sz);
@@ -3459,7 +3477,7 @@ JL_DLLEXPORT void *jl_realloc(void *p, size_t sz)
 
 JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     maybe_collect(ptls);
     size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT);
     if (allocsz < sz)  // overflow in adding offs, size was "negative"
@@ -3522,7 +3540,7 @@ static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t olds
 JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz,
                                          int isaligned, jl_value_t *owner)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     return gc_managed_realloc_(ptls, d, sz, oldsz, isaligned, owner, 1);
 }
 
@@ -3547,7 +3565,7 @@ jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz)
     if (allocsz < sz)  // overflow in adding offs, size was "negative"
         jl_throw(jl_memory_exception);
     bigval_t *hdr = bigval_header(v);
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     maybe_collect(ptls); // don't want this to happen during jl_gc_managed_realloc
     gc_big_object_unlink(hdr);
     // TODO: this is not safe since it frees the old pointer. ideally we'd like
@@ -3653,49 +3671,48 @@ void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, unsigned offset)
 
 JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     jl_gc_add_finalizer_th(ptls, v, f);
 }
 
 JL_DLLEXPORT void jl_finalize(jl_value_t *o)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_finalize_th(ptls, o);
+    jl_finalize_th(jl_current_task, o);
 }
 
 JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     return jl_gc_new_weakref_th(ptls, value);
 }
 
 JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     return jl_gc_alloc(ptls, sz, NULL);
 }
 
 JL_DLLEXPORT jl_value_t *jl_gc_alloc_0w(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     return jl_gc_alloc(ptls, 0, NULL);
 }
 
 JL_DLLEXPORT jl_value_t *jl_gc_alloc_1w(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     return jl_gc_alloc(ptls, sizeof(void*), NULL);
 }
 
 JL_DLLEXPORT jl_value_t *jl_gc_alloc_2w(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     return jl_gc_alloc(ptls, sizeof(void*) * 2, NULL);
 }
 
 JL_DLLEXPORT jl_value_t *jl_gc_alloc_3w(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     return jl_gc_alloc(ptls, sizeof(void*) * 3, NULL);
 }
 
diff --git a/src/gen_sysimg_symtab.jl b/src/gen_sysimg_symtab.jl
index 2d389a7209b339..8f03cc15607679 100644
--- a/src/gen_sysimg_symtab.jl
+++ b/src/gen_sysimg_symtab.jl
@@ -69,5 +69,5 @@ function outputline(io, name)
     println(io, "jl_symbol(\"", name, "\"),")
 end
 
-open(f->foreach(l->outputline(f,l), take(syms, 106)), "common_symbols1.inc", "w")
-open(f->foreach(l->outputline(f,l), take(drop(syms, 106), 254)), "common_symbols2.inc", "w")
+open(f->foreach(l->outputline(f,l), take(syms, 100)), "common_symbols1.inc", "w")
+open(f->foreach(l->outputline(f,l), take(drop(syms, 100), 254)), "common_symbols2.inc", "w")
diff --git a/src/gf.c b/src/gf.c
index abc6ac775b644b..d6c9741f3ebf39 100644
--- a/src/gf.c
+++ b/src/gf.c
@@ -32,15 +32,15 @@ JL_DLLEXPORT size_t jl_get_world_counter(void) JL_NOTSAFEPOINT
 
 JL_DLLEXPORT size_t jl_get_tls_world_age(void) JL_NOTSAFEPOINT
 {
-    return jl_get_ptls_states()->world_age;
+    return jl_current_task->world_age;
 }
 
 /// ----- Handling for Julia callbacks ----- ///
 
 JL_DLLEXPORT int8_t jl_is_in_pure_context(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    return ptls->in_pure_callback;
+    jl_task_t *ct = jl_current_task;
+    return ct->ptls->in_pure_callback;
 }
 
 tracer_cb jl_newmeth_tracer = NULL;
@@ -51,15 +51,15 @@ JL_DLLEXPORT void jl_register_newmeth_tracer(void (*callback)(jl_method_t *trace
 
 void jl_call_tracer(tracer_cb callback, jl_value_t *tracee)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    int last_in = ptls->in_pure_callback;
+    jl_task_t *ct = jl_current_task;
+    int last_in = ct->ptls->in_pure_callback;
     JL_TRY {
-        ptls->in_pure_callback = 1;
+        ct->ptls->in_pure_callback = 1;
         callback(tracee);
-        ptls->in_pure_callback = last_in;
+        ct->ptls->in_pure_callback = last_in;
     }
     JL_CATCH {
-        ptls->in_pure_callback = last_in;
+        ct->ptls->in_pure_callback = last_in;
         jl_printf((JL_STREAM*)STDERR_FILENO, "WARNING: tracer callback function threw an error:\n");
         jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
         jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
@@ -69,9 +69,6 @@ void jl_call_tracer(tracer_cb callback, jl_value_t *tracee)
 
 /// ----- Definitions for various internal TypeMaps ----- ///
 
-static struct jl_typemap_info method_defs = {1};
-static struct jl_typemap_info lambda_cache = {1};
-
 static int8_t jl_cachearg_offset(jl_methtable_t *mt)
 {
     return mt->offs;
@@ -125,7 +122,7 @@ JL_DLLEXPORT jl_method_instance_t *jl_specializations_get_linfo(jl_method_t *m J
             JL_GC_PUSH1(&specializations); // clang-sa doesn't realize this loop uses specializations
             for (i = cl; i > 0; i--) {
                 jl_method_instance_t *mi = jl_atomic_load_relaxed(&data[i - 1]);
-                if (mi == NULL)
+                if ((jl_value_t*)mi == jl_nothing)
                     break;
                 if (jl_types_equal(mi->specTypes, type)) {
                     if (locked)
@@ -146,19 +143,20 @@ JL_DLLEXPORT jl_method_instance_t *jl_specializations_get_linfo(jl_method_t *m J
                 jl_method_instance_t **data = (jl_method_instance_t**)jl_svec_data(specializations);
                 for (i = 0; i < cl; i++) {
                     jl_method_instance_t *mi = jl_atomic_load_relaxed(&data[i]);
-                    if (mi == NULL)
+                    if ((jl_value_t*)mi == jl_nothing)
                         break;
                     assert(!jl_types_equal(mi->specTypes, type));
                 }
             }
             jl_method_instance_t *mi = jl_get_specialized(m, type, sparams);
             JL_GC_PUSH1(&mi);
-            if (hv ? (i + 1 >= cl || jl_svecref(specializations, i + 1) != NULL) : (i <= 1 || jl_svecref(specializations, i - 2) != NULL)) {
+            if (hv ? (i + 1 >= cl || jl_svecref(specializations, i + 1) != jl_nothing) : (i <= 1 || jl_svecref(specializations, i - 2) != jl_nothing)) {
                 size_t ncl = cl < 8 ? 8 : (cl*3)>>1;
                 jl_svec_t *nc = jl_alloc_svec_uninit(ncl);
                 if (i > 0)
                     memcpy((char*)jl_svec_data(nc), jl_svec_data(specializations), sizeof(void*) * i);
-                memset((char*)jl_svec_data(nc) + sizeof(void*) * i, 0, sizeof(void*) * (ncl - cl));
+                for (int j = 0; j < ncl - cl; j++)
+                    jl_svecset(nc, j+i, jl_nothing);
                 if (i < cl)
                     memcpy((char*)jl_svec_data(nc) + sizeof(void*) * (i + ncl - cl),
                            (char*)jl_svec_data(specializations) + sizeof(void*) * i,
@@ -171,7 +169,7 @@ JL_DLLEXPORT jl_method_instance_t *jl_specializations_get_linfo(jl_method_t *m J
             }
             if (!hv)
                 i -= 1;
-            assert(jl_svecref(specializations, i) == NULL);
+            assert(jl_svecref(specializations, i) == jl_nothing);
             jl_svecset(specializations, i, mi); // jl_atomic_store_release?
             if (hv) {
                 // TODO: fuse lookup and insert steps?
@@ -243,7 +241,7 @@ jl_datatype_t *jl_mk_builtin_func(jl_datatype_t *dt, const char *name, jl_fptr_a
     jl_methtable_t *mt = dt->name->mt;
     newentry = jl_typemap_alloc(jl_anytuple_type, NULL, jl_emptysvec,
             (jl_value_t*)mi, 1, ~(size_t)0);
-    jl_typemap_insert(&mt->cache, (jl_value_t*)mt, newentry, 0, &lambda_cache);
+    jl_typemap_insert(&mt->cache, (jl_value_t*)mt, newentry, 0);
 
     mt->frozen = 1;
     JL_GC_POP();
@@ -284,13 +282,13 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force)
         jl_printf(JL_STDERR, "\n");
     }
 #endif
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     int last_errno = errno;
 #ifdef _OS_WINDOWS_
     DWORD last_error = GetLastError();
 #endif
-    size_t last_age = ptls->world_age;
-    ptls->world_age = jl_typeinf_world;
+    size_t last_age = ct->world_age;
+    ct->world_age = jl_typeinf_world;
     mi->inInference = 1;
     in_inference++;
     JL_TRY {
@@ -303,7 +301,7 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force)
         jlbacktrace(); // written to STDERR_FILENO
         src = NULL;
     }
-    ptls->world_age = last_age;
+    ct->world_age = last_age;
     in_inference--;
     mi->inInference = 0;
 #ifdef _OS_WINDOWS_
@@ -321,11 +319,11 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force)
 
 JL_DLLEXPORT jl_value_t *jl_call_in_typeinf_world(jl_value_t **args, int nargs)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    size_t last_age = ptls->world_age;
-    ptls->world_age = jl_typeinf_world;
+    jl_task_t *ct = jl_current_task;
+    size_t last_age = ct->world_age;
+    ct->world_age = jl_typeinf_world;
     jl_value_t *ret = jl_apply(args, nargs);
-    ptls->world_age = last_age;
+    ct->world_age = last_age;
     return ret;
 }
 
@@ -370,9 +368,9 @@ JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst(
         int32_t const_flags, size_t min_world, size_t max_world
         /*, jl_array_t *edges, int absolute_max*/)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     assert(min_world <= max_world && "attempting to set invalid world constraints");
-    jl_code_instance_t *codeinst = (jl_code_instance_t*)jl_gc_alloc(ptls, sizeof(jl_code_instance_t),
+    jl_code_instance_t *codeinst = (jl_code_instance_t*)jl_gc_alloc(ct->ptls, sizeof(jl_code_instance_t),
             jl_code_instance_type);
     codeinst->def = mi;
     codeinst->min_world = min_world;
@@ -416,7 +414,7 @@ static int get_method_unspec_list(jl_typemap_entry_t *def, void *closure)
     size_t i, l = jl_svec_len(specializations);
     for (i = 0; i < l; i++) {
         jl_method_instance_t *mi = (jl_method_instance_t*)jl_svecref(specializations, i);
-        if (mi) {
+        if ((jl_value_t*)mi != jl_nothing) {
             assert(jl_is_method_instance(mi));
             if (jl_rettype_inferred(mi, jl_world_counter, jl_world_counter) == jl_nothing)
                 jl_array_ptr_1d_push((jl_array_t*)closure, (jl_value_t*)mi);
@@ -1090,7 +1088,7 @@ static jl_method_instance_t *cache_method(
                     guards++;
                     // alternative approach: insert sentinel entry
                     //jl_typemap_insert(cache, parent, (jl_tupletype_t*)matc->spec_types,
-                    //        NULL, jl_emptysvec, /*guard*/NULL, jl_cachearg_offset(mt), &lambda_cache, other->min_world, other->max_world);
+                    //        NULL, jl_emptysvec, /*guard*/NULL, jl_cachearg_offset(mt), other->min_world, other->max_world);
                 }
             }
         }
@@ -1165,7 +1163,7 @@ static jl_method_instance_t *cache_method(
         jl_gc_wb(mt, mt->leafcache);
     }
     else {
-         jl_typemap_insert(cache, parent, newentry, offs, &lambda_cache);
+         jl_typemap_insert(cache, parent, newentry, offs);
     }
 
     JL_GC_POP();
@@ -1337,22 +1335,23 @@ static void invalidate_external(jl_method_instance_t *mi, size_t max_world) {
             args[1] = (jl_value_t*)mi;
             args[2] = jl_box_uint32(max_world);
 
-            size_t last_age = jl_get_ptls_states()->world_age;
-            jl_get_ptls_states()->world_age = jl_get_world_counter();
+            jl_task_t *ct = jl_current_task;
+            size_t last_age = ct->world_age;
+            ct->world_age = jl_get_world_counter();
 
             jl_value_t **cbs = (jl_value_t**)jl_array_ptr_data(callbacks);
             for (i = 0; i < l; i++) {
                 args[0] = cbs[i];
                 jl_apply(args, 3);
             }
-            jl_get_ptls_states()->world_age = last_age;
+            ct->world_age = last_age;
             JL_GC_POP();
         }
         JL_CATCH {
             jl_printf((JL_STREAM*)STDERR_FILENO, "error in invalidation callback: ");
             jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
             jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
-            jlbacktrace(); // writen to STDERR_FILENO
+            jlbacktrace(); // written to STDERR_FILENO
         }
     }
 }
@@ -1370,7 +1369,7 @@ static void invalidate_method_instance(jl_method_instance_t *replaced, size_t ma
     }
     if (!jl_is_method(replaced->def.method))
         return; // shouldn't happen, but better to be safe
-    JL_LOCK_NOGC(&replaced->def.method->writelock);
+    JL_LOCK(&replaced->def.method->writelock);
     jl_code_instance_t *codeinst = replaced->cache;
     while (codeinst) {
         if (codeinst->max_world == ~(size_t)0) {
@@ -1390,13 +1389,13 @@ static void invalidate_method_instance(jl_method_instance_t *replaced, size_t ma
             invalidate_method_instance(replaced, max_world, depth + 1);
         }
     }
-    JL_UNLOCK_NOGC(&replaced->def.method->writelock);
+    JL_UNLOCK(&replaced->def.method->writelock);
 }
 
 // invalidate cached methods that overlap this definition
 static void invalidate_backedges(jl_method_instance_t *replaced_mi, size_t max_world, const char *why)
 {
-    JL_LOCK_NOGC(&replaced_mi->def.method->writelock);
+    JL_LOCK(&replaced_mi->def.method->writelock);
     jl_array_t *backedges = replaced_mi->backedges;
     if (backedges) {
         // invalidate callers (if any)
@@ -1407,7 +1406,7 @@ static void invalidate_backedges(jl_method_instance_t *replaced_mi, size_t max_w
             invalidate_method_instance(replaced[i], max_world, 1);
         }
     }
-    JL_UNLOCK_NOGC(&replaced_mi->def.method->writelock);
+    JL_UNLOCK(&replaced_mi->def.method->writelock);
     if (why && _jl_debug_method_invalidation) {
         jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)replaced_mi);
         jl_value_t *loctag = jl_cstr_to_string(why);
@@ -1564,7 +1563,7 @@ static void jl_method_table_invalidate(jl_methtable_t *mt, jl_typemap_entry_t *m
     l = jl_svec_len(specializations);
     for (i = 0; i < l; i++) {
         jl_method_instance_t *mi = (jl_method_instance_t*)jl_svecref(specializations, i);
-        if (mi) {
+        if ((jl_value_t*)mi != jl_nothing) {
             invalidated = 1;
             invalidate_external(mi, methodentry->max_world);
             invalidate_backedges(mi, methodentry->max_world, "jl_method_table_disable");
@@ -1641,7 +1640,7 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method
     // then add our new entry
     newentry = jl_typemap_alloc((jl_tupletype_t*)type, simpletype, jl_emptysvec,
             (jl_value_t*)method, method->primary_world, method->deleted_world);
-    jl_typemap_insert(&mt->defs, (jl_value_t*)mt, newentry, 0, &method_defs);
+    jl_typemap_insert(&mt->defs, (jl_value_t*)mt, newentry, 0);
     if (oldentry) {
         jl_method_t *m = oldentry->func.method;
         method_overwrite(newentry, m);
@@ -1731,7 +1730,7 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method
                 enum morespec_options ambig = morespec_unknown;
                 for (i = 0; i < l; i++) {
                     jl_method_instance_t *mi = jl_atomic_load_relaxed(&data[i]);
-                    if (mi == NULL)
+                    if ((jl_value_t*)mi == jl_nothing)
                         continue;
                     isect3 = jl_type_intersection(m->sig, (jl_value_t*)mi->specTypes);
                     if (jl_type_intersection2(type, isect3, &isect, &isect2)) {
@@ -1823,7 +1822,7 @@ static void JL_NORETURN jl_method_error_bare(jl_function_t *f, jl_value_t *args,
         jl_printf((JL_STREAM*)STDERR_FILENO, "A method error occurred before the base MethodError type was defined. Aborting...\n");
         jl_static_show((JL_STREAM*)STDERR_FILENO,(jl_value_t*)f); jl_printf((JL_STREAM*)STDERR_FILENO," world %u\n", (unsigned)world);
         jl_static_show((JL_STREAM*)STDERR_FILENO,args); jl_printf((JL_STREAM*)STDERR_FILENO,"\n");
-        jl_ptls_t ptls = jl_get_ptls_states();
+        jl_ptls_t ptls = jl_current_task->ptls;
         ptls->bt_size = rec_backtrace(ptls->bt_data, JL_MAX_BT_SIZE, 0);
         jl_critical_error(0, NULL);
         abort();
@@ -1878,7 +1877,7 @@ jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t nargs, size_t w
 //
 // lim is the max # of methods to return. if there are more, returns jl_false.
 // -1 for no limit.
-JL_DLLEXPORT jl_value_t *jl_matching_methods(jl_tupletype_t *types, int lim, int include_ambiguous,
+JL_DLLEXPORT jl_value_t *jl_matching_methods(jl_tupletype_t *types, jl_value_t *mt, int lim, int include_ambiguous,
                                              size_t world, size_t *min_valid, size_t *max_valid, int *ambig)
 {
     JL_TIMING(METHOD_MATCH);
@@ -1887,10 +1886,11 @@ JL_DLLEXPORT jl_value_t *jl_matching_methods(jl_tupletype_t *types, int lim, int
     jl_value_t *unw = jl_unwrap_unionall((jl_value_t*)types);
     if (jl_is_tuple_type(unw) && jl_tparam0(unw) == jl_bottom_type)
         return (jl_value_t*)jl_an_empty_vec_any;
-    jl_methtable_t *mt = jl_method_table_for(unw);
+    if (mt == jl_nothing)
+        mt = (jl_value_t*)jl_method_table_for(unw);
     if ((jl_value_t*)mt == jl_nothing)
         return jl_false; // indeterminate - ml_matches can't deal with this case
-    return ml_matches(mt, 0, types, lim, include_ambiguous, 1, world, 1, min_valid, max_valid, ambig);
+    return ml_matches((jl_methtable_t*)mt, 0, types, lim, include_ambiguous, 1, world, 1, min_valid, max_valid, ambig);
 }
 
 jl_method_instance_t *jl_get_unspecialized(jl_method_instance_t *method JL_PROPAGATES_ROOT)
@@ -2069,7 +2069,7 @@ jl_method_instance_t *jl_get_specialization1(jl_tupletype_t *types JL_PROPAGATES
     size_t min_valid2 = 1;
     size_t max_valid2 = ~(size_t)0;
     int ambig = 0;
-    jl_value_t *matches = jl_matching_methods(types, 1, 1, world, &min_valid2, &max_valid2, &ambig);
+    jl_value_t *matches = jl_matching_methods(types, jl_nothing, 1, 1, world, &min_valid2, &max_valid2, &ambig);
     if (*min_valid < min_valid2)
         *min_valid = min_valid2;
     if (*max_valid > max_valid2)
@@ -2248,7 +2248,7 @@ STATIC_INLINE jl_value_t *_jl_invoke(jl_value_t *F, jl_value_t **args, uint32_t
 
 JL_DLLEXPORT jl_value_t *jl_invoke(jl_value_t *F, jl_value_t **args, uint32_t nargs, jl_method_instance_t *mfunc)
 {
-    size_t world = jl_get_ptls_states()->world_age;
+    size_t world = jl_current_task->world_age; // the world age is read from the current task and forwarded to _jl_invoke
     return _jl_invoke(F, args, nargs, mfunc, world);
 }
 
@@ -2419,7 +2419,7 @@ STATIC_INLINE jl_method_instance_t *jl_lookup_generic_(jl_value_t *F, jl_value_t
 
 JL_DLLEXPORT jl_value_t *jl_apply_generic(jl_value_t *F, jl_value_t **args, uint32_t nargs)
 {
-    size_t world = jl_get_ptls_states()->world_age;
+    size_t world = jl_current_task->world_age;
     jl_method_instance_t *mfunc = jl_lookup_generic_(F, args, nargs,
                                                      jl_int32hash_fast(jl_return_address()),
                                                      world);
@@ -2473,7 +2473,7 @@ JL_DLLEXPORT jl_value_t *jl_gf_invoke_lookup_worlds(jl_value_t *types, size_t wo
 // NOTE: assumes argument type is a subtype of the lookup type.
 jl_value_t *jl_gf_invoke(jl_value_t *types0, jl_value_t *gf, jl_value_t **args, size_t nargs)
 {
-    size_t world = jl_get_ptls_states()->world_age;
+    size_t world = jl_current_task->world_age;
     jl_value_t *types = NULL;
     JL_GC_PUSH1(&types);
     types = jl_argtype_with_function(gf, types0);
@@ -2522,7 +2522,7 @@ jl_value_t *jl_gf_invoke_by_method(jl_method_t *method, jl_value_t *gf, jl_value
             jl_gc_sync_total_bytes(last_alloc); // discard allocation count from compilation
     }
     JL_GC_PROMISE_ROOTED(mfunc);
-    size_t world = jl_get_ptls_states()->world_age;
+    size_t world = jl_current_task->world_age;
     return _jl_invoke(gf, args, nargs - 1, mfunc, world);
 }
 
@@ -2538,7 +2538,8 @@ jl_function_t *jl_new_generic_function_with_supertype(jl_sym_t *name, jl_module_
     jl_sym_t *tname = jl_symbol(prefixed);
     free(prefixed);
     jl_datatype_t *ftype = (jl_datatype_t*)jl_new_datatype(
-            tname, module, st, jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 0, 0);
+            tname, module, st, jl_emptysvec, jl_emptysvec, jl_emptysvec, jl_emptysvec,
+            0, 0, 0);
     assert(jl_is_datatype(ftype));
     JL_GC_PUSH1(&ftype);
     ftype->name->mt->name = name;
@@ -2609,8 +2610,8 @@ enum SIGNATURE_FULLY_COVERS {
 
 static jl_method_match_t *make_method_match(jl_tupletype_t *spec_types, jl_svec_t *sparams, jl_method_t *method, enum SIGNATURE_FULLY_COVERS fully_covers)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_method_match_t *match = (jl_method_match_t*)jl_gc_alloc(ptls, sizeof(jl_method_match_t), jl_method_match_type);
+    jl_task_t *ct = jl_current_task;
+    jl_method_match_t *match = (jl_method_match_t*)jl_gc_alloc(ct->ptls, sizeof(jl_method_match_t), jl_method_match_type);
     match->spec_types = spec_types;
     match->sparams = sparams;
     match->method = method;
@@ -2937,6 +2938,14 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, int offs,
                     int subt2 = matc2->fully_covers == FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m2->sig)
                     int rsubt2 = jl_egal((jl_value_t*)matc2->spec_types, m2->sig);
                     jl_value_t *ti;
+                    if (!subt && !subt2 && rsubt && rsubt2 && lim == -1 && ambig == NULL)
+                        // these would only be filtered out of the list as
+                        // ambiguous if they are also type-equal, as we
+                        // aren't skipping matches and the user doesn't
+                        // care if we report any ambiguities
+                        continue;
+                    if (jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig))
+                        continue;
                     if (subt) {
                         ti = (jl_value_t*)matc2->spec_types;
                         isect2 = NULL;
@@ -2945,18 +2954,11 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, int offs,
                         ti = (jl_value_t*)matc->spec_types;
                         isect2 = NULL;
                     }
-                    else if (rsubt && rsubt2 && lim == -1 && ambig == NULL) {
-                        // these would only be filtered out of the list as
-                        // ambiguous if they are also type-equal, as we
-                        // aren't skipping matches and the user doesn't
-                        // care if we report any ambiguities
-                        ti = jl_bottom_type;
-                    }
                     else {
                         jl_type_intersection2((jl_value_t*)matc->spec_types, (jl_value_t*)matc2->spec_types, &env.match.ti, &isect2);
                         ti = env.match.ti;
                     }
-                    if (ti != jl_bottom_type && !jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig)) {
+                    if (ti != jl_bottom_type) {
                         disjoint = 0;
                         // m and m2 are ambiguous, but let's see if we can find another method (m3)
                         // that dominates their intersection, and means we can ignore this
@@ -3137,7 +3139,7 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, int offs,
 }
 
 // see if it might be possible to construct an instance of `typ`
-// if ninitialized == nfields, but a fieldtype is Union{},
+// if n_uninitialized == 0, but a fieldtype is Union{},
 // that type will not be constructable, for example, tested recursively
 int jl_has_concrete_subtype(jl_value_t *typ)
 {
@@ -3159,19 +3161,23 @@ int jl_has_concrete_subtype(jl_value_t *typ)
 #define typeinf_lock codegen_lock
 
 static uint64_t inference_start_time = 0;
+static uint8_t inference_is_measuring_compile_time = 0;
 
 JL_DLLEXPORT void jl_typeinf_begin(void)
 {
     JL_LOCK(&typeinf_lock);
-    if (jl_measure_compile_time[jl_threadid()])
+    if (typeinf_lock.count == 1 && jl_atomic_load_relaxed(&jl_measure_compile_time_enabled)) { // arm only on the outermost acquisition, so a nested begin cannot restart the timer or leave a stale flag
         inference_start_time = jl_hrtime();
+        inference_is_measuring_compile_time = 1; // tells jl_typeinf_end that inference_start_time is valid
+    }
 }
 
 JL_DLLEXPORT void jl_typeinf_end(void)
 {
-    int tid = jl_threadid();
-    if (typeinf_lock.count == 1 && jl_measure_compile_time[tid])
-        jl_cumulative_compile_time[tid] += (jl_hrtime() - inference_start_time);
+    if (typeinf_lock.count == 1 && inference_is_measuring_compile_time) { // report only when the lock count is 1 and jl_typeinf_begin armed the flag
+        jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - inference_start_time)); // atomically add the elapsed jl_hrtime() delta to jl_cumulative_compile_time
+        inference_is_measuring_compile_time = 0; // disarm so the same interval is not added twice
+    }
     JL_UNLOCK(&typeinf_lock);
 }
 
diff --git a/src/init.c b/src/init.c
index f66403c58199c0..1c58753506fb7a 100644
--- a/src/init.c
+++ b/src/init.c
@@ -115,7 +115,7 @@ void jl_init_stack_limits(int ismaster, void **stack_lo, void **stack_hi)
 static void jl_prep_sanitizers(void)
 {
 #if !defined(_OS_WINDOWS_)
-#if defined(JL_ASAN_ENABLED) || defined(JL_MSAN_ENABLED)
+#if defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_MSAN_ENABLED_)
     struct rlimit rl;
 
     // When using the sanitizers, increase stack size because they bloat
@@ -205,7 +205,7 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode)
     if (jl_all_tls_states == NULL)
         return;
 
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
 
     if (exitcode == 0)
         jl_write_compiler_output();
@@ -218,10 +218,10 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode)
         jl_value_t *f = jl_get_global(jl_base_module, jl_symbol("_atexit"));
         if (f != NULL) {
             JL_TRY {
-                size_t last_age = ptls->world_age;
-                ptls->world_age = jl_get_world_counter();
+                size_t last_age = ct->world_age;
+                ct->world_age = jl_get_world_counter();
                 jl_apply(&f, 1);
-                ptls->world_age = last_age;
+                ct->world_age = last_age;
             }
             JL_CATCH {
                 jl_printf((JL_STREAM*)STDERR_FILENO, "\natexit hook threw an error: ");
@@ -237,7 +237,7 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode)
     JL_STDOUT = (uv_stream_t*) STDOUT_FILENO;
     JL_STDERR = (uv_stream_t*) STDERR_FILENO;
 
-    jl_gc_run_all_finalizers(ptls);
+    jl_gc_run_all_finalizers(ct);
 
     uv_loop_t *loop = jl_global_event_loop();
 
@@ -249,7 +249,7 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode)
     JL_UV_LOCK();
     uv_walk(loop, jl_uv_exitcleanup_walk, &queue);
     struct uv_shutdown_queue_item *item = queue.first;
-    if (ptls->current_task != NULL) {
+    if (ct != NULL) {
         while (item) {
             JL_TRY {
                 while (item) {
@@ -615,8 +615,8 @@ static void jl_resolve_sysimg_location(JL_IMAGE_SEARCH rel)
 
 static void jl_set_io_wait(int v)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    ptls->io_wait = v;
+    jl_task_t *ct = jl_current_task;
+    ct->ptls->io_wait = v; // io_wait is written on the ptls reached through the current task
 }
 
 extern jl_mutex_t jl_modules_mutex;
@@ -628,13 +628,11 @@ static void restore_fp_env(void)
     }
 }
 
-void _julia_init(JL_IMAGE_SEARCH rel)
+JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
 {
     jl_init_timing();
     // Make sure we finalize the tls callback before starting any threads.
-    jl_get_ptls_states_getter();
-    jl_ptls_t ptls = jl_get_ptls_states();
-    (void)ptls; assert(ptls); // make sure early that we have initialized ptls
+    (void)jl_get_pgcstack();
     jl_safepoint_init();
     libsupport_init();
     htable_new(&jl_current_modules, 0);
@@ -646,16 +644,9 @@ void _julia_init(JL_IMAGE_SEARCH rel)
     init_stdio();
     restore_fp_env();
     restore_signals();
+    jl_init_intrinsic_properties();
 
     jl_page_size = jl_getpagesize();
-    uint64_t total_mem = uv_get_total_memory();
-    uint64_t constrained_mem = uv_get_constrained_memory();
-    if (constrained_mem > 0 && constrained_mem < total_mem)
-        total_mem = constrained_mem;
-    if (total_mem >= (size_t)-1) {
-        total_mem = (size_t)-1;
-    }
-    jl_arr_xtralloc_limit = total_mem / 100;  // Extra allocation limited to 1% of total RAM
     jl_prep_sanitizers();
     void *stack_lo, *stack_hi;
     jl_init_stack_limits(1, &stack_lo, &stack_hi);
@@ -687,22 +678,26 @@ void _julia_init(JL_IMAGE_SEARCH rel)
 #endif
 #endif
 
-#if defined(JL_USE_INTEL_JITEVENTS)
+#if \
+    defined(JL_USE_INTEL_JITEVENTS) || \
+    defined(JL_USE_OPROFILE_JITEVENTS) || \
+    defined(JL_USE_PERF_JITEVENTS)
     const char *jit_profiling = getenv("ENABLE_JITPROFILING");
+#endif
+
+#if defined(JL_USE_INTEL_JITEVENTS)
     if (jit_profiling && atoi(jit_profiling)) {
         jl_using_intel_jitevents = 1;
     }
 #endif
 
 #if defined(JL_USE_OPROFILE_JITEVENTS)
-    const char *jit_profiling = getenv("ENABLE_JITPROFILING");
     if (jit_profiling && atoi(jit_profiling)) {
         jl_using_oprofile_jitevents = 1;
     }
 #endif
 
 #if defined(JL_USE_PERF_JITEVENTS)
-    const char *jit_profiling = getenv("ENABLE_JITPROFILING");
     if (jit_profiling && atoi(jit_profiling)) {
         jl_using_perf_jitevents= 1;
     }
@@ -723,11 +718,14 @@ void _julia_init(JL_IMAGE_SEARCH rel)
     }
 
     jl_gc_init();
-
+    jl_init_tasks();
     jl_init_threading();
-    jl_init_intrinsic_properties();
 
-    jl_gc_enable(0);
+    jl_ptls_t ptls = jl_init_threadtls(0);
+    jl_init_root_task(ptls, stack_lo, stack_hi);
+    jl_task_t *ct = jl_current_task;
+
+    jl_init_threadinginfra();
 
     jl_resolve_sysimg_location(rel);
     // loads sysimg if available, and conditionally sets jl_options.cpu_target
@@ -738,14 +736,11 @@ void _julia_init(JL_IMAGE_SEARCH rel)
 
     if (jl_options.image_file) {
         jl_restore_system_image(jl_options.image_file);
-    }
-    else {
+    } else {
         jl_init_types();
         jl_init_codegen();
     }
 
-    jl_init_tasks();
-    jl_init_root_task(stack_lo, stack_hi);
     jl_init_common_symbols();
     jl_init_flisp();
     jl_init_serializer();
@@ -766,10 +761,10 @@ void _julia_init(JL_IMAGE_SEARCH rel)
         // Do initialization needed before starting child threads
         jl_value_t *f = jl_get_global(jl_base_module, jl_symbol("__preinit_threads__"));
         if (f) {
-            size_t last_age = ptls->world_age;
-            ptls->world_age = jl_get_world_counter();
+            size_t last_age = ct->world_age;
+            ct->world_age = jl_get_world_counter();
             jl_apply(&f, 1);
-            ptls->world_age = last_age;
+            ct->world_age = last_age;
         }
     }
     else {
@@ -833,6 +828,7 @@ static void post_boot_hooks(void)
     jl_diverror_exception  = jl_new_struct_uninit((jl_datatype_t*)core("DivideError"));
     jl_undefref_exception  = jl_new_struct_uninit((jl_datatype_t*)core("UndefRefError"));
     jl_undefvarerror_type  = (jl_datatype_t*)core("UndefVarError");
+    jl_atomicerror_type    = (jl_datatype_t*)core("ConcurrencyViolationError");
     jl_interrupt_exception = jl_new_struct_uninit((jl_datatype_t*)core("InterruptException"));
     jl_boundserror_type    = (jl_datatype_t*)core("BoundsError");
     jl_memory_exception    = jl_new_struct_uninit((jl_datatype_t*)core("OutOfMemoryError"));
@@ -845,6 +841,7 @@ static void post_boot_hooks(void)
     jl_methoderror_type    = (jl_datatype_t*)core("MethodError");
     jl_loaderror_type      = (jl_datatype_t*)core("LoadError");
     jl_initerror_type      = (jl_datatype_t*)core("InitError");
+    jl_pair_type           = core("Pair");
 
     jl_weakref_type = (jl_datatype_t*)core("WeakRef");
     jl_vecelement_typename = ((jl_datatype_t*)jl_unwrap_unionall(core("VecElement")))->name;
diff --git a/src/interpreter.c b/src/interpreter.c
index 5401cec75932f2..ea93527d88938a 100644
--- a/src/interpreter.c
+++ b/src/interpreter.c
@@ -78,28 +78,37 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
 static jl_value_t *eval_methoddef(jl_expr_t *ex, interpreter_state *s)
 {
     jl_value_t **args = jl_array_ptr_data(ex->args);
-    jl_sym_t *fname = (jl_sym_t*)args[0];
-    jl_module_t *modu = s->module;
-    if (jl_is_globalref(fname)) {
-        modu = jl_globalref_mod(fname);
-        fname = jl_globalref_name(fname);
-    }
-    assert(jl_expr_nargs(ex) != 1 || jl_is_symbol(fname));
 
-    if (jl_is_symbol(fname)) {
+    // generic function definition
+    if (jl_expr_nargs(ex) == 1) {
+        // one-argument form: args[0] names the function, either a symbol or a GlobalRef
+        jl_sym_t *fname = (jl_sym_t*)args[0];
+        jl_module_t *modu = s->module;
+        if (jl_is_globalref(fname)) {
+            modu = jl_globalref_mod(fname);
+            fname = jl_globalref_name(fname);
+        }
+        if (!jl_is_symbol(fname)) {
+            jl_error("method: invalid declaration");
+        }
         jl_value_t *bp_owner = (jl_value_t*)modu;
         jl_binding_t *b = jl_get_binding_for_method_def(modu, fname);
         jl_value_t **bp = &b->value;
         jl_value_t *gf = jl_generic_function_def(b->name, b->owner, bp, bp_owner, b);
-        if (jl_expr_nargs(ex) == 1)
-            return gf;
+        return gf;
     }
 
-    jl_value_t *atypes = NULL, *meth = NULL;
-    JL_GC_PUSH2(&atypes, &meth);
+    jl_value_t *atypes = NULL, *meth = NULL, *fname = NULL;
+    JL_GC_PUSH3(&atypes, &meth, &fname);
+
+    fname = eval_value(args[0], s); // multi-argument form: args[0] is evaluated rather than treated as a name
+    jl_methtable_t *mt = NULL;
+    if (jl_typeis(fname, jl_methtable_type)) {
+        mt = (jl_methtable_t*)fname;
+    }
     atypes = eval_value(args[1], s);
     meth = eval_value(args[2], s);
-    jl_method_def((jl_svec_t*)atypes, (jl_code_info_t*)meth, s->module);
+    jl_method_def((jl_svec_t*)atypes, mt, (jl_code_info_t*)meth, s->module); // mt stays NULL unless args[0] evaluated to a method table
     JL_GC_POP();
     return jl_nothing;
 }
@@ -208,6 +217,9 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s)
     else if (head == invoke_sym) {
         return do_invoke(args, nargs, s);
     }
+    else if (head == invoke_modify_sym) {
+        return do_call(args + 1, nargs - 1, s);
+    }
     else if (head == isdefined_sym) {
         jl_value_t *sym = args[0];
         int defined = 0;
@@ -302,7 +314,8 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s)
     else if (head == boundscheck_sym) {
         return jl_true;
     }
-    else if (head == meta_sym || head == coverageeffect_sym || head == inbounds_sym || head == loopinfo_sym) {
+    else if (head == meta_sym || head == coverageeffect_sym || head == inbounds_sym || head == loopinfo_sym ||
+             head == aliasscope_sym || head == popaliasscope_sym || head == inline_sym || head == noinline_sym) {
         return jl_nothing;
     }
     else if (head == gc_preserve_begin_sym || head == gc_preserve_end_sym) {
@@ -404,13 +417,14 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
 {
     jl_handler_t __eh;
     size_t ns = jl_array_len(stmts);
+    jl_task_t *ct = jl_current_task;
 
     while (1) {
         s->ip = ip;
         if (ip >= ns)
             jl_error("`body` expression must terminate in `return`. Use `block` instead.");
         if (toplevel)
-            jl_get_ptls_states()->world_age = jl_world_counter;
+            ct->world_age = jl_world_counter;
         jl_value_t *stmt = jl_array_ptr_ref(stmts, ip);
         assert(!jl_is_phinode(stmt));
         size_t next_ip = ip + 1;
@@ -506,7 +520,7 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
                     s->continue_at = 0;
                     continue;
                 }
-                else { // a real exeception
+                else { // a real exception
                     ip = catch_ip;
                     continue;
                 }
@@ -515,8 +529,7 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
                 int hand_n_leave = jl_unbox_long(jl_exprarg(stmt, 0));
                 assert(hand_n_leave > 0);
                 // equivalent to jl_pop_handler(hand_n_leave), but retaining eh for longjmp:
-                jl_ptls_t ptls = jl_get_ptls_states();
-                jl_handler_t *eh = ptls->current_task->eh;
+                jl_handler_t *eh = ct->eh;
                 while (--hand_n_leave > 0)
                     eh = eh->prev;
                 jl_eh_restore_state(eh);
@@ -713,9 +726,10 @@ jl_value_t *NOINLINE jl_interpret_toplevel_thunk(jl_module_t *m, jl_code_info_t
     s->continue_at = 0;
     s->mi = NULL;
     JL_GC_ENABLEFRAME(s);
-    size_t last_age = jl_get_ptls_states()->world_age;
+    jl_task_t *ct = jl_current_task;
+    size_t last_age = ct->world_age;
     jl_value_t *r = eval_body(stmts, s, 0, 1);
-    jl_get_ptls_states()->world_age = last_age;
+    ct->world_age = last_age;
     JL_GC_POP();
     return r;
 }
diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp
index c0ee6ffad0ab8a..1847fc5c60e374 100644
--- a/src/intrinsics.cpp
+++ b/src/intrinsics.cpp
@@ -12,6 +12,7 @@ FunctionType *get_intr_args1(LLVMContext &C) { return FunctionType::get(T_prjlva
 FunctionType *get_intr_args2(LLVMContext &C) { return FunctionType::get(T_prjlvalue, {T_prjlvalue, T_prjlvalue}, false); }
 FunctionType *get_intr_args3(LLVMContext &C) { return FunctionType::get(T_prjlvalue, {T_prjlvalue, T_prjlvalue, T_prjlvalue}, false); }
 FunctionType *get_intr_args4(LLVMContext &C) { return FunctionType::get(T_prjlvalue, {T_prjlvalue, T_prjlvalue, T_prjlvalue, T_prjlvalue}, false); }
+FunctionType *get_intr_args5(LLVMContext &C) { return FunctionType::get(T_prjlvalue, {T_prjlvalue, T_prjlvalue, T_prjlvalue, T_prjlvalue, T_prjlvalue}, false); } // five T_prjlvalue parameters, T_prjlvalue result, non-variadic
 
 static JuliaFunction *runtime_func[num_intrinsics] = {
 #define ADD_I(name, nargs) new JuliaFunction{"jl_"#name, get_intr_args##nargs, nullptr},
@@ -58,7 +59,6 @@ static void jl_init_intrinsic_functions_codegen(void)
     float_func[lt_float_fast] = true;
     float_func[le_float_fast] = true;
     float_func[fpiseq] = true;
-    float_func[fpislt] = true;
     float_func[abs_float] = true;
     float_func[copysign_float] = true;
     float_func[ceil_llvm] = true;
@@ -150,7 +150,7 @@ static Constant *julia_const_to_llvm(jl_codectx_t &ctx, const void *ptr, jl_data
     if (bt == jl_bool_type)
         return ConstantInt::get(T_int8, (*(const uint8_t*)ptr) ? 1 : 0);
 
-    Type *lt = julia_struct_to_llvm(ctx, (jl_value_t*)bt, NULL, NULL);
+    Type *lt = julia_struct_to_llvm(ctx, (jl_value_t*)bt, NULL);
 
     if (jl_is_vecelement_type((jl_value_t*)bt) && !jl_is_uniontype(jl_tparam0(bt)))
         bt = (jl_datatype_t*)jl_tparam0(bt);
@@ -199,16 +199,13 @@ static Constant *julia_const_to_llvm(jl_codectx_t &ctx, const void *ptr, jl_data
         unsigned llvm_idx = isa<StructType>(lt) ? convert_struct_offset(lt, offs) : i;
         while (fields.size() < llvm_idx)
             fields.push_back(
-#if JL_LLVM_VERSION >= 110000
                 UndefValue::get(GetElementPtrInst::getTypeAtIndex(lt, fields.size())));
-#else
-                UndefValue::get(cast<CompositeType>(lt)->getTypeAtIndex(fields.size())));
-#endif
         const uint8_t *ov = (const uint8_t*)ptr + offs;
         if (jl_is_uniontype(ft)) {
             // compute the same type layout as julia_struct_to_llvm
-            size_t fsz = jl_field_size(bt, i);
-            size_t al = jl_field_align(bt, i);
+            size_t fsz = 0, al = 0;
+            (void)jl_islayout_inline(ft, &fsz, &al);
+            fsz = jl_field_size(bt, i);
             uint8_t sel = ((const uint8_t*)ptr)[offs + fsz - 1];
             jl_value_t *active_ty = jl_nth_union_component(ft, sel);
             size_t active_sz = jl_datatype_size(active_ty);
@@ -285,6 +282,8 @@ static jl_cgval_t ghostValue(jl_value_t *ty);
 static Value *emit_unboxed_coercion(jl_codectx_t &ctx, Type *to, Value *unboxed)
 {
     Type *ty = unboxed->getType();
+    if (ty == to)
+        return unboxed;
     bool frompointer = ty->isPointerTy();
     bool topointer = to->isPointerTy();
     const DataLayout &DL = jl_data_layout;
@@ -304,6 +303,14 @@ static Value *emit_unboxed_coercion(jl_codectx_t &ctx, Type *to, Value *unboxed)
     if (frompointer && topointer) {
         unboxed = emit_bitcast(ctx, unboxed, to);
     }
+    else if (!ty->isIntOrPtrTy() && !ty->isFloatingPointTy()) {
+        const DataLayout &DL = jl_data_layout;
+        unsigned nb = DL.getTypeSizeInBits(ty);
+        assert(nb == DL.getTypeSizeInBits(to));
+        AllocaInst *cast = ctx.builder.CreateAlloca(ty);
+        ctx.builder.CreateStore(unboxed, cast);
+        unboxed = ctx.builder.CreateLoad(to, ctx.builder.CreateBitCast(cast, to->getPointerTo()));
+    }
     else if (frompointer) {
         Type *INTT_to = INTT(to);
         unboxed = ctx.builder.CreatePtrToInt(unboxed, INTT_to);
@@ -316,7 +323,7 @@ static Value *emit_unboxed_coercion(jl_codectx_t &ctx, Type *to, Value *unboxed)
             unboxed = ctx.builder.CreateBitCast(unboxed, INTT_to);
         unboxed = emit_inttoptr(ctx, unboxed, to);
     }
-    else if (ty != to) {
+    else {
         unboxed = ctx.builder.CreateBitCast(unboxed, to);
     }
     return unboxed;
@@ -577,25 +584,21 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv)
     jl_value_t *ety = jl_tparam0(aty);
     if (jl_is_typevar(ety))
         return emit_runtime_pointerref(ctx, argv);
-    if (!jl_is_datatype(ety))
-        ety = (jl_value_t*)jl_any_type;
+    if (!is_valid_intrinsic_elptr(ety)) {
+        emit_error(ctx, "pointerref: invalid pointer type");
+        return jl_cgval_t();
+    }
 
     Value *idx = emit_unbox(ctx, T_size, i, (jl_value_t*)jl_long_type);
     Value *im1 = ctx.builder.CreateSub(idx, ConstantInt::get(T_size, 1));
 
     if (ety == (jl_value_t*)jl_any_type) {
         Value *thePtr = emit_unbox(ctx, T_pprjlvalue, e, e.typ);
-        return mark_julia_type(
-                ctx,
-                ctx.builder.CreateAlignedLoad(ctx.builder.CreateInBoundsGEP(T_prjlvalue, thePtr, im1), Align(align_nb)),
-                true,
-                ety);
+        LoadInst *load = ctx.builder.CreateAlignedLoad(ctx.builder.CreateInBoundsGEP(T_prjlvalue, thePtr, im1), Align(align_nb));
+        tbaa_decorate(tbaa_data, load);
+        return mark_julia_type(ctx, load, true, ety);
     }
     else if (!jl_isbits(ety)) {
-        if (!jl_is_structtype(ety) || jl_is_array_type(ety) || !jl_is_concrete_type(ety)) {
-            emit_error(ctx, "pointerref: invalid pointer type");
-            return jl_cgval_t();
-        }
         assert(jl_is_datatype(ety));
         uint64_t size = jl_datatype_size(ety);
         Value *strct = emit_allocobj(ctx, size,
@@ -614,7 +617,7 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv)
         assert(!isboxed);
         if (!type_is_ghost(ptrty)) {
             Value *thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ);
-            return typed_load(ctx, thePtr, im1, ety, tbaa_data, nullptr, true, align_nb);
+            return typed_load(ctx, thePtr, im1, ety, tbaa_data, nullptr, isboxed, AtomicOrdering::NotAtomic, true, align_nb);
         }
         else {
             return ghostValue(ety);
@@ -649,8 +652,10 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, jl_cgval_t *argv)
         return emit_runtime_pointerset(ctx, argv);
     if (align.constant == NULL || !jl_is_long(align.constant))
         return emit_runtime_pointerset(ctx, argv);
-    if (!jl_is_datatype(ety))
-        ety = (jl_value_t*)jl_any_type;
+    if (!is_valid_intrinsic_elptr(ety)) {
+        emit_error(ctx, "pointerset: invalid pointer type");
+        return jl_cgval_t();
+    }
     emit_typecheck(ctx, x, ety, "pointerset");
 
     Value *idx = emit_unbox(ctx, T_size, i, (jl_value_t*)jl_long_type);
@@ -666,10 +671,6 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, jl_cgval_t *argv)
         tbaa_decorate(tbaa_data, store);
     }
     else if (!jl_isbits(ety)) {
-        if (!jl_is_structtype(ety) || jl_is_array_type(ety) || !jl_is_concrete_type(ety)) {
-            emit_error(ctx, "pointerset: invalid pointer type");
-            return jl_cgval_t();
-        }
         thePtr = emit_unbox(ctx, T_pint8, e, e.typ);
         uint64_t size = jl_datatype_size(ety);
         im1 = ctx.builder.CreateMul(im1, ConstantInt::get(T_size,
@@ -682,12 +683,181 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, jl_cgval_t *argv)
         assert(!isboxed);
         if (!type_is_ghost(ptrty)) {
             thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ);
-            typed_store(ctx, thePtr, im1, x, ety, tbaa_data, nullptr, nullptr, align_nb);
+            typed_store(ctx, thePtr, im1, x, jl_cgval_t(), ety, tbaa_data, nullptr, nullptr, isboxed,
+                        AtomicOrdering::NotAtomic, AtomicOrdering::NotAtomic, align_nb, false, true, false, false, false, false, nullptr, "");
         }
     }
     return e;
 }
 
+static jl_cgval_t emit_atomicfence(jl_codectx_t &ctx, jl_cgval_t *argv)
+{
+    // Without a compile-time constant Symbol ordering, defer to the runtime intrinsic.
+    const jl_cgval_t &ordering = argv[0];
+    if (!ordering.constant || !jl_is_symbol(ordering.constant))
+        return emit_runtime_call(ctx, atomic_fence, argv, 1);
+    enum jl_memory_order mo = jl_get_atomic_order((jl_sym_t*)ordering.constant, false, false);
+    if (mo == jl_memory_order_invalid) {
+        emit_atomic_error(ctx, "invalid atomic ordering");
+        return jl_cgval_t(); // unreachable
+    }
+    if (mo > jl_memory_order_monotonic) // weaker orderings emit no fence instruction
+        ctx.builder.CreateFence(get_llvm_atomic_order(mo));
+    return ghostValue(jl_nothing_type);
+}
+
+static jl_cgval_t emit_atomic_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv)
+{
+    const jl_cgval_t &e = argv[0]; // pointer operand
+    const jl_cgval_t &ord = argv[1]; // memory-ordering operand; inlined only when it is a constant Symbol
+    jl_value_t *aty = e.typ;
+    if (!jl_is_cpointer_type(aty) || !ord.constant || !jl_is_symbol(ord.constant))
+        return emit_runtime_call(ctx, atomic_pointerref, argv, 2); // not statically resolvable: use the runtime intrinsic
+    jl_value_t *ety = jl_tparam0(aty); // element type T of Ptr{T}
+    if (jl_is_typevar(ety))
+        return emit_runtime_call(ctx, atomic_pointerref, argv, 2); // element type still a type variable: use the runtime intrinsic
+    enum jl_memory_order order = jl_get_atomic_order((jl_sym_t*)ord.constant, true, false);
+    if (order == jl_memory_order_invalid) {
+        emit_atomic_error(ctx, "invalid atomic ordering");
+        return jl_cgval_t(); // unreachable
+    }
+    AtomicOrdering llvm_order = get_llvm_atomic_order(order);
+
+    if (ety == (jl_value_t*)jl_any_type) { // Ptr{Any}: atomically load the object reference itself
+        Value *thePtr = emit_unbox(ctx, T_pprjlvalue, e, e.typ);
+        LoadInst *load = ctx.builder.CreateAlignedLoad(thePtr, Align(sizeof(jl_value_t*)));
+        tbaa_decorate(tbaa_data, load);
+        load->setOrdering(llvm_order);
+        return mark_julia_type(ctx, load, true, ety);
+    }
+
+    if (!is_valid_intrinsic_elptr(ety)) {
+        emit_error(ctx, "atomic_pointerref: invalid pointer type");
+        return jl_cgval_t();
+    }
+
+    size_t nb = jl_datatype_size(ety);
+    if ((nb & (nb - 1)) != 0 || nb > MAX_POINTERATOMIC_SIZE) { // reject sizes that are not a power of two (0 passes this test) or exceed the limit
+        emit_error(ctx, "atomic_pointerref: invalid pointer for atomic operation");
+        return jl_cgval_t();
+    }
+
+    if (!jl_isbits(ety)) { // non-isbits element: load the raw bits atomically, then copy them into a newly allocated object
+        assert(jl_is_datatype(ety));
+        uint64_t size = jl_datatype_size(ety);
+        Value *strct = emit_allocobj(ctx, size,
+                                     literal_pointer_val(ctx, ety));
+        Value *thePtr = emit_unbox(ctx, T_pint8, e, e.typ);
+        Type *loadT = Type::getIntNTy(jl_LLVMContext, nb * 8); // integer type exactly nb bytes wide
+        thePtr = emit_bitcast(ctx, thePtr, loadT->getPointerTo());
+        MDNode *tbaa = best_tbaa(ety);
+        LoadInst *load = ctx.builder.CreateAlignedLoad(loadT, thePtr, Align(nb));
+        tbaa_decorate(tbaa, load);
+        load->setOrdering(llvm_order);
+        thePtr = emit_bitcast(ctx, strct, thePtr->getType()); // thePtr now addresses the new object, not the source pointer
+        StoreInst *store = ctx.builder.CreateAlignedStore(load, thePtr, Align(julia_alignment(ety))); // this store is not given an atomic ordering
+        tbaa_decorate(tbaa, store);
+        return mark_julia_type(ctx, strct, true, ety);
+    }
+    else {
+        bool isboxed;
+        Type *ptrty = julia_type_to_llvm(ctx, ety, &isboxed);
+        assert(!isboxed);
+        if (!type_is_ghost(ptrty)) {
+            Value *thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ);
+            return typed_load(ctx, thePtr, nullptr, ety, tbaa_data, nullptr, isboxed, llvm_order, true, nb);
+        }
+        else {
+            if (order > jl_memory_order_monotonic) // ghost type: nothing to load, only a fence is emitted for the ordering
+                ctx.builder.CreateFence(llvm_order);
+            return ghostValue(ety);
+        }
+    }
+}
+
+// Shared emitter for atomic_pointerset / atomic_pointerswap / atomic_pointerreplace /
+// atomic_pointermodify on the pointer `e` (argv[0]); no index operand is passed to typed_store.
+// Schematically: set `*e = x`; swap `*e <= x`; replace `*e: y => x`; modify applies an op to `*e`.
+// NOTE(review): which of x/y carries the modify op is decided inside typed_store -- confirm there.
+static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, const jl_cgval_t *argv, int nargs, const jl_cgval_t *modifyop)
+{
+    bool issetfield = f == atomic_pointerset;
+    bool isreplacefield = f == atomic_pointerreplace;
+    bool isswapfield = f == atomic_pointerswap;
+    bool ismodifyfield = f == atomic_pointermodify;
+    const jl_cgval_t undefval; // default-constructed placeholder for operands this intrinsic does not take
+    const jl_cgval_t &e = argv[0];
+    const jl_cgval_t &x = isreplacefield || ismodifyfield ? argv[2] : argv[1]; // replace/modify: (e, y, x, ord[, failord]); set/swap: (e, x, ord)
+    const jl_cgval_t &y = isreplacefield || ismodifyfield ? argv[1] : undefval;
+    const jl_cgval_t &ord = isreplacefield || ismodifyfield ? argv[3] : argv[2];
+    const jl_cgval_t &failord = isreplacefield ? argv[4] : undefval; // only replace has a separate failure ordering
+
+    jl_value_t *aty = e.typ;
+    if (!jl_is_cpointer_type(aty) || !ord.constant || !jl_is_symbol(ord.constant))
+        return emit_runtime_call(ctx, f, argv, nargs); // not statically resolvable: use the runtime intrinsic
+    if (isreplacefield) {
+        if (!failord.constant || !jl_is_symbol(failord.constant))
+            return emit_runtime_call(ctx, f, argv, nargs);
+    }
+    jl_value_t *ety = jl_tparam0(aty); // element type T of Ptr{T}
+    if (jl_is_typevar(ety))
+        return emit_runtime_call(ctx, f, argv, nargs);
+    enum jl_memory_order order = jl_get_atomic_order((jl_sym_t*)ord.constant, !issetfield, true);
+    enum jl_memory_order failorder = isreplacefield ? jl_get_atomic_order((jl_sym_t*)failord.constant, true, false) : order;
+    if (order == jl_memory_order_invalid || failorder == jl_memory_order_invalid || failorder > order) { // failure ordering may not exceed the success ordering
+        emit_atomic_error(ctx, "invalid atomic ordering");
+        return jl_cgval_t(); // unreachable
+    }
+    AtomicOrdering llvm_order = get_llvm_atomic_order(order);
+    AtomicOrdering llvm_failorder = get_llvm_atomic_order(failorder);
+
+    if (ety == (jl_value_t*)jl_any_type) {
+        // unsafe_store to Ptr{Any} is allowed to implicitly drop GC roots.
+        // n.b.: the expected value (y) must be rooted, but not the others
+        Value *thePtr = emit_unbox(ctx, T_pprjlvalue, e, e.typ);
+        bool isboxed = true;
+        jl_cgval_t ret = typed_store(ctx, thePtr, nullptr, x, y, ety, tbaa_data, nullptr, nullptr, isboxed,
+                    llvm_order, llvm_failorder, sizeof(jl_value_t*), false, issetfield, isreplacefield, isswapfield, ismodifyfield, false, modifyop, "atomic_pointermodify");
+        if (issetfield)
+            ret = e; // atomic_pointerset yields the pointer itself, as emit_pointerset does
+        return ret;
+    }
+
+    if (!is_valid_intrinsic_elptr(ety)) {
+        std::string msg(StringRef(jl_intrinsic_name((int)f)));
+        msg += ": invalid pointer type";
+        emit_error(ctx, msg);
+        return jl_cgval_t();
+    }
+    if (!ismodifyfield) // NOTE(review): modify skips this check of x against ety; presumably the op's result is checked elsewhere -- confirm
+        emit_typecheck(ctx, x, ety, std::string(jl_intrinsic_name((int)f)));
+
+    size_t nb = jl_datatype_size(ety);
+    if ((nb & (nb - 1)) != 0 || nb > MAX_POINTERATOMIC_SIZE) { // reject sizes that are not a power of two (0 passes this test) or exceed the limit
+        std::string msg(StringRef(jl_intrinsic_name((int)f)));
+        msg += ": invalid pointer for atomic operation";
+        emit_error(ctx, msg);
+        return jl_cgval_t();
+    }
+
+    if (!jl_isbits(ety)) {
+        // Non-isbits element types have no inline lowering here yet;
+        // the whole operation is deferred to the runtime intrinsic.
+        return emit_runtime_call(ctx, f, argv, nargs); // TODO: optimizations
+    }
+    else {
+        bool isboxed;
+        Type *ptrty = julia_type_to_llvm(ctx, ety, &isboxed);
+        assert(!isboxed);
+        Value *thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ);
+        jl_cgval_t ret = typed_store(ctx, thePtr, nullptr, x, y, ety, tbaa_data, nullptr, nullptr, isboxed,
+                    llvm_order, llvm_failorder, nb, false, issetfield, isreplacefield, isswapfield, ismodifyfield, false, modifyop, "atomic_pointermodify");
+        if (issetfield)
+            ret = e; // atomic_pointerset yields the pointer itself, as emit_pointerset does
+        return ret;
+    }
+}
+
 static Value *emit_checked_srem_int(jl_codectx_t &ctx, Value *x, Value *den)
 {
     Type *t = den->getType();
@@ -915,6 +1085,15 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
         return emit_pointerref(ctx, argv);
     case pointerset:
         return emit_pointerset(ctx, argv);
+    case atomic_fence:
+        return emit_atomicfence(ctx, argv);
+    case atomic_pointerref:
+        return emit_atomic_pointerref(ctx, argv);
+    case atomic_pointerset:
+    case atomic_pointerswap:
+    case atomic_pointermodify:
+    case atomic_pointerreplace:
+        return emit_atomic_pointerop(ctx, f, argv, nargs, nullptr);
     case bitcast:
         return generic_bitcast(ctx, argv);
     case trunc_int:
@@ -1165,26 +1344,6 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg
                                 ctx.builder.CreateICmpEQ(xi, yi));
     }
 
-    case fpislt: {
-        *newtyp = jl_bool_type;
-        Type *it = INTT(t);
-        Value *xi = ctx.builder.CreateBitCast(x, it);
-        Value *yi = ctx.builder.CreateBitCast(y, it);
-        return ctx.builder.CreateOr(
-            ctx.builder.CreateAnd(
-                ctx.builder.CreateFCmpORD(x, x),
-                ctx.builder.CreateFCmpUNO(y, y)),
-            ctx.builder.CreateAnd(
-                ctx.builder.CreateFCmpORD(x, y),
-                ctx.builder.CreateOr(
-                    ctx.builder.CreateAnd(
-                        ctx.builder.CreateICmpSGE(xi, ConstantInt::get(it, 0)),
-                        ctx.builder.CreateICmpSLT(xi, yi)),
-                    ctx.builder.CreateAnd(
-                        ctx.builder.CreateICmpSLT(xi, ConstantInt::get(it, 0)),
-                        ctx.builder.CreateICmpUGT(xi, yi)))));
-    }
-
     case and_int: return ctx.builder.CreateAnd(x, y);
     case or_int:  return ctx.builder.CreateOr(x, y);
     case xor_int: return ctx.builder.CreateXor(x, y);
diff --git a/src/intrinsics.h b/src/intrinsics.h
index 1558769eb3643e..52988a313c990c 100644
--- a/src/intrinsics.h
+++ b/src/intrinsics.h
@@ -45,7 +45,6 @@
     ALIAS(lt_float_fast, lt_float) \
     ALIAS(le_float_fast, le_float) \
     ADD_I(fpiseq, 2) \
-    ADD_I(fpislt, 2) \
     /*  bitwise operators */ \
     ADD_I(and_int, 2) \
     ADD_I(or_int, 2) \
@@ -92,10 +91,17 @@
     /*  pointer access */ \
     ADD_I(pointerref, 3) \
     ADD_I(pointerset, 4) \
-    /* c interface */ \
+    /*  pointer atomics */ \
+    ADD_I(atomic_fence, 1) \
+    ADD_I(atomic_pointerref, 2) \
+    ADD_I(atomic_pointerset, 3) \
+    ADD_I(atomic_pointerswap, 3) \
+    ADD_I(atomic_pointermodify, 4) \
+    ADD_I(atomic_pointerreplace, 5) \
+    /*  c interface */ \
     ADD_I(cglobal, 2) \
     ALIAS(llvmcall, llvmcall) \
-    /* object access */ \
+    /*  object access */ \
     ADD_I(arraylen, 1) \
     /*  hidden intrinsics */ \
     ADD_HIDDEN(cglobal_auto, 1)
diff --git a/src/ircode.c b/src/ircode.c
index da78a3a8a327ae..212febe121a75f 100644
--- a/src/ircode.c
+++ b/src/ircode.c
@@ -608,20 +608,20 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED
         return jl_decode_value_phic(s, tag);
     case TAG_GOTONODE: JL_FALLTHROUGH; case TAG_QUOTENODE:
         v = jl_new_struct_uninit(tag == TAG_GOTONODE ? jl_gotonode_type : jl_quotenode_type);
-        set_nth_field(tag == TAG_GOTONODE ? jl_gotonode_type : jl_quotenode_type, (void*)v, 0, jl_decode_value(s));
+        set_nth_field(tag == TAG_GOTONODE ? jl_gotonode_type : jl_quotenode_type, v, 0, jl_decode_value(s), 0);
         return v;
     case TAG_GOTOIFNOT:
         v = jl_new_struct_uninit(jl_gotoifnot_type);
-        set_nth_field(jl_gotoifnot_type, (void*)v, 0, jl_decode_value(s));
-        set_nth_field(jl_gotoifnot_type, (void*)v, 1, jl_decode_value(s));
+        set_nth_field(jl_gotoifnot_type, v, 0, jl_decode_value(s), 0);
+        set_nth_field(jl_gotoifnot_type, v, 1, jl_decode_value(s), 0);
         return v;
     case TAG_ARGUMENT:
         v = jl_new_struct_uninit(jl_argument_type);
-        set_nth_field(jl_argument_type, (void*)v, 0, jl_decode_value(s));
+        set_nth_field(jl_argument_type, v, 0, jl_decode_value(s), 0);
         return v;
     case TAG_RETURNNODE:
         v = jl_new_struct_uninit(jl_returnnode_type);
-        set_nth_field(jl_returnnode_type, (void*)v, 0, jl_decode_value(s));
+        set_nth_field(jl_returnnode_type, v, 0, jl_decode_value(s), 0);
         return v;
     case TAG_SHORTER_INT64:
         v = jl_box_int64((int16_t)read_uint16(s->s));
@@ -670,7 +670,7 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED
         v = jl_new_struct_uninit(jl_lineinfonode_type);
         for (i = 0; i < jl_datatype_nfields(jl_lineinfonode_type); i++) {
             //size_t offs = jl_field_offset(jl_lineinfonode_type, i);
-            set_nth_field(jl_lineinfonode_type, (void*)v, i, jl_decode_value(s));
+            set_nth_field(jl_lineinfonode_type, v, i, jl_decode_value(s), 0);
         }
         return v;
     default:
@@ -699,7 +699,7 @@ JL_DLLEXPORT jl_array_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code)
     jl_ircode_state s = {
         &dest,
         m,
-        jl_get_ptls_states()
+        jl_current_task->ptls
     };
 
     uint8_t flags = (code->aggressive_constprop << 4)
@@ -783,7 +783,7 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t
     jl_ircode_state s = {
         &src,
         m,
-        jl_get_ptls_states()
+        jl_current_task->ptls
     };
 
     jl_code_info_t *code = jl_new_code_info_uninit();
diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp
index 3ac092d53fbc60..60b1903aaa802f 100644
--- a/src/jitlayers.cpp
+++ b/src/jitlayers.cpp
@@ -1,7 +1,5 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-// Except for parts of this file which were copied from LLVM, under the UIUC license (marked below).
-
 #include "llvm-version.h"
 #include "platform.h"
 
@@ -78,16 +76,16 @@ void jl_jit_globals(std::map<void *, GlobalVariable*> &globals)
 extern "C" JL_DLLEXPORT
 uint64_t jl_cumulative_compile_time_ns_before()
 {
-    int tid = jl_threadid();
-    jl_measure_compile_time[tid] = 1;
-    return jl_cumulative_compile_time[tid];
+    // Increment (rather than set) the enable flag so reentrant callers to `@time` each keep measurement on.
+    jl_atomic_fetch_add(&jl_measure_compile_time_enabled, 1);
+    return jl_atomic_load_relaxed(&jl_cumulative_compile_time); // current value of the shared running total
 }
 extern "C" JL_DLLEXPORT
 uint64_t jl_cumulative_compile_time_ns_after()
 {
-    int tid = jl_threadid();
-    jl_measure_compile_time[tid] = 0;
-    return jl_cumulative_compile_time[tid];
+    // Decrement the enable flag when this caller is done measuring; other callers' increments stay in effect.
+    jl_atomic_fetch_add(&jl_measure_compile_time_enabled, -1);
+    return jl_atomic_load_relaxed(&jl_cumulative_compile_time); // current value of the shared running total
 }
 
 // this generates llvm code for the lambda info
@@ -181,7 +179,7 @@ static jl_callptr_t _jl_compile_codeinst(
             jl_atomic_store_release(&this_code->invoke, addr);
         }
         else if (this_code->invoke == jl_fptr_const_return && !decls.specFunctionObject.empty()) {
-            // hack to export this pointer value to jl_dump_method_asm
+            // hack to export this pointer value to jl_dump_method_disasm
             this_code->specptr.fptr = (void*)getAddressForFunction(decls.specFunctionObject);
         }
         if (this_code== codeinst)
@@ -233,8 +231,8 @@ int jl_compile_extern_c(void *llvmmod, void *p, void *sysimg, jl_value_t *declrt
 {
     JL_LOCK(&codegen_lock);
     uint64_t compiler_start_time = 0;
-    int tid = jl_threadid();
-    if (jl_measure_compile_time[tid])
+    uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
+    if (measure_compile_time_enabled)
         compiler_start_time = jl_hrtime();
     jl_codegen_params_t params;
     jl_codegen_params_t *pparams = (jl_codegen_params_t*)p;
@@ -258,14 +256,12 @@ int jl_compile_extern_c(void *llvmmod, void *p, void *sysimg, jl_value_t *declrt
         if (success && llvmmod == NULL)
             jl_add_to_ee(std::unique_ptr<Module>(into));
     }
-    if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
-        jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
+    if (codegen_lock.count == 1 && measure_compile_time_enabled)
+        jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
     JL_UNLOCK(&codegen_lock);
     return success;
 }
 
-bool jl_type_mappable_to_c(jl_value_t *ty);
-
 // declare a C-callable entry point; called during code loading from the toplevel
 extern "C" JL_DLLEXPORT
 void jl_extern_c(jl_value_t *declrt, jl_tupletype_t *sigt)
@@ -292,7 +288,7 @@ void jl_extern_c(jl_value_t *declrt, jl_tupletype_t *sigt)
     size_t i, nargs = jl_nparams(sigt);
     for (i = 1; i < nargs; i++) {
         jl_value_t *ati = jl_tparam(sigt, i);
-        if (!jl_is_concrete_type(ati) || jl_is_kind(ati))
+        if (!jl_is_concrete_type(ati) || jl_is_kind(ati) || !jl_type_mappable_to_c(ati))
             jl_error("@ccallable: argument types must be concrete");
     }
 
@@ -317,8 +313,8 @@ jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT
 {
     JL_LOCK(&codegen_lock); // also disables finalizers, to prevent any unexpected recursion
     uint64_t compiler_start_time = 0;
-    int tid = jl_threadid();
-    if (jl_measure_compile_time[tid])
+    uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
+    if (measure_compile_time_enabled)
         compiler_start_time = jl_hrtime();
     // if we don't have any decls already, try to generate it now
     jl_code_info_t *src = NULL;
@@ -356,8 +352,8 @@ jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT
     else {
         codeinst = NULL;
     }
-    if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
-        jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
+    if (codegen_lock.count == 1 && measure_compile_time_enabled)
+        jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
     JL_UNLOCK(&codegen_lock);
     JL_GC_POP();
     return codeinst;
@@ -371,8 +367,8 @@ void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec)
     }
     JL_LOCK(&codegen_lock);
     uint64_t compiler_start_time = 0;
-    int tid = jl_threadid();
-    if (jl_measure_compile_time[tid])
+    uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
+    if (measure_compile_time_enabled)
         compiler_start_time = jl_hrtime();
     if (unspec->invoke == NULL) {
         jl_code_info_t *src = NULL;
@@ -400,8 +396,8 @@ void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec)
         }
         JL_GC_POP();
     }
-    if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
-        jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
+    if (codegen_lock.count == 1 && measure_compile_time_enabled)
+        jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
     JL_UNLOCK(&codegen_lock); // Might GC
 }
 
@@ -409,14 +405,14 @@ void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec)
 // get a native disassembly for a compiled method
 extern "C" JL_DLLEXPORT
 jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
-        int raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo)
+        char raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary)
 {
     // printing via disassembly
     jl_code_instance_t *codeinst = jl_generate_fptr(mi, world);
     if (codeinst) {
         uintptr_t fptr = (uintptr_t)codeinst->invoke;
         if (getwrapper)
-            return jl_dump_fptr_asm(fptr, raw_mc, asm_variant, debuginfo);
+            return jl_dump_fptr_asm(fptr, raw_mc, asm_variant, debuginfo, binary);
         uintptr_t specfptr = (uintptr_t)codeinst->specptr.fptr;
         if (fptr == (uintptr_t)&jl_fptr_const_return && specfptr == 0) {
             // normally we prevent native code from being generated for these functions,
@@ -424,8 +420,8 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
             // so create an exception here so we can print pretty our lies
             JL_LOCK(&codegen_lock); // also disables finalizers, to prevent any unexpected recursion
             uint64_t compiler_start_time = 0;
-            int tid = jl_threadid();
-            if (jl_measure_compile_time[tid])
+            uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
+            if (measure_compile_time_enabled)
                 compiler_start_time = jl_hrtime();
             specfptr = (uintptr_t)codeinst->specptr.fptr;
             if (specfptr == 0) {
@@ -450,18 +446,19 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
                 }
                 JL_GC_POP();
             }
-            if (jl_measure_compile_time[tid])
-                jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
+            if (measure_compile_time_enabled)
+                jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
             JL_UNLOCK(&codegen_lock);
         }
         if (specfptr != 0)
-            return jl_dump_fptr_asm(specfptr, raw_mc, asm_variant, debuginfo);
+            return jl_dump_fptr_asm(specfptr, raw_mc, asm_variant, debuginfo, binary);
     }
 
     // whatever, that didn't work - use the assembler output instead
-    if (raw_mc) // eh, give up, this flag doesn't really work anyways normally
-        return (jl_value_t*)jl_pchar_to_array("", 0);
-    return jl_dump_llvm_asm(jl_get_llvmf_defn(mi, world, getwrapper, true, jl_default_cgparams), asm_variant, debuginfo);
+    void *F = jl_get_llvmf_defn(mi, world, getwrapper, true, jl_default_cgparams);
+    if (!F)
+        return jl_an_empty_string;
+    return jl_dump_function_asm(F, raw_mc, asm_variant, debuginfo, binary);
 }
 
 // A simple forwarding class, since OrcJIT v2 needs a unique_ptr, while we have a shared_ptr
@@ -586,11 +583,13 @@ CompilerResultT JuliaOJIT::CompilerT::operator()(Module &M)
     JL_TIMING(LLVM_OPT);
 
     int optlevel;
+    int optlevel_min;
     if (jl_generating_output()) {
         optlevel = 0;
     }
     else {
         optlevel = jl_options.opt_level;
+        optlevel_min = jl_options.opt_level_min;
         for (auto &F : M.functions()) {
             if (!F.getBasicBlockList().empty()) {
                 Attribute attr = F.getFnAttribute("julia-optimization-level");
@@ -602,6 +601,7 @@ CompilerResultT JuliaOJIT::CompilerT::operator()(Module &M)
                 }
             }
         }
+        optlevel = std::max(optlevel, optlevel_min);
     }
     if (optlevel == 0)
         jit.PM0.run(M);
@@ -944,8 +944,7 @@ void jl_merge_module(Module *dest, std::unique_ptr<Module> src)
             //    continue;
             //}
             else {
-                assert(dG->isDeclaration() || (dG->getInitializer() == sG->getInitializer() &&
-                            dG->isConstant() && sG->isConstant()));
+                assert(dG->isDeclaration() || dG->getInitializer() == sG->getInitializer());
                 dG->replaceAllUsesWith(sG);
                 dG->eraseFromParent();
             }
diff --git a/src/jitlayers.h b/src/jitlayers.h
index b517711185e454..e3cc9245932ac4 100644
--- a/src/jitlayers.h
+++ b/src/jitlayers.h
@@ -144,11 +144,7 @@ typedef JITSymbol JL_SymbolInfo;
 using RTDyldObjHandleT = orc::VModuleKey;
 #endif
 
-#if JL_LLVM_VERSION >= 100000
 using CompilerResultT = Expected<std::unique_ptr<llvm::MemoryBuffer>>;
-#else
-using CompilerResultT = std::unique_ptr<llvm::MemoryBuffer>;
-#endif
 
 class JuliaOJIT {
     struct CompilerT : public orc::IRCompileLayer::IRCompiler {
diff --git a/src/jl_exported_data.inc b/src/jl_exported_data.inc
index 1f10f6ef43991d..b8d5ae0e35b29f 100644
--- a/src/jl_exported_data.inc
+++ b/src/jl_exported_data.inc
@@ -5,17 +5,18 @@
     XX(jl_abstractstring_type) \
     XX(jl_an_empty_string) \
     XX(jl_an_empty_vec_any) \
-    XX(jl_any_type) \
     XX(jl_anytuple_type) \
     XX(jl_anytuple_type_type) \
-    XX(jl_argument_type) \
+    XX(jl_any_type) \
     XX(jl_argumenterror_type) \
+    XX(jl_argument_type) \
     XX(jl_array_any_type) \
     XX(jl_array_int32_type) \
     XX(jl_array_symbol_type) \
     XX(jl_array_type) \
     XX(jl_array_typename) \
     XX(jl_array_uint8_type) \
+    XX(jl_atomicerror_type) \
     XX(jl_base_module) \
     XX(jl_bool_type) \
     XX(jl_bottom_type) \
@@ -24,8 +25,8 @@
     XX(jl_char_type) \
     XX(jl_code_info_type) \
     XX(jl_code_instance_type) \
-    XX(jl_core_module) \
     XX(jl_const_type) \
+    XX(jl_core_module) \
     XX(jl_datatype_type) \
     XX(jl_densearray_type) \
     XX(jl_diverror_exception) \
@@ -48,6 +49,7 @@
     XX(jl_int32_type) \
     XX(jl_int64_type) \
     XX(jl_int8_type) \
+    XX(jl_interconditional_type) \
     XX(jl_interrupt_exception) \
     XX(jl_intrinsic_type) \
     XX(jl_lineinfonode_type) \
@@ -57,10 +59,10 @@
     XX(jl_loaderror_type) \
     XX(jl_main_module) \
     XX(jl_memory_exception) \
+    XX(jl_methoderror_type) \
     XX(jl_method_instance_type) \
     XX(jl_method_match_type) \
     XX(jl_method_type) \
-    XX(jl_methoderror_type) \
     XX(jl_methtable_type) \
     XX(jl_module_type) \
     XX(jl_namedtuple_type) \
@@ -70,9 +72,11 @@
     XX(jl_nothing) \
     XX(jl_nothing_type) \
     XX(jl_number_type) \
-    XX(jl_partial_struct_type) \
+    XX(jl_opaque_closure_type) \
+    XX(jl_opaque_closure_typename) \
+    XX(jl_pair_type) \
     XX(jl_partial_opaque_type) \
-    XX(jl_interconditional_type) \
+    XX(jl_partial_struct_type) \
     XX(jl_phicnode_type) \
     XX(jl_phinode_type) \
     XX(jl_pinode_type) \
@@ -94,20 +98,20 @@
     XX(jl_true) \
     XX(jl_tuple_typename) \
     XX(jl_tvar_type) \
-    XX(jl_type_type) \
-    XX(jl_type_type_mt) \
-    XX(jl_type_typename) \
     XX(jl_typedslot_type) \
     XX(jl_typeerror_type) \
     XX(jl_typemap_entry_type) \
     XX(jl_typemap_level_type) \
     XX(jl_typename_type) \
     XX(jl_typeofbottom_type) \
+    XX(jl_type_type) \
+    XX(jl_type_type_mt) \
+    XX(jl_type_typename) \
     XX(jl_uint16_type) \
     XX(jl_uint32_type) \
     XX(jl_uint64_type) \
-    XX(jl_uint8_type) \
     XX(jl_uint8pointer_type) \
+    XX(jl_uint8_type) \
     XX(jl_undefref_exception) \
     XX(jl_undefvarerror_type) \
     XX(jl_unionall_type) \
@@ -115,11 +119,9 @@
     XX(jl_upsilonnode_type) \
     XX(jl_vararg_type) \
     XX(jl_vecelement_typename) \
-    XX(jl_void_type) \
     XX(jl_voidpointer_type) \
+    XX(jl_void_type) \
     XX(jl_weakref_type) \
-    XX(jl_opaque_closure_type) \
-    XX(jl_opaque_closure_typename)
 
 // Data symbols that are defined inside the public libjulia
 #define JL_EXPORTED_DATA_SYMBOLS(XX) \
diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc
index bf56eb19564a59..877c603c7ac3ed 100644
--- a/src/jl_exported_funcs.inc
+++ b/src/jl_exported_funcs.inc
@@ -1,12 +1,6 @@
 #define JL_EXPORTED_FUNCS(XX) \
-    XX(jl_) \
-    XX(jl_abs_float) \
-    XX(jl_abs_float_withtype) \
     XX(jl_active_task_stack) \
-    XX(jl_add_float) \
-    XX(jl_add_int) \
     XX(jl_add_optimization_passes) \
-    XX(jl_add_ptr) \
     XX(jl_add_standard_imports) \
     XX(jl_alignment) \
     XX(jl_alloc_array_1d) \
@@ -16,8 +10,8 @@
     XX(jl_alloc_svec) \
     XX(jl_alloc_svec_uninit) \
     XX(jl_alloc_vec_any) \
-    XX(jl_and_int) \
     XX(jl_apply_array_type) \
+    XX(jl_apply_cmpswap_type) \
     XX(jl_apply_generic) \
     XX(jl_apply_tuple_type) \
     XX(jl_apply_tuple_type_v) \
@@ -50,9 +44,14 @@
     XX(jl_array_to_string) \
     XX(jl_array_typetagdata) \
     XX(jl_arrayunset) \
-    XX(jl_ashr_int) \
     XX(jl_astaggedvalue) \
     XX(jl_atexit_hook) \
+    XX(jl_atomic_bool_cmpswap_bits) \
+    XX(jl_atomic_cmpswap_bits) \
+    XX(jl_atomic_error) \
+    XX(jl_atomic_new_bits) \
+    XX(jl_atomic_store_bits) \
+    XX(jl_atomic_swap_bits) \
     XX(jl_backtrace_from_here) \
     XX(jl_base_relative_to) \
     XX(jl_binding_owner) \
@@ -81,8 +80,6 @@
     XX(jl_box_uint8) \
     XX(jl_box_uint8pointer) \
     XX(jl_box_voidpointer) \
-    XX(jl_breakpoint) \
-    XX(jl_bswap_int) \
     XX(jl_call) \
     XX(jl_call0) \
     XX(jl_call1) \
@@ -96,16 +93,6 @@
     XX(jl_cglobal) \
     XX(jl_cglobal_auto) \
     XX(jl_checked_assignment) \
-    XX(jl_checked_sadd_int) \
-    XX(jl_checked_sdiv_int) \
-    XX(jl_checked_smul_int) \
-    XX(jl_checked_srem_int) \
-    XX(jl_checked_ssub_int) \
-    XX(jl_checked_uadd_int) \
-    XX(jl_checked_udiv_int) \
-    XX(jl_checked_umul_int) \
-    XX(jl_checked_urem_int) \
-    XX(jl_checked_usub_int) \
     XX(jl_clear_implicit_imports) \
     XX(jl_clear_malloc_data) \
     XX(jl_clock_now) \
@@ -117,9 +104,6 @@
     XX(jl_compute_fieldtypes) \
     XX(jl_copy_ast) \
     XX(jl_copy_code_info) \
-    XX(jl_copysign_float) \
-    XX(jl_cpuid) \
-    XX(jl_cpuidex) \
     XX(jl_cpu_pause) \
     XX(jl_cpu_threads) \
     XX(jl_cpu_wake) \
@@ -127,23 +111,19 @@
     XX(jl_create_native) \
     XX(jl_create_system_image) \
     XX(jl_cstr_to_string) \
-    XX(jl_ctlz_int) \
-    XX(jl_ctpop_int) \
-    XX(jl_cttz_int) \
     XX(jl_current_exception) \
     XX(jl_debug_method_invalidation) \
     XX(jl_declare_constant) \
     XX(jl_defines_or_exports_p) \
     XX(jl_deprecate_binding) \
-    XX(jl_div_float) \
     XX(jl_dlclose) \
     XX(jl_dlopen) \
     XX(jl_dlsym) \
     XX(jl_dump_compiles) \
     XX(jl_dump_fptr_asm) \
+    XX(jl_dump_function_asm) \
     XX(jl_dump_function_ir) \
     XX(jl_dump_host_cpu) \
-    XX(jl_dump_llvm_asm) \
     XX(jl_dump_method_asm) \
     XX(jl_egal) \
     XX(jl_egal__bits) \
@@ -154,8 +134,6 @@
     XX(jl_enter_threaded_region) \
     XX(jl_environ) \
     XX(jl_eof_error) \
-    XX(jl_eq_float) \
-    XX(jl_eq_int) \
     XX(jl_eqtable_get) \
     XX(jl_eqtable_nextind) \
     XX(jl_eqtable_pop) \
@@ -172,89 +150,14 @@
     XX(jl_exit_on_sigint) \
     XX(jl_exit_threaded_region) \
     XX(jl_expand) \
+    XX(jl_resolve_globals_in_ir) \
     XX(jl_expand_and_resolve) \
     XX(jl_expand_stmt) \
     XX(jl_expand_stmt_with_loc) \
     XX(jl_expand_with_loc) \
     XX(jl_expand_with_loc_warn) \
     XX(jl_extern_c) \
-    XX(jl_f__abstracttype) \
-    XX(jl_f__apply) \
-    XX(jl_f__apply_iterate) \
-    XX(jl_f__apply_pure) \
-    XX(jl_f__call_in_world) \
-    XX(jl_f__call_latest) \
-    XX(jl_f_applicable) \
-    XX(jl_f_apply_type) \
-    XX(jl_f_arrayref) \
-    XX(jl_f_arrayset) \
-    XX(jl_f_arraysize) \
-    XX(jl_f_const_arrayref) \
-    XX(jl_f__equiv_typedef) \
-    XX(jl_f__expr) \
-    XX(jl_f_fieldtype) \
-    XX(jl_f_getfield) \
     XX(jl_field_index) \
-    XX(jl_field_isdefined) \
-    XX(jl_f_ifelse) \
-    XX(jl_finalize) \
-    XX(jl_finalize_th) \
-    XX(jl_find_free_typevars) \
-    XX(jl_f_intrinsic_call) \
-    XX(jl_f_invoke) \
-    XX(jl_f_invoke_kwsorter) \
-    XX(jl_first_argument_datatype) \
-    XX(jl_f_is) \
-    XX(jl_f_isa) \
-    XX(jl_f_isdefined) \
-    XX(jl_f_issubtype) \
-    XX(jl_flipsign_int) \
-    XX(jl_floor_llvm) \
-    XX(jl_floor_llvm_withtype) \
-    XX(jl_fl_parse) \
-    XX(jl_flush_cstdio) \
-    XX(jl_fma_float) \
-    XX(jl_f_new_module) \
-    XX(jl_f_nfields) \
-    XX(jl_forceclose_uv) \
-    XX(jl_format_filename) \
-    XX(jl_fpext) \
-    XX(jl_fpiseq) \
-    XX(jl_fpislt) \
-    XX(jl_f__primitivetype) \
-    XX(jl_fptosi) \
-    XX(jl_fptoui) \
-    XX(jl_fptr_args) \
-    XX(jl_fptr_const_return) \
-    XX(jl_fptr_interpret_call) \
-    XX(jl_fptr_sparam) \
-    XX(jl_fptrunc) \
-    XX(jl_free) \
-    XX(jl_free_stack) \
-    XX(jl_fs_access) \
-    XX(jl_fs_chmod) \
-    XX(jl_fs_chown) \
-    XX(jl_fs_close) \
-    XX(jl_f_setfield) \
-    XX(jl_f__setsuper) \
-    XX(jl_f_sizeof) \
-    XX(jl_fs_read) \
-    XX(jl_fs_read_byte) \
-    XX(jl_fs_rename) \
-    XX(jl_fs_sendfile) \
-    XX(jl_fs_symlink) \
-    XX(jl_fstat) \
-    XX(jl_f__structtype) \
-    XX(jl_fs_unlink) \
-    XX(jl_f_svec) \
-    XX(jl_fs_write) \
-    XX(jl_f_throw) \
-    XX(jl_ftruncate) \
-    XX(jl_f_tuple) \
-    XX(jl_f_typeassert) \
-    XX(jl_f__typebody) \
-    XX(jl_f_typeof) \
-    XX(jl_f__typevar) \
     XX(jl_gc_add_finalizer) \
     XX(jl_gc_add_finalizer_th) \
     XX(jl_gc_add_ptr_finalizer) \
@@ -311,7 +214,6 @@
     XX(jl_generating_output) \
     XX(jl_generic_function_def) \
     XX(jl_gensym) \
-    XX(jl_getaddrinfo) \
     XX(jl_getallocationgranularity) \
     XX(jl_get_ARCH) \
     XX(jl_get_backtrace) \
@@ -353,6 +255,7 @@
     XX(jl_get_nth_field_checked) \
     XX(jl_get_nth_field_noalloc) \
     XX(jl_getpagesize) \
+    XX(jl_get_pgcstack) \
     XX(jl_getpid) \
     XX(jl_get_ptls_states) \
     XX(jl_get_root_symbol) \
@@ -379,8 +282,10 @@
     XX(jl_id_start_char) \
     XX(jl_idtable_rehash) \
     XX(jl_infer_thunk) \
-    XX(jl_init_restored_modules) \
+    XX(jl_init) \
     XX(jl_init__threading) \
+    XX(jl_init_restored_modules) \
+    XX(jl_init_with_image) \
     XX(jl_init_with_image__threading) \
     XX(jl_install_sigint_handler) \
     XX(jl_instantiate_type_in_env) \
@@ -419,12 +324,7 @@
     XX(jl_is_unary_and_binary_operator) \
     XX(jl_is_unary_operator) \
     XX(jl_lazy_load_and_lookup) \
-    XX(jl_le_float) \
     XX(jl_lisp_prompt) \
-    XX(jl_LLVMCreateDisasm) \
-    XX(jl_LLVMDisasmInstruction) \
-    XX(jl_LLVMFlipSign) \
-    XX(jl_LLVMSMod) \
     XX(jl_load) \
     XX(jl_load_) \
     XX(jl_load_and_lookup) \
@@ -432,9 +332,7 @@
     XX(jl_load_file_string) \
     XX(jl_lookup_code_address) \
     XX(jl_lseek) \
-    XX(jl_lshr_int) \
     XX(jl_lstat) \
-    XX(jl_lt_float) \
     XX(jl_macroexpand) \
     XX(jl_macroexpand1) \
     XX(jl_malloc) \
@@ -449,7 +347,6 @@
     XX(jl_method_table_insert) \
     XX(jl_methtable_lookup) \
     XX(jl_mi_cache_insert) \
-    XX(jl_mmap) \
     XX(jl_module_build_id) \
     XX(jl_module_export) \
     XX(jl_module_exports_p) \
@@ -462,16 +359,8 @@
     XX(jl_module_using) \
     XX(jl_module_usings) \
     XX(jl_module_uuid) \
-    XX(jl_muladd_float) \
-    XX(jl_mul_float) \
-    XX(jl_mul_int) \
     XX(jl_native_alignment) \
     XX(jl_nb_available) \
-    XX(jl_ne_float) \
-    XX(jl_neg_float) \
-    XX(jl_neg_float_withtype) \
-    XX(jl_neg_int) \
-    XX(jl_ne_int) \
     XX(jl_new_array) \
     XX(jl_new_bits) \
     XX(jl_new_code_info_uninit) \
@@ -493,13 +382,11 @@
     XX(jl_next_from_addrinfo) \
     XX(jl_no_exc_handler) \
     XX(jl_normalize_to_compilable_sig) \
-    XX(jl_not_int) \
     XX(jl_object_id) \
     XX(jl_object_id_) \
     XX(jl_obvious_subtype) \
     XX(jl_operator_precedence) \
     XX(jl_op_suffix_char) \
-    XX(jl_or_int) \
     XX(jl_parse) \
     XX(jl_parse_all) \
     XX(jl_parse_input_line) \
@@ -513,6 +400,7 @@
     XX(jl_pop_handler) \
     XX(jl_preload_sysimg_so) \
     XX(jl_prepend_cwd) \
+    XX(jl_print_backtrace) \
     XX(jl_printf) \
     XX(jl_process_events) \
     XX(jl_profile_clear_data) \
@@ -527,15 +415,12 @@
     XX(jl_ptrarrayref) \
     XX(jl_ptr_to_array) \
     XX(jl_ptr_to_array_1d) \
-    XX(jl_pwrite) \
     XX(jl_queue_work) \
     XX(jl_raise_debugger) \
     XX(jl_readuntil) \
     XX(jl_read_verify_header) \
     XX(jl_realloc) \
     XX(jl_register_newmeth_tracer) \
-    XX(jl_rem_float) \
-    XX(jl_repl_raise_sigtstp) \
     XX(jl_reshape_array) \
     XX(jl_restore_excstack) \
     XX(jl_restore_incremental) \
@@ -545,14 +430,11 @@
     XX(jl_rethrow) \
     XX(jl_rethrow_other) \
     XX(jl_rettype_inferred) \
-    XX(jl_rint_llvm) \
-    XX(jl_rint_llvm_withtype) \
     XX(jl_running_on_valgrind) \
     XX(jl_safe_printf) \
     XX(jl_save_incremental) \
     XX(jl_save_system_image) \
     XX(jl_SC_CLK_TCK) \
-    XX(jl_sdiv_int) \
     XX(jl_set_ARGS) \
     XX(jl_set_const) \
     XX(jl_set_errno) \
@@ -565,58 +447,19 @@
     XX(jl_set_module_uuid) \
     XX(jl_set_next_task) \
     XX(jl_set_nth_field) \
-    XX(jl_set_ptls_states_getter) \
     XX(jl_set_safe_restore) \
     XX(jl_set_sysimg_so) \
     XX(jl_set_task_tid) \
     XX(jl_set_typeinf_func) \
     XX(jl_set_zero_subnormals) \
-    XX(jl_sext_int) \
-    XX(jl_shl_int) \
     XX(jl_sigatomic_begin) \
     XX(jl_sigatomic_end) \
     XX(jl_sig_throw) \
-    XX(jl_sitofp) \
-    XX(jl_sizeof_ios_t) \
-    XX(jl_sizeof_jl_options) \
-    XX(jl_sizeof_mode_t) \
-    XX(jl_sizeof_off_t) \
-    XX(jl_sizeof_stat) \
-    XX(jl_sizeof_uv_fs_t) \
-    XX(jl_sle_int) \
-    XX(jl_slt_int) \
-    XX(jl_smod_int) \
-    XX(jl_sockaddr_from_addrinfo) \
-    XX(jl_sockaddr_host4) \
-    XX(jl_sockaddr_host6) \
-    XX(jl_sockaddr_is_ip4) \
-    XX(jl_sockaddr_is_ip6) \
-    XX(jl_sockaddr_port4) \
-    XX(jl_sockaddr_port6) \
-    XX(jl_sockaddr_set_port) \
     XX(jl_spawn) \
     XX(jl_specializations_get_linfo) \
     XX(jl_specializations_lookup) \
-    XX(jl_sqrt_llvm) \
-    XX(jl_sqrt_llvm_fast) \
-    XX(jl_sqrt_llvm_fast_withtype) \
-    XX(jl_sqrt_llvm_withtype) \
-    XX(jl_srem_int) \
-    XX(jl_stat) \
-    XX(jl_stat_blksize) \
-    XX(jl_stat_blocks) \
-    XX(jl_stat_ctime) \
-    XX(jl_stat_dev) \
-    XX(jl_stat_gid) \
     XX(jl_static_show) \
     XX(jl_static_show_func_sig) \
-    XX(jl_stat_ino) \
-    XX(jl_stat_mode) \
-    XX(jl_stat_mtime) \
-    XX(jl_stat_nlink) \
-    XX(jl_stat_rdev) \
-    XX(jl_stat_size) \
-    XX(jl_stat_uid) \
     XX(jl_stderr_obj) \
     XX(jl_stderr_stream) \
     XX(jl_stdin_stream) \
@@ -627,9 +470,6 @@
     XX(jl_string_to_array) \
     XX(jl_strtod_c) \
     XX(jl_strtof_c) \
-    XX(jl_sub_float) \
-    XX(jl_sub_int) \
-    XX(jl_sub_ptr) \
     XX(jl_substrtod) \
     XX(jl_substrtof) \
     XX(jl_subtype) \
@@ -653,12 +493,6 @@
     XX(jl_take_buffer) \
     XX(jl_task_get_next) \
     XX(jl_task_stack_buffer) \
-    XX(jl_tcp_bind) \
-    XX(jl_tcp_connect) \
-    XX(jl_tcp_getpeername) \
-    XX(jl_tcp_getsockname) \
-    XX(jl_tcp_quickack) \
-    XX(jl_tcp_reuseport) \
     XX(jl_test_cpu_feature) \
     XX(jl_threadid) \
     XX(jl_threading_enabled) \
@@ -668,9 +502,6 @@
     XX(jl_too_many_args) \
     XX(jl_toplevel_eval) \
     XX(jl_toplevel_eval_in) \
-    XX(jl_trunc_int) \
-    XX(jl_trunc_llvm) \
-    XX(jl_trunc_llvm_withtype) \
     XX(jl_try_substrtod) \
     XX(jl_try_substrtof) \
     XX(jl_tty_set_mode) \
@@ -683,7 +514,6 @@
     XX(jl_typeinf_end) \
     XX(jl_type_intersection) \
     XX(jl_type_intersection_with_env) \
-    XX(jl_typemax_uint) \
     XX(jl_type_morespecific) \
     XX(jl_type_morespecific_no_subtype) \
     XX(jl_typename_str) \
@@ -693,12 +523,6 @@
     XX(jl_type_to_llvm) \
     XX(jl_type_union) \
     XX(jl_type_unionall) \
-    XX(jl_udiv_int) \
-    XX(jl_udp_bind) \
-    XX(jl_udp_send) \
-    XX(jl_uitofp) \
-    XX(jl_ule_int) \
-    XX(jl_ult_int) \
     XX(jl_unbox_bool) \
     XX(jl_unbox_float32) \
     XX(jl_unbox_float64) \
@@ -716,36 +540,6 @@
     XX(jl_uncompress_argnames) \
     XX(jl_uncompress_ir) \
     XX(jl_undefined_var_error) \
-    XX(jl_urem_int) \
-    XX(jl_uv_associate_julia_struct) \
-    XX(jl_uv_buf_base) \
-    XX(jl_uv_buf_len) \
-    XX(jl_uv_buf_set_base) \
-    XX(jl_uv_buf_set_len) \
-    XX(jl_uv_connect_handle) \
-    XX(jl_uv_disassociate_julia_struct) \
-    XX(jl_uv_file_handle) \
-    XX(jl_uv_flush) \
-    XX(jl_uv_fs_t_path) \
-    XX(jl_uv_fs_t_ptr) \
-    XX(jl_uv_handle) \
-    XX(jl_uv_handle_data) \
-    XX(jl_uv_handle_type) \
-    XX(jl_uv_interface_addresses) \
-    XX(jl_uv_interface_address_is_internal) \
-    XX(jl_uv_interface_address_sockaddr) \
-    XX(jl_uv_process_data) \
-    XX(jl_uv_process_pid) \
-    XX(jl_uv_putb) \
-    XX(jl_uv_putc) \
-    XX(jl_uv_puts) \
-    XX(jl_uv_req_data) \
-    XX(jl_uv_req_set_data) \
-    XX(jl_uv_sizeof_interface_address) \
-    XX(jl_uv_unix_fd_is_watched) \
-    XX(jl_uv_write) \
-    XX(jl_uv_writecb) \
-    XX(jl_uv_write_handle) \
     XX(jl_valueof) \
     XX(jl_value_ptr) \
     XX(jl_ver_is_release) \
@@ -756,6 +550,5 @@
     XX(jl_vexceptionf) \
     XX(jl_vprintf) \
     XX(jl_wakeup_thread) \
-    XX(jl_xor_int) \
     XX(jl_yield) \
-    XX(jl_zext_int)
+
diff --git a/src/jl_uv.c b/src/jl_uv.c
index 35f83b3314e91b..719d3bf9c60101 100644
--- a/src/jl_uv.c
+++ b/src/jl_uv.c
@@ -105,11 +105,11 @@ static void jl_uv_closeHandle(uv_handle_t *handle)
         JL_STDERR = (JL_STREAM*)STDERR_FILENO;
     // also let the client app do its own cleanup
     if (handle->type != UV_FILE && handle->data) {
-        jl_ptls_t ptls = jl_get_ptls_states();
-        size_t last_age = ptls->world_age;
-        ptls->world_age = jl_world_counter;
+        jl_task_t *ct = jl_current_task;
+        size_t last_age = ct->world_age;
+        ct->world_age = jl_world_counter;
         jl_uv_call_close_callback((jl_value_t*)handle->data);
-        ptls->world_age = last_age;
+        ct->world_age = last_age;
     }
     if (handle == (uv_handle_t*)&signal_async)
         return;
@@ -205,16 +205,17 @@ extern volatile unsigned _threadedregion;
 
 JL_DLLEXPORT int jl_process_events(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     uv_loop_t *loop = jl_io_loop;
-    if (loop && (_threadedregion || ptls->tid == 0)) {
-        jl_gc_safepoint_(ptls);
+    jl_gc_safepoint_(ct->ptls);
+    if (loop && (_threadedregion || ct->tid == 0)) {
         if (jl_atomic_load(&jl_uv_n_waiters) == 0 && jl_mutex_trylock(&jl_uv_mutex)) {
             loop->stop_flag = 0;
             int r = uv_run(loop, UV_RUN_NOWAIT);
             JL_UV_UNLOCK();
             return r;
         }
+        jl_gc_safepoint_(ct->ptls);
     }
     return 0;
 }
@@ -368,6 +369,14 @@ JL_DLLEXPORT int jl_fs_sendfile(uv_os_fd_t src_fd, uv_os_fd_t dst_fd,
     return ret;
 }
 
+JL_DLLEXPORT int jl_fs_hardlink(char *path, char *new_path)
+{
+    uv_fs_t req;
+    int ret = uv_fs_link(unused_uv_loop_arg, &req, path, new_path, NULL);
+    uv_fs_req_cleanup(&req);
+    return ret;
+}
+
 JL_DLLEXPORT int jl_fs_symlink(char *path, char *new_path, int flags)
 {
     uv_fs_t req;
@@ -403,9 +412,9 @@ JL_DLLEXPORT int jl_fs_access(char *path, int mode)
 JL_DLLEXPORT int jl_fs_write(uv_os_fd_t handle, const char *data, size_t len,
                              int64_t offset) JL_NOTSAFEPOINT
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_get_current_task();
     // TODO: fix this cheating
-    if (ptls->safe_restore || ptls->tid != 0)
+    if (jl_get_safe_restore() || ct == NULL || ct->tid != 0)
 #ifdef _OS_WINDOWS_
         return WriteFile(handle, data, len, NULL, NULL);
 #else
@@ -504,8 +513,8 @@ JL_DLLEXPORT void jl_uv_puts(uv_stream_t *stream, const char *str, size_t n)
     }
 
     // TODO: Hack to make CoreIO thread-safer
-    jl_ptls_t ptls = jl_get_ptls_states();
-    if (ptls->tid != 0) {
+    jl_task_t *ct = jl_get_current_task();
+    if (ct == NULL || ct->tid != 0) {
         if (stream == JL_STDOUT) {
             fd = UV_STDOUT_FD;
         }
@@ -965,12 +974,13 @@ JL_DLLEXPORT int jl_tty_set_mode(uv_tty_t *handle, int mode)
     return uv_tty_set_mode(handle, mode_enum);
 }
 
-typedef int (*work_cb_t)(void *, void *);
+typedef int (*work_cb_t)(void *, void *, void *);
 typedef void (*notify_cb_t)(int);
 
 struct work_baton {
     uv_work_t req;
     work_cb_t work_func;
+    void      *ccall_fptr;
     void      *work_args;
     void      *work_retval;
     notify_cb_t notify_func;
@@ -984,7 +994,7 @@ struct work_baton {
 void jl_work_wrapper(uv_work_t *req)
 {
     struct work_baton *baton = (struct work_baton*) req->data;
-    baton->work_func(baton->work_args, baton->work_retval);
+    baton->work_func(baton->ccall_fptr, baton->work_args, baton->work_retval);
 }
 
 void jl_work_notifier(uv_work_t *req, int status)
@@ -994,12 +1004,13 @@ void jl_work_notifier(uv_work_t *req, int status)
     free(baton);
 }
 
-JL_DLLEXPORT int jl_queue_work(work_cb_t work_func, void *work_args, void *work_retval,
+JL_DLLEXPORT int jl_queue_work(work_cb_t work_func, void *ccall_fptr, void *work_args, void *work_retval,
                                notify_cb_t notify_func, int notify_idx)
 {
     struct work_baton *baton = (struct work_baton*)malloc_s(sizeof(struct work_baton));
     baton->req.data = (void*) baton;
     baton->work_func = work_func;
+    baton->ccall_fptr = ccall_fptr;
     baton->work_args = work_args;
     baton->work_retval = work_retval;
     baton->notify_func = notify_func;
diff --git a/src/jlapi.c b/src/jlapi.c
index d27f7e03d958ce..4851cebfe15d9a 100644
--- a/src/jlapi.c
+++ b/src/jlapi.c
@@ -91,6 +91,19 @@ JL_DLLEXPORT void jl_init(void)
     free(libbindir);
 }
 
+// HACK: remove this for Julia 1.8 (see <https://github.com/JuliaLang/julia/issues/40730>)
+JL_DLLEXPORT void jl_init__threading(void)
+{
+    jl_init();
+}
+
+// HACK: remove this for Julia 1.8 (see <https://github.com/JuliaLang/julia/issues/40730>)
+JL_DLLEXPORT void jl_init_with_image__threading(const char *julia_bindir,
+                                     const char *image_relative_path)
+{
+    jl_init_with_image(julia_bindir, image_relative_path);
+}
+
 JL_DLLEXPORT jl_value_t *jl_eval_string(const char *str)
 {
     jl_value_t *r;
@@ -104,7 +117,7 @@ JL_DLLEXPORT jl_value_t *jl_eval_string(const char *str)
         jl_exception_clear();
     }
     JL_CATCH {
-        jl_get_ptls_states()->previous_exception = jl_current_exception();
+        jl_current_task->ptls->previous_exception = jl_current_exception();
         r = NULL;
     }
     return r;
@@ -112,18 +125,18 @@ JL_DLLEXPORT jl_value_t *jl_eval_string(const char *str)
 
 JL_DLLEXPORT jl_value_t *jl_current_exception(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT
 {
-    jl_excstack_t *s = jl_get_ptls_states()->current_task->excstack;
+    jl_excstack_t *s = jl_current_task->excstack;
     return s && s->top != 0 ? jl_excstack_exception(s, s->top) : jl_nothing;
 }
 
 JL_DLLEXPORT jl_value_t *jl_exception_occurred(void)
 {
-    return jl_get_ptls_states()->previous_exception;
+    return jl_current_task->ptls->previous_exception;
 }
 
 JL_DLLEXPORT void jl_exception_clear(void)
 {
-    jl_get_ptls_states()->previous_exception = NULL;
+    jl_current_task->ptls->previous_exception = NULL;
 }
 
 // get the name of a type as a string
@@ -163,6 +176,7 @@ JL_DLLEXPORT const char *jl_string_ptr(jl_value_t *s)
 JL_DLLEXPORT jl_value_t *jl_call(jl_function_t *f, jl_value_t **args, int32_t nargs)
 {
     jl_value_t *v;
+    jl_task_t *ct = jl_current_task;
     nargs++; // add f to args
     JL_TRY {
         jl_value_t **argv;
@@ -170,15 +184,15 @@ JL_DLLEXPORT jl_value_t *jl_call(jl_function_t *f, jl_value_t **args, int32_t na
         argv[0] = (jl_value_t*)f;
         for (int i = 1; i < nargs; i++)
             argv[i] = args[i - 1];
-        size_t last_age = jl_get_ptls_states()->world_age;
-        jl_get_ptls_states()->world_age = jl_get_world_counter();
+        size_t last_age = ct->world_age;
+        ct->world_age = jl_get_world_counter();
         v = jl_apply(argv, nargs);
-        jl_get_ptls_states()->world_age = last_age;
+        ct->world_age = last_age;
         JL_GC_POP();
         jl_exception_clear();
     }
     JL_CATCH {
-        jl_get_ptls_states()->previous_exception = jl_current_exception();
+        ct->ptls->previous_exception = jl_current_exception();
         v = NULL;
     }
     return v;
@@ -187,17 +201,18 @@ JL_DLLEXPORT jl_value_t *jl_call(jl_function_t *f, jl_value_t **args, int32_t na
 JL_DLLEXPORT jl_value_t *jl_call0(jl_function_t *f)
 {
     jl_value_t *v;
+    jl_task_t *ct = jl_current_task;
     JL_TRY {
         JL_GC_PUSH1(&f);
-        size_t last_age = jl_get_ptls_states()->world_age;
-        jl_get_ptls_states()->world_age = jl_get_world_counter();
+        size_t last_age = ct->world_age;
+        ct->world_age = jl_get_world_counter();
         v = jl_apply_generic(f, NULL, 0);
-        jl_get_ptls_states()->world_age = last_age;
+        ct->world_age = last_age;
         JL_GC_POP();
         jl_exception_clear();
     }
     JL_CATCH {
-        jl_get_ptls_states()->previous_exception = jl_current_exception();
+        ct->ptls->previous_exception = jl_current_exception();
         v = NULL;
     }
     return v;
@@ -206,20 +221,21 @@ JL_DLLEXPORT jl_value_t *jl_call0(jl_function_t *f)
 JL_DLLEXPORT jl_value_t *jl_call1(jl_function_t *f, jl_value_t *a)
 {
     jl_value_t *v;
+    jl_task_t *ct = jl_current_task;
     JL_TRY {
         jl_value_t **argv;
         JL_GC_PUSHARGS(argv, 2);
         argv[0] = f;
         argv[1] = a;
-        size_t last_age = jl_get_ptls_states()->world_age;
-        jl_get_ptls_states()->world_age = jl_get_world_counter();
+        size_t last_age = ct->world_age;
+        ct->world_age = jl_get_world_counter();
         v = jl_apply(argv, 2);
-        jl_get_ptls_states()->world_age = last_age;
+        ct->world_age = last_age;
         JL_GC_POP();
         jl_exception_clear();
     }
     JL_CATCH {
-        jl_get_ptls_states()->previous_exception = jl_current_exception();
+        ct->ptls->previous_exception = jl_current_exception();
         v = NULL;
     }
     return v;
@@ -228,21 +244,22 @@ JL_DLLEXPORT jl_value_t *jl_call1(jl_function_t *f, jl_value_t *a)
 JL_DLLEXPORT jl_value_t *jl_call2(jl_function_t *f, jl_value_t *a, jl_value_t *b)
 {
     jl_value_t *v;
+    jl_task_t *ct = jl_current_task;
     JL_TRY {
         jl_value_t **argv;
         JL_GC_PUSHARGS(argv, 3);
         argv[0] = f;
         argv[1] = a;
         argv[2] = b;
-        size_t last_age = jl_get_ptls_states()->world_age;
-        jl_get_ptls_states()->world_age = jl_get_world_counter();
+        size_t last_age = ct->world_age;
+        ct->world_age = jl_get_world_counter();
         v = jl_apply(argv, 3);
-        jl_get_ptls_states()->world_age = last_age;
+        ct->world_age = last_age;
         JL_GC_POP();
         jl_exception_clear();
     }
     JL_CATCH {
-        jl_get_ptls_states()->previous_exception = jl_current_exception();
+        ct->ptls->previous_exception = jl_current_exception();
         v = NULL;
     }
     return v;
@@ -259,15 +276,16 @@ JL_DLLEXPORT jl_value_t *jl_call3(jl_function_t *f, jl_value_t *a,
         argv[1] = a;
         argv[2] = b;
         argv[3] = c;
-        size_t last_age = jl_get_ptls_states()->world_age;
-        jl_get_ptls_states()->world_age = jl_get_world_counter();
+        jl_task_t *ct = jl_current_task;
+        size_t last_age = ct->world_age;
+        ct->world_age = jl_get_world_counter();
         v = jl_apply(argv, 4);
-        jl_get_ptls_states()->world_age = last_age;
+        ct->world_age = last_age;
         JL_GC_POP();
         jl_exception_clear();
     }
     JL_CATCH {
-        jl_get_ptls_states()->previous_exception = jl_current_exception();
+        jl_current_task->ptls->previous_exception = jl_current_exception();
         v = NULL;
     }
     return v;
@@ -292,7 +310,7 @@ JL_DLLEXPORT jl_value_t *jl_get_field(jl_value_t *o, const char *fld)
         jl_exception_clear();
     }
     JL_CATCH {
-        jl_get_ptls_states()->previous_exception = jl_current_exception();
+        jl_current_task->ptls->previous_exception = jl_current_exception();
         v = NULL;
     }
     return v;
@@ -305,8 +323,8 @@ JL_DLLEXPORT void jl_sigatomic_begin(void)
 
 JL_DLLEXPORT void jl_sigatomic_end(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    if (ptls->defer_signal == 0)
+    jl_task_t *ct = jl_current_task;
+    if (ct->ptls->defer_signal == 0)
         jl_error("sigatomic_end called in non-sigatomic region");
     JL_SIGATOMIC_END();
 }
@@ -418,33 +436,33 @@ JL_DLLEXPORT jl_value_t *(jl_get_fieldtypes)(jl_value_t *v)
 #ifndef __clang_analyzer__
 JL_DLLEXPORT int8_t (jl_gc_unsafe_enter)(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    return jl_gc_unsafe_enter(ptls);
+    jl_task_t *ct = jl_current_task;
+    return jl_gc_unsafe_enter(ct->ptls);
 }
 
 JL_DLLEXPORT void (jl_gc_unsafe_leave)(int8_t state)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_gc_unsafe_leave(ptls, state);
+    jl_task_t *ct = jl_current_task;
+    jl_gc_unsafe_leave(ct->ptls, state);
 }
 
 JL_DLLEXPORT int8_t (jl_gc_safe_enter)(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    return jl_gc_safe_enter(ptls);
+    jl_task_t *ct = jl_current_task;
+    return jl_gc_safe_enter(ct->ptls);
 }
 
 JL_DLLEXPORT void (jl_gc_safe_leave)(int8_t state)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_gc_safe_leave(ptls, state);
+    jl_task_t *ct = jl_current_task;
+    jl_gc_safe_leave(ct->ptls, state);
 }
 #endif
 
 JL_DLLEXPORT void (jl_gc_safepoint)(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_gc_safepoint_(ptls);
+    jl_task_t *ct = jl_current_task;
+    jl_gc_safepoint_(ct->ptls);
 }
 
 JL_DLLEXPORT void (jl_cpu_pause)(void)
@@ -483,25 +501,13 @@ JL_DLLEXPORT int jl_set_fenv_rounding(int i)
     return fesetround(i);
 }
 
-
-#ifdef JL_ASAN_ENABLED
-JL_DLLEXPORT const char* __asan_default_options()
-{
-    return "allow_user_segv_handler=1:detect_leaks=0";
-    // FIXME: enable LSAN after fixing leaks & defining __lsan_default_suppressions(),
-    //        or defining __lsan_default_options = exitcode=0 once publicly available
-    //        (here and in flisp/flmain.c)
-}
-#endif
-
 static int exec_program(char *program)
 {
     JL_TRY {
         jl_load(jl_main_module, program);
     }
     JL_CATCH {
-        // TODO: It is possible for this output
-        //       to be mangled due to `jlbacktrace`
+        // TODO: It is possible for this output to be mangled due to `jl_print_backtrace`
         //       printing directly to STDERR_FILENO.
         int shown_err = 0;
         jl_printf(JL_STDERR, "error during bootstrap:\n");
@@ -520,32 +526,13 @@ static int exec_program(char *program)
             jl_static_show((JL_STREAM*)STDERR_FILENO, exc);
             jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
         }
-        jlbacktrace(); // written to STDERR_FILENO
+        jl_print_backtrace(); // written to STDERR_FILENO
         jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
         return 1;
     }
     return 0;
 }
 
-#ifdef JL_GF_PROFILE
-static void print_profile(void)
-{
-    size_t i;
-    void **table = jl_base_module->bindings.table;
-    for(i=1; i < jl_base_module->bindings.size; i+=2) {
-        if (table[i] != HT_NOTFOUND) {
-            jl_binding_t *b = (jl_binding_t*)table[i];
-            if (b->value != NULL && jl_is_function(b->value) &&
-                jl_is_gf(b->value)) {
-                jl_printf(JL_STDERR, "%d\t%s\n",
-                           jl_gf_mtable(b->value)->ncalls,
-                           jl_gf_name(b->value)->name);
-            }
-        }
-    }
-}
-#endif
-
 static NOINLINE int true_main(int argc, char *argv[])
 {
     jl_set_ARGS(argc, argv);
@@ -555,10 +542,11 @@ static NOINLINE int true_main(int argc, char *argv[])
 
     if (start_client) {
         JL_TRY {
-            size_t last_age = jl_get_ptls_states()->world_age;
-            jl_get_ptls_states()->world_age = jl_get_world_counter();
+            jl_task_t *ct = jl_current_task;
+            size_t last_age = ct->world_age;
+            ct->world_age = jl_get_world_counter();
             jl_apply(&start_client, 1);
-            jl_get_ptls_states()->world_age = last_age;
+            ct->world_age = last_age;
         }
         JL_CATCH {
             jl_no_exc_handler(jl_current_exception());
@@ -573,9 +561,9 @@ static NOINLINE int true_main(int argc, char *argv[])
         }
     }
 
-    ios_puts("WARNING: Base._start not defined, falling back to economy mode repl.\n", ios_stdout);
+    jl_printf(JL_STDOUT, "WARNING: Base._start not defined, falling back to economy mode repl.\n");
     if (!jl_errorexception_type)
-        ios_puts("WARNING: jl_errorexception_type not defined; any errors will be fatal.\n", ios_stdout);
+        jl_printf(JL_STDOUT, "WARNING: jl_errorexception_type not defined; any errors will be fatal.\n");
 
     while (!ios_eof(ios_stdin)) {
         char *volatile line = NULL;
@@ -597,7 +585,7 @@ static NOINLINE int true_main(int argc, char *argv[])
             jl_printf(JL_STDOUT, "\n");
             free(line);
             line = NULL;
-            uv_run(jl_global_event_loop(),UV_RUN_NOWAIT);
+            jl_process_events();
         }
         JL_CATCH {
             if (line) {
@@ -607,7 +595,7 @@ static NOINLINE int true_main(int argc, char *argv[])
             jl_printf((JL_STREAM*)STDERR_FILENO, "\nparser error:\n");
             jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
             jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
-            jlbacktrace(); // written to STDERR_FILENO
+            jl_print_backtrace(); // written to STDERR_FILENO
         }
     }
     return 0;
@@ -665,7 +653,7 @@ static void rr_detach_teleport(void) {
 #endif
 }
 
-JL_DLLEXPORT int repl_entrypoint(int argc, char *argv[])
+JL_DLLEXPORT int jl_repl_entrypoint(int argc, char *argv[])
 {
     // no-op on Windows, note that the caller must have already converted
     // from `wchar_t` to `UTF-8` already if we're running on Windows.
@@ -695,7 +683,7 @@ JL_DLLEXPORT int repl_entrypoint(int argc, char *argv[])
 
     julia_init(jl_options.image_file_specified ? JL_IMAGE_CWD : JL_IMAGE_JULIA_HOME);
     if (lisp_prompt) {
-        jl_get_ptls_states()->world_age = jl_get_world_counter();
+        jl_current_task->world_age = jl_get_world_counter();
         jl_lisp_prompt();
         return 0;
     }
diff --git a/src/jloptions.c b/src/jloptions.c
index cc5c85a06f0340..ab1af72a04c79c 100644
--- a/src/jloptions.c
+++ b/src/jloptions.c
@@ -47,6 +47,7 @@ jl_options_t jl_options = { 0,    // quiet
                             0,    // code_coverage
                             0,    // malloc_log
                             2,    // opt_level
+                            0,    // opt_level_min
 #ifdef JL_DEBUG_BUILD
                             2,    // debug_level [debug build]
 #else
@@ -122,6 +123,7 @@ static const char opts[]  =
     // code generation options
     " -C, --cpu-target <target> Limit usage of CPU features up to <target>; set to \"help\" to see the available options\n"
     " -O, --optimize={0,1,2,3}  Set the optimization level (default level is 2 if unspecified or 3 if used without a level)\n"
+    " --min-optlevel={0,1,2,3}  Set a lower bound on the optimization level (default is 0)\n"
     " -g, -g <level>            Enable / Set the level of debug info generation"
 #ifdef JL_DEBUG_BUILD
         " (default level for julia-debug is 2 if unspecified or if used without a level)\n"
@@ -129,7 +131,8 @@ static const char opts[]  =
         " (default level is 1 if unspecified or 2 if used without a level)\n"
 #endif
     " --inline={yes|no}         Control whether inlining is permitted, including overriding @inline declarations\n"
-    " --check-bounds={yes|no}   Emit bounds checks always or never (ignoring @inbounds declarations)\n"
+    " --check-bounds={yes|no|auto}\n"
+    "                           Emit bounds checks always, never, or respect @inbounds declarations\n"
 #ifdef USE_POLLY
     " --polly={yes|no}          Enable or disable the polyhedral optimizer Polly (overrides @polly declaration)\n"
 #endif
@@ -159,7 +162,6 @@ static const char opts_hidden[]  =
 
     // compiler debugging (see the devdocs for tips on using these options)
     " --output-unopt-bc name    Generate unoptimized LLVM bitcode (.bc)\n"
-    " --output-jit-bc name      Dump all IR generated by the frontend (not including system image)\n"
     " --output-bc name          Generate LLVM bitcode (.bc)\n"
     " --output-asm name         Generate an assembly file (.s)\n"
     " --output-incremental=no   Generate an incremental output file (rather than complete)\n"
@@ -190,6 +192,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
            opt_worker,
            opt_bind_to,
            opt_handle_signals,
+           opt_optlevel_min,
            opt_output_o,
            opt_output_asm,
            opt_output_ji,
@@ -236,6 +239,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
         { "code-coverage",   optional_argument, 0, opt_code_coverage },
         { "track-allocation",optional_argument, 0, opt_track_allocation },
         { "optimize",        optional_argument, 0, 'O' },
+        { "min-optlevel",    optional_argument, 0, opt_optlevel_min },
         { "check-bounds",    required_argument, 0, opt_check_bounds },
         { "output-bc",       required_argument, 0, opt_output_bc },
         { "output-unopt-bc", required_argument, 0, opt_output_unopt_bc },
@@ -536,6 +540,24 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
                 jl_options.opt_level = 3;
             }
             break;
+        case opt_optlevel_min: // minimum module optimize level
+            if (optarg != NULL) {
+                if (!strcmp(optarg,"0"))
+                    jl_options.opt_level_min = 0;
+                else if (!strcmp(optarg,"1"))
+                    jl_options.opt_level_min = 1;
+                else if (!strcmp(optarg,"2"))
+                    jl_options.opt_level_min = 2;
+                else if (!strcmp(optarg,"3"))
+                    jl_options.opt_level_min = 3;
+                else
+                    jl_errorf("julia: invalid argument to --min-optlevel (%s)", optarg);
+                break;
+            }
+            else {
+                jl_options.opt_level_min = 0;
+            }
+            break;
         case 'i': // isinteractive
             jl_options.isinteractive = 1;
             break;
@@ -544,8 +566,10 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
                 jl_options.check_bounds = JL_OPTIONS_CHECK_BOUNDS_ON;
             else if (!strcmp(optarg,"no"))
                 jl_options.check_bounds = JL_OPTIONS_CHECK_BOUNDS_OFF;
+            else if (!strcmp(optarg,"auto"))
+                jl_options.check_bounds = JL_OPTIONS_CHECK_BOUNDS_DEFAULT;
             else
-                jl_errorf("julia: invalid argument to --check-bounds={yes|no} (%s)", optarg);
+                jl_errorf("julia: invalid argument to --check-bounds={yes|no|auto} (%s)", optarg);
             break;
         case opt_output_bc:
             jl_options.outputbc = optarg;
diff --git a/src/jltypes.c b/src/jltypes.c
index 61541c54a808bf..43171ee332e877 100644
--- a/src/jltypes.c
+++ b/src/jltypes.c
@@ -19,6 +19,8 @@
 extern "C" {
 #endif
 
+jl_value_t *cmpswap_names JL_GLOBALLY_ROOTED;
+
 // compute empirical max-probe for a given size
 #define max_probe(size) ((size) <= 1024 ? 16 : (size) >> 6)
 #define h2index(hv, sz) (size_t)((hv) & ((sz)-1))
@@ -35,6 +37,45 @@ static int typeenv_has(jl_typeenv_t *env, jl_tvar_t *v) JL_NOTSAFEPOINT
     return 0;
 }
 
+static int layout_uses_free_typevars(jl_value_t *v, jl_typeenv_t *env)
+{
+    if (jl_typeis(v, jl_tvar_type))
+        return !typeenv_has(env, (jl_tvar_t*)v);
+    if (jl_is_uniontype(v))
+        return layout_uses_free_typevars(((jl_uniontype_t*)v)->a, env) ||
+               layout_uses_free_typevars(((jl_uniontype_t*)v)->b, env);
+    if (jl_is_vararg(v)) {
+        jl_vararg_t *vm = (jl_vararg_t*)v;
+        if (vm->T) {
+            if (layout_uses_free_typevars(vm->T, env))
+                return 1;
+            if (vm->N && layout_uses_free_typevars(vm->N, env))
+                return 1;
+        }
+        return 0;
+    }
+    if (jl_is_unionall(v)) {
+        jl_unionall_t *ua = (jl_unionall_t*)v;
+        jl_typeenv_t newenv = { ua->var, NULL, env };
+        return layout_uses_free_typevars(ua->body, &newenv);
+    }
+    if (jl_is_datatype(v)) {
+        jl_datatype_t *dt = (jl_datatype_t*)v;
+        if (dt->layout || dt->isconcretetype || !dt->name->mayinlinealloc)
+            return 0;
+        jl_svec_t *types = jl_get_fieldtypes(dt);
+        size_t i, l = jl_svec_len(types);
+        for (i = 0; i < l; i++) {
+            jl_value_t *ft = jl_svecref(types, i);
+            if (layout_uses_free_typevars(ft, env)) {
+                // This might be inline-alloc, but we don't know the layout
+                return 1;
+            }
+        }
+    }
+    return 0;
+}
+
 static int has_free_typevars(jl_value_t *v, jl_typeenv_t *env) JL_NOTSAFEPOINT
 {
     if (jl_typeis(v, jl_tvar_type)) {
@@ -62,9 +103,8 @@ static int has_free_typevars(jl_value_t *v, jl_typeenv_t *env) JL_NOTSAFEPOINT
         if (expect == 0 || env == NULL)
             return expect;
         size_t i;
-        for (i=0; i < jl_nparams(v); i++) {
-            if (has_free_typevars(jl_tparam(v,i), env)) {
-                assert(expect);
+        for (i = 0; i < jl_nparams(v); i++) {
+            if (has_free_typevars(jl_tparam(v, i), env)) {
                 return 1;
             }
         }
@@ -181,6 +221,38 @@ JL_DLLEXPORT int jl_has_typevar_from_unionall(jl_value_t *t, jl_unionall_t *ua)
     return _jl_has_typevar_from_ua(t, ua, NULL);
 }
 
+int jl_has_fixed_layout(jl_datatype_t *dt) // returns 1/0 per the ordered checks below; abstract, Tuple and NamedTuple types that reach those checks always yield 0
+{
+    if (dt->layout || dt->isconcretetype) // a layout is already attached, or the type is concrete
+        return 1;
+    if (dt->name->abstract)
+        return 0;
+    if (jl_is_tuple_type(dt) || jl_is_namedtuple_type(dt))
+        return 0; // TODO: relax more?
+    jl_svec_t *types = jl_get_fieldtypes(dt);
+    size_t i, l = jl_svec_len(types);
+    for (i = 0; i < l; i++) {
+        jl_value_t *ft = jl_svecref(types, i);
+        if (layout_uses_free_typevars(ft, NULL)) { // NULL env: the scan of each field type starts with no bound typevars
+            // This might be inline-alloc, but we don't know the layout
+            return 0;
+        }
+    }
+    return 1;
+}
+
+int jl_type_mappable_to_c(jl_value_t *ty) // returns 1/0; `ty` must be a type and must not be a typevar (asserted below)
+{
+    assert(!jl_is_typevar(ty) && jl_is_type(ty));
+    if (jl_is_structtype(ty)) { // struct types defer entirely to jl_has_fixed_layout
+        jl_datatype_t *jst = (jl_datatype_t*)ty;
+        return jl_has_fixed_layout(jst);
+    }
+    ty = jl_unwrap_unionall(ty); // the tuple checks below are made on the unwrapped type
+    if (jl_is_tuple_type(ty) || jl_is_namedtuple_type(ty))
+        return 0; // TODO: relax some?
+    return 1; // as boxed or primitive
+}
 
 // Return true for any type (Integer or Unsigned) that can fit in a
 // size_t and pass back value, else return false
@@ -825,19 +897,19 @@ struct _jl_typestack_t;
 typedef struct _jl_typestack_t jl_typestack_t;
 
 static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value_t **iparams, size_t ntp,
-                                       int cacheable, jl_typestack_t *stack, jl_typeenv_t *env);
+                                       jl_typestack_t *stack, jl_typeenv_t *env);
 
 // Build an environment mapping a TypeName's parameters to parameter values.
 // This is the environment needed for instantiating a type's supertype and field types.
 static jl_value_t *inst_datatype_env(jl_value_t *dt, jl_svec_t *p, jl_value_t **iparams, size_t ntp,
-                                     int cacheable, jl_typestack_t *stack, jl_typeenv_t *env, int c)
+                                     jl_typestack_t *stack, jl_typeenv_t *env, int c) // c: index into iparams of the value paired with the next UnionAll variable
 {
     if (jl_is_datatype(dt))
-        return inst_datatype_inner((jl_datatype_t*)dt, p, iparams, ntp, cacheable, stack, env);
+        return inst_datatype_inner((jl_datatype_t*)dt, p, iparams, ntp, stack, env); // reached the datatype: instantiate it with the env accumulated so far
     assert(jl_is_unionall(dt));
     jl_unionall_t *ua = (jl_unionall_t*)dt;
     jl_typeenv_t e = { ua->var, iparams[c], env };
-    return inst_datatype_env(ua->body, p, iparams, ntp, cacheable, stack, &e, c + 1);
+    return inst_datatype_env(ua->body, p, iparams, ntp, stack, &e, c + 1); // recurse into the body with ua->var paired with iparams[c]
 }
 
 jl_value_t *jl_apply_type(jl_value_t *tc, jl_value_t **params, size_t n)
@@ -853,14 +925,7 @@ jl_value_t *jl_apply_type(jl_value_t *tc, jl_value_t **params, size_t n)
         jl_value_t *u = jl_unwrap_unionall(tc);
         if (jl_is_datatype(u) && n == jl_nparams((jl_datatype_t*)u) &&
             ((jl_datatype_t*)u)->name->wrapper == tc) {
-            int cacheable = 1;
-            for (i = 0; i < n; i++) {
-                if (jl_has_free_typevars(params[i])) {
-                    cacheable = 0;
-                    break;
-                }
-            }
-            return inst_datatype_env(tc, NULL, params, n, cacheable, NULL, NULL, 0);
+            return inst_datatype_env(tc, NULL, params, n, NULL, NULL, 0);
         }
     }
     JL_GC_PUSH1(&tc);
@@ -907,20 +972,42 @@ jl_value_t *jl_apply_type(jl_value_t *tc, jl_value_t **params, size_t n)
 
 JL_DLLEXPORT jl_value_t *jl_apply_type1(jl_value_t *tc, jl_value_t *p1)
 {
-    JL_GC_PUSH1(&p1);
-    jl_value_t *t = jl_apply_type(tc, &p1, 1);
-    JL_GC_POP();
-    return t;
+    return jl_apply_type(tc, &p1, 1); // one-parameter convenience form: &p1 serves as a length-1 parameter array
 }
 
 JL_DLLEXPORT jl_value_t *jl_apply_type2(jl_value_t *tc, jl_value_t *p1, jl_value_t *p2)
 {
-    jl_value_t **args;
-    JL_GC_PUSHARGS(args, 2);
-    args[0] = p1; args[1] = p2;
-    jl_value_t *t = jl_apply_type(tc, args, 2);
-    JL_GC_POP();
-    return t;
+    jl_value_t *args[2]; // the two parameters are handed over through a plain local array
+    args[0] = p1;
+    args[1] = p2;
+    return jl_apply_type(tc, args, 2); // two-parameter convenience form of jl_apply_type
+}
+
+jl_datatype_t *jl_apply_modify_type(jl_value_t *dt) // returns jl_pair_type instantiated with both parameters set to `dt`
+{
+    jl_datatype_t *rettyp = (jl_datatype_t*)jl_apply_type2(jl_pair_type, dt, dt);
+    JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE)
+    return rettyp;
+}
+
+jl_datatype_t *jl_apply_cmpswap_type(jl_value_t *dt) // returns jl_namedtuple_type applied to (names, tuple type of (dt, jl_bool_type)), where names is built from the symbols `old` and `success`
+{
+    jl_value_t *params[2]; // scratch array: first holds the two name symbols, later the two field types
+    jl_value_t *names = jl_atomic_load_relaxed(&cmpswap_names);
+    if (names == NULL) { // nothing cached yet: build the names tuple and try to store it in cmpswap_names
+        params[0] = (jl_value_t*)jl_symbol("old");
+        params[1] = (jl_value_t*)jl_symbol("success");
+        jl_value_t *lnames = jl_f_tuple(NULL, params, 2);
+        if (jl_atomic_cmpswap(&cmpswap_names, &names, lnames)) // NOTE(review): presumably a failed swap writes the value already stored into `names` -- confirm against jl_atomic_cmpswap
+            names = jl_atomic_load_relaxed(&cmpswap_names); // == lnames
+    }
+    params[0] = dt;
+    params[1] = (jl_value_t*)jl_bool_type;
+    jl_datatype_t *tuptyp = jl_apply_tuple_type_v(params, 2);
+    JL_GC_PROMISE_ROOTED(tuptyp); // (JL_ALWAYS_LEAFTYPE)
+    jl_datatype_t *rettyp = (jl_datatype_t*)jl_apply_type2((jl_value_t*)jl_namedtuple_type, names, (jl_value_t*)tuptyp);
+    JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE)
+    return rettyp;
 }
 
 JL_DLLEXPORT jl_value_t *jl_tupletype_fill(size_t n, jl_value_t *v)
@@ -1082,7 +1169,7 @@ void jl_precompute_memoized_dt(jl_datatype_t *dt, int cacheable)
 {
     int istuple = (dt->name == jl_tuple_typename);
     dt->hasfreetypevars = 0;
-    dt->isconcretetype = !dt->abstract;
+    dt->isconcretetype = !dt->name->abstract;
     dt->isdispatchtuple = istuple;
     size_t i, l = jl_nparams(dt);
     for (i = 0; i < l; i++) {
@@ -1099,6 +1186,13 @@ void jl_precompute_memoized_dt(jl_datatype_t *dt, int cacheable)
                 ((!jl_is_kind(p) && ((jl_datatype_t*)p)->isconcretetype) ||
                  (((jl_datatype_t*)p)->name == jl_type_typename && !((jl_datatype_t*)p)->hasfreetypevars));
         }
+        if (istuple && dt->has_concrete_subtype) {
+            if (jl_is_vararg(p))
+                p = ((jl_vararg_t*)p)->T;
+            // tuple types like Tuple{:x} cannot have instances
+            if (p && !jl_is_type(p) && !jl_is_typevar(p))
+                dt->has_concrete_subtype = 0;
+        }
     }
     if (dt->name == jl_type_typename)
         cacheable = 0; // the cache for Type ignores parameter normalization, so it can't be used as a regular hash
@@ -1153,16 +1247,128 @@ static jl_value_t *extract_wrapper(jl_value_t *t JL_PROPAGATES_ROOT) JL_GLOBALLY
     return NULL;
 }
 
+int _may_substitute_ub(jl_value_t *v, jl_tvar_t *var, int inside_inv, int *cov_count) JL_NOTSAFEPOINT // recursive worker; inside_inv becomes nonzero below a non-tuple datatype parameter or a Vararg length; *cov_count tallies occurrences of `var` found while inside_inv is zero
+{
+    if (v == (jl_value_t*)var) {
+        if (inside_inv) {
+            return 0;
+        }
+        else {
+            (*cov_count)++;
+            return *cov_count <= 1 || jl_is_concrete_type(var->ub); // a repeated occurrence is tolerated only when the upper bound is concrete
+        }
+    }
+    else if (jl_is_uniontype(v)) {
+        return _may_substitute_ub(((jl_uniontype_t*)v)->a, var, inside_inv, cov_count) &&
+            _may_substitute_ub(((jl_uniontype_t*)v)->b, var, inside_inv, cov_count);
+    }
+    else if (jl_is_unionall(v)) {
+        jl_unionall_t *ua = (jl_unionall_t*)v;
+        if (ua->var == var) // this UnionAll binds the same variable object: its bounds and body are not examined
+            return 1;
+        return _may_substitute_ub(ua->var->lb, var, inside_inv, cov_count) &&
+            _may_substitute_ub(ua->var->ub, var, inside_inv, cov_count) &&
+            _may_substitute_ub(ua->body, var, inside_inv, cov_count);
+    }
+    else if (jl_is_datatype(v)) {
+        int invar = inside_inv || !jl_is_tuple_type(v); // parameters of every datatype other than a tuple type are treated as invariant positions
+        for (size_t i = 0; i < jl_nparams(v); i++) {
+            if (!_may_substitute_ub(jl_tparam(v,i), var, invar, cov_count))
+                return 0;
+        }
+    }
+    else if (jl_is_vararg(v)) {
+        jl_vararg_t *va = (jl_vararg_t*)v;
+        int old_count = *cov_count;
+        if (va->T && !_may_substitute_ub(va->T, var, inside_inv, cov_count))
+            return 0;
+        if (*cov_count > old_count && !jl_is_concrete_type(var->ub)) // `var` was counted inside the Vararg element type: allowed only with a concrete upper bound
+            return 0;
+        if (va->N && !_may_substitute_ub(va->N, var, 1, cov_count)) // the length slot is always checked as an invariant position
+            return 0;
+    }
+    return 1;
+}
+
+// Check whether `var` may be replaced with its upper bound `ub` in `v where var<:ub`
+// Conditions:
+//  * `var` does not appear in invariant position
+//  * `var` appears at most once (in covariant position) and not in a `Vararg`
+//    unless the upper bound is concrete (diagonal rule)
+int may_substitute_ub(jl_value_t *v, jl_tvar_t *var) JL_NOTSAFEPOINT // returns nonzero when the substitution is allowed, 0 otherwise
+{
+    int cov_count = 0; // occurrence counter shared by the whole recursive walk
+    return _may_substitute_ub(v, var, 0, &cov_count); // the walk starts outside any invariant position
+}
+
+jl_value_t *normalize_unionalls(jl_value_t *t) // returns `t` itself or a rebuilt value; only Union and UnionAll inputs are rewritten, anything else comes back unchanged
+{
+    JL_GC_PUSH1(&t);
+    if (jl_is_uniontype(t)) {
+        jl_uniontype_t *u = (jl_uniontype_t*)t;
+        jl_value_t *a = NULL;
+        jl_value_t *b = NULL;
+        JL_GC_PUSH2(&a, &b);
+        a = normalize_unionalls(u->a);
+        b = normalize_unionalls(u->b);
+        if (a != u->a || b != u->b) { // build a new Union only when a member actually changed
+            t = jl_new_struct(jl_uniontype_type, a, b);
+        }
+        JL_GC_POP();
+    }
+    else if (jl_is_unionall(t)) {
+        jl_unionall_t *u = (jl_unionall_t*)t;
+        jl_value_t *body = normalize_unionalls(u->body); // the body is normalized before this wrapper is considered
+        if (body != u->body) {
+            JL_GC_PUSH1(&body);
+            t = jl_new_struct(jl_unionall_type, u->var, body);
+            JL_GC_POP();
+            u = (jl_unionall_t*)t; // continue with the rebuilt UnionAll
+        }
+
+        if (u->var->lb == u->var->ub || may_substitute_ub(body, u->var)) { // bounds coincide, or the variable may be replaced by its upper bound
+            JL_TRY {
+                t = jl_instantiate_unionall(u, u->var->ub);
+            }
+            JL_CATCH {
+                // just skip normalization
+                // (may happen for bounds inconsistent with the wrapper's bounds)
+            }
+        }
+    }
+    JL_GC_POP();
+    return t;
+}
+
 static jl_value_t *_jl_instantiate_type_in_env(jl_value_t *ty, jl_unionall_t *env, jl_value_t **vals, jl_typeenv_t *prev, jl_typestack_t *stack);
 
 static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value_t **iparams, size_t ntp,
-                                       int cacheable, jl_typestack_t *stack, jl_typeenv_t *env)
+                                       jl_typestack_t *stack, jl_typeenv_t *env)
 {
     jl_typestack_t top;
     jl_typename_t *tn = dt->name;
     int istuple = (tn == jl_tuple_typename);
     int isnamedtuple = (tn == jl_namedtuple_typename);
-    // check type cache
+    if (dt->name != jl_type_typename) {
+        size_t i;
+        for (i = 0; i < ntp; i++)
+            iparams[i] = normalize_unionalls(iparams[i]);
+    }
+
+    // check type cache, if applicable
+    int cacheable = 1;
+    if (istuple) {
+        size_t i;
+        for (i = 0; cacheable && i < ntp; i++)
+            if (!jl_is_concrete_type(iparams[i]) && iparams[i] != jl_bottom_type)
+                cacheable = 0;
+    }
+    else {
+        size_t i;
+        for (i = 0; cacheable && i < ntp; i++)
+            if (jl_has_free_typevars(iparams[i]))
+                cacheable = 0;
+    }
     if (cacheable) {
         size_t i;
         for (i = 0; i < ntp; i++) {
@@ -1208,9 +1414,7 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
     jl_value_t *last = iparams[ntp - 1];
     JL_GC_PUSH3(&p, &ndt, &last);
 
-    int isvatuple = 0;
     if (istuple && ntp > 0 && jl_is_vararg(last)) {
-        isvatuple = 1;
         // normalize Tuple{..., Vararg{Int, 3}} to Tuple{..., Int, Int, Int}
         jl_value_t *va = jl_unwrap_unionall(last);
         jl_value_t *va0 = jl_unwrap_vararg(va), *va1 = jl_unwrap_vararg_num(va);
@@ -1274,7 +1478,7 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
     jl_gc_wb(ndt, ndt->parameters);
     ndt->types = NULL; // to be filled in below
     if (istuple) {
-        ndt->types = p;
+        ndt->types = p; // TODO: this may need to filter out certain types
     }
     else if (isnamedtuple) {
         jl_value_t *names_tup = jl_svecref(p, 0);
@@ -1283,48 +1487,33 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
             if (!jl_is_tuple(names_tup))
                 jl_type_error_rt("NamedTuple", "names", (jl_value_t*)jl_anytuple_type, names_tup);
             size_t nf = jl_nfields(names_tup);
-            jl_svec_t *names = jl_alloc_svec_uninit(nf);
             for (size_t i = 0; i < nf; i++) {
                 jl_value_t *ni = jl_fieldref(names_tup, i);
                 if (!jl_is_symbol(ni))
                     jl_type_error_rt("NamedTuple", "name", (jl_value_t*)jl_symbol_type, ni);
                 for (size_t j = 0; j < i; j++) {
-                    if (ni == jl_svecref(names, j))
+                    if (ni == jl_fieldref_noalloc(names_tup, j))
                         jl_errorf("duplicate field name in NamedTuple: \"%s\" is not unique", jl_symbol_name((jl_sym_t*)ni));
                 }
-                jl_svecset(names, i, ni);
             }
             if (!jl_is_datatype(values_tt))
                 jl_error("NamedTuple field type must be a tuple type");
             if (jl_is_va_tuple((jl_datatype_t*)values_tt) || jl_nparams(values_tt) != nf)
                 jl_error("NamedTuple names and field types must have matching lengths");
-            ndt->names = names;
-            jl_gc_wb(ndt, ndt->names);
             ndt->types = ((jl_datatype_t*)values_tt)->parameters;
             jl_gc_wb(ndt, ndt->types);
         }
         else {
-            ndt->types = jl_emptysvec;
+            ndt->types = jl_emptysvec; // XXX: this is essentially always false
         }
     }
-    ndt->mutabl = dt->mutabl;
-    ndt->abstract = dt->abstract;
-    ndt->size = 0;
-    jl_precompute_memoized_dt(ndt, cacheable);
-    if (istuple)
-        ndt->ninitialized = ntp - isvatuple;
-    else if (isnamedtuple)
-        ndt->ninitialized = jl_svec_len(ndt->types);
-    else
-        ndt->ninitialized = dt->ninitialized;
-
-    if (jl_is_primitivetype(dt)) {
-        ndt->size = dt->size;
-        ndt->layout = dt->layout;
-        ndt->isbitstype = ndt->isinlinealloc = ndt->isconcretetype;
-    }
 
     jl_datatype_t *primarydt = ((jl_datatype_t*)jl_unwrap_unionall(tn->wrapper));
+    jl_precompute_memoized_dt(ndt, cacheable);
+    ndt->size = 0;
+    if (primarydt->layout)
+        jl_compute_field_offsets(ndt);
+
     if (istuple || isnamedtuple) {
         ndt->super = jl_any_type;
     }
@@ -1346,7 +1535,7 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
     }
     else if (!isnamedtuple && !istuple) {
         assert(ftypes != jl_emptysvec || jl_field_names(ndt) == jl_emptysvec);
-        assert(ftypes == jl_emptysvec || !ndt->abstract);
+        assert(ftypes == jl_emptysvec || !ndt->name->abstract);
         if (ftypes == jl_emptysvec) {
             ndt->types = ftypes;
         }
@@ -1361,10 +1550,12 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
     }
 
     // now publish the finished result
+    // XXX: if the stack was used, this will publish in the wrong order,
+    // leading to incorrect layouts and data races (#40050: the A{T} should be
+    // an isbitstype singleton of size 0)
     if (cacheable) {
-        if (!jl_is_primitivetype(dt) && ndt->types != NULL && !ndt->abstract) {
+        if (ndt->layout == NULL && ndt->types != NULL && ndt->isconcretetype)
             jl_compute_field_offsets(ndt);
-        }
         jl_cache_type_(ndt);
         JL_UNLOCK(&typecache_lock); // Might GC
     }
@@ -1375,13 +1566,7 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
 
 static jl_tupletype_t *jl_apply_tuple_type_v_(jl_value_t **p, size_t np, jl_svec_t *params)
 {
-    int cacheable = 1;
-    for (size_t i = 0; i < np; i++) {
-        assert(p[i]);
-        if (!jl_is_concrete_type(p[i]) && p[i] != jl_bottom_type)
-            cacheable = 0;
-    }
-    return (jl_datatype_t*)inst_datatype_inner(jl_anytuple_type, params, p, np, cacheable, NULL, NULL);
+    return (jl_datatype_t*)inst_datatype_inner(jl_anytuple_type, params, p, np, NULL, NULL); // instantiates jl_anytuple_type with the np parameters in p; no typestack or typeenv is supplied
 }
 
 JL_DLLEXPORT jl_tupletype_t *jl_apply_tuple_type(jl_svec_t *params)
@@ -1403,7 +1588,6 @@ jl_tupletype_t *jl_inst_arg_tuple_type(jl_value_t *arg1, jl_value_t **args, size
 {
     jl_tupletype_t *tt = (jl_datatype_t*)lookup_typevalue(jl_tuple_typename, arg1, args, nargs, leaf);
     if (tt == NULL) {
-        int cacheable = 1;
         size_t i;
         jl_svec_t *params = jl_alloc_svec(nargs);
         JL_GC_PUSH1(&params);
@@ -1415,14 +1599,13 @@ jl_tupletype_t *jl_inst_arg_tuple_type(jl_value_t *arg1, jl_value_t **args, size
                 // `jl_typeof(ai)`, but that will require some redesign of the caching
                 // logic.
                 ai = (jl_value_t*)jl_wrap_Type(ai);
-                cacheable = 0;
             }
             else {
                 ai = jl_typeof(ai);
             }
             jl_svecset(params, i, ai);
         }
-        tt = (jl_datatype_t*)inst_datatype_inner(jl_anytuple_type, params, jl_svec_data(params), nargs, cacheable, NULL, NULL);
+        tt = (jl_datatype_t*)inst_datatype_inner(jl_anytuple_type, params, jl_svec_data(params), nargs, NULL, NULL);
         JL_GC_POP();
     }
     return tt;
@@ -1490,9 +1673,6 @@ static jl_value_t *inst_tuple_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_
         iparams = jl_svec_data(ip_heap);
     }
     int bound = 0;
-    int cacheable = 1;
-    if (jl_is_va_tuple(tt))
-        cacheable = 0;
     int i;
     for (i = 0; i < ntp; i++) {
         jl_value_t *elt = jl_svecref(tp, i);
@@ -1501,11 +1681,9 @@ static jl_value_t *inst_tuple_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_
         if (ip_heap)
             jl_gc_wb(ip_heap, pi);
         bound |= (pi != elt);
-        if (cacheable && !jl_is_concrete_type(pi))
-            cacheable = 0;
     }
     if (bound)
-        t = inst_datatype_inner(tt, ip_heap, iparams, ntp, cacheable, stack, env);
+        t = inst_datatype_inner(tt, ip_heap, iparams, ntp, stack, env);
     JL_GC_POP();
     return t;
 }
@@ -1592,18 +1770,16 @@ static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t
     size_t ntp = jl_svec_len(tp);
     jl_value_t **iparams;
     JL_GC_PUSHARGS(iparams, ntp);
-    int cacheable = 1, bound = 0;
+    int bound = 0;
     for (i = 0; i < ntp; i++) {
         jl_value_t *elt = jl_svecref(tp, i);
         jl_value_t *pi = inst_type_w_(elt, env, stack, check);
         iparams[i] = pi;
         bound |= (pi != elt);
-        if (cacheable && jl_has_free_typevars(pi))
-            cacheable = 0;
     }
     // if t's parameters are not bound in the environment, return it uncopied (#9378)
     if (bound)
-        t = inst_datatype_inner(tt, NULL, iparams, ntp, cacheable, stack, env);
+        t = inst_datatype_inner(tt, NULL, iparams, ntp, stack, env);
     JL_GC_POP();
     return t;
 }
@@ -1675,8 +1851,8 @@ jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n)
             jl_type_error_rt("Vararg", "type", (jl_value_t*)jl_type_type, t);
         }
     }
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_vararg_t *vm = (jl_vararg_t *)jl_gc_alloc(ptls, sizeof(jl_vararg_t), jl_vararg_type);
+    jl_task_t *ct = jl_current_task;
+    jl_vararg_t *vm = (jl_vararg_t *)jl_gc_alloc(ct->ptls, sizeof(jl_vararg_t), jl_vararg_type);
     vm->T = t;
     vm->N = n;
     return vm;
@@ -1777,7 +1953,6 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_symbol_type = jl_new_uninitialized_datatype();
     jl_simplevector_type = jl_new_uninitialized_datatype();
     jl_methtable_type = jl_new_uninitialized_datatype();
-    jl_nothing = jl_gc_permobj(0, NULL);
 
     jl_emptysvec = (jl_svec_t*)jl_gc_permobj(sizeof(void*), jl_simplevector_type);
     jl_svec_set_len_unsafe(jl_emptysvec, 0);
@@ -1793,69 +1968,60 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_type_typename->mt = jl_type_type_mt;
 
     // initialize them. lots of cycles.
-    jl_datatype_type->name = jl_new_typename_in(jl_symbol("DataType"), core);
+    // NOTE: types are not actually mutable, but we want to ensure they are heap-allocated with stable addresses
+    jl_datatype_type->name = jl_new_typename_in(jl_symbol("DataType"), core, 0, 1);
     jl_datatype_type->name->wrapper = (jl_value_t*)jl_datatype_type;
     jl_datatype_type->super = (jl_datatype_t*)jl_type_type;
     jl_datatype_type->parameters = jl_emptysvec;
-    jl_datatype_type->name->names = jl_perm_symsvec(20,
-                                                    "name",
-                                                    "super",
-                                                    "parameters",
-                                                    "types",
-                                                    "names",
-                                                    "instance",
-                                                    "layout",
-                                                    "size",
-                                                    "ninitialized",
-                                                    "hash",
-                                                    "abstract",
-                                                    "mutable",
-                                                    "hasfreetypevars",
-                                                    "isconcretetype",
-                                                    "isdispatchtuple",
-                                                    "isbitstype",
-                                                    "zeroinit",
-                                                    "isinlinealloc",
-                                                    "has_concrete_subtype",
-                                                    "cached_by_hash");
-    jl_datatype_type->types = jl_svec(20,
-                                      jl_typename_type,
-                                      jl_datatype_type,
-                                      jl_simplevector_type,
-                                      jl_simplevector_type, jl_simplevector_type,
-                                      jl_any_type, // instance
-                                      jl_any_type, jl_any_type, jl_any_type, jl_any_type, // properties
-                                      jl_any_type, jl_any_type, jl_any_type, jl_any_type,
-                                      jl_any_type, jl_any_type, jl_any_type, jl_any_type,
-                                      jl_any_type, jl_any_type);
-    jl_datatype_type->abstract = 0;
-    // NOTE: types are not actually mutable, but we want to ensure they are heap-allocated with stable addresses
-    jl_datatype_type->mutabl = 1;
-    jl_datatype_type->ninitialized = 3;
+    jl_datatype_type->name->n_uninitialized = 9 - 3;
+    jl_datatype_type->name->names = jl_perm_symsvec(9,
+            "name",
+            "super",
+            "parameters",
+            "types",
+            "instance",
+            "layout",
+            "size",
+            "hash",
+            "flags"); // "hasfreetypevars", "isconcretetype", "isdispatchtuple", "isbitstype", "zeroinit", "has_concrete_subtype", "cached_by_hash"
+    jl_datatype_type->types = jl_svec(9,
+            jl_typename_type,
+            jl_datatype_type,
+            jl_simplevector_type,
+            jl_simplevector_type,
+            jl_any_type, // instance
+            jl_any_type /*jl_voidpointer_type*/,
+            jl_any_type /*jl_int32_type*/,
+            jl_any_type /*jl_int32_type*/,
+            jl_any_type /*jl_uint8_type*/);
     jl_precompute_memoized_dt(jl_datatype_type, 1);
 
-    jl_typename_type->name = jl_new_typename_in(jl_symbol("TypeName"), core);
+    jl_typename_type->name = jl_new_typename_in(jl_symbol("TypeName"), core, 0, 1);
     jl_typename_type->name->wrapper = (jl_value_t*)jl_typename_type;
     jl_typename_type->name->mt = jl_nonfunction_mt;
     jl_typename_type->super = jl_any_type;
     jl_typename_type->parameters = jl_emptysvec;
-    jl_typename_type->name->names = jl_perm_symsvec(9, "name", "module",
-                                                    "names", "wrapper",
-                                                    "cache", "linearcache",
-                                                    "hash", "mt", "partial");
-    jl_typename_type->types = jl_svec(9, jl_symbol_type, jl_any_type, jl_simplevector_type,
+    jl_typename_type->name->n_uninitialized = 12 - 2;
+    jl_typename_type->name->names = jl_perm_symsvec(12, "name", "module",
+                                                    "names", "atomicfields",
+                                                    "wrapper", "cache", "linearcache",
+                                                    "mt", "partial",
+                                                    "hash", "n_uninitialized",
+                                                    "flags"); // "abstract", "mutable", "mayinlinealloc",
+    jl_typename_type->types = jl_svec(12, jl_symbol_type, jl_any_type /*jl_module_type*/,
+                                      jl_simplevector_type, jl_any_type/*jl_voidpointer_type*/,
                                       jl_type_type, jl_simplevector_type, jl_simplevector_type,
-                                      jl_any_type, jl_methtable_type, jl_any_type);
-    jl_typename_type->abstract = 0;
-    jl_typename_type->mutabl = 1;
-    jl_typename_type->ninitialized = 2;
+                                      jl_methtable_type, jl_any_type,
+                                      jl_any_type /*jl_long_type*/, jl_any_type /*jl_int32_type*/,
+                                      jl_any_type /*jl_uint8_type*/);
     jl_precompute_memoized_dt(jl_typename_type, 1);
 
-    jl_methtable_type->name = jl_new_typename_in(jl_symbol("MethodTable"), core);
+    jl_methtable_type->name = jl_new_typename_in(jl_symbol("MethodTable"), core, 0, 1);
     jl_methtable_type->name->wrapper = (jl_value_t*)jl_methtable_type;
     jl_methtable_type->name->mt = jl_nonfunction_mt;
     jl_methtable_type->super = jl_any_type;
     jl_methtable_type->parameters = jl_emptysvec;
+    jl_methtable_type->name->n_uninitialized = 12 - 5;
     jl_methtable_type->name->names = jl_perm_symsvec(12, "name", "defs",
                                                      "leafcache", "cache", "max_args",
                                                      "kwsorter", "module",
@@ -1865,72 +2031,65 @@ void jl_init_types(void) JL_GC_DISABLED
                                        jl_any_type, jl_any_type/*module*/,
                                        jl_any_type/*any vector*/, jl_any_type/*long*/, jl_any_type/*int32*/,
                                        jl_any_type/*uint8*/, jl_any_type/*uint8*/);
-    jl_methtable_type->abstract = 0;
-    jl_methtable_type->mutabl = 1;
-    jl_methtable_type->ninitialized = 5;
     jl_precompute_memoized_dt(jl_methtable_type, 1);
 
-    jl_symbol_type->name = jl_new_typename_in(jl_symbol("Symbol"), core);
+    jl_symbol_type->name = jl_new_typename_in(jl_symbol("Symbol"), core, 0, 1);
     jl_symbol_type->name->wrapper = (jl_value_t*)jl_symbol_type;
     jl_symbol_type->name->mt = jl_nonfunction_mt;
     jl_symbol_type->super = jl_any_type;
     jl_symbol_type->parameters = jl_emptysvec;
+    jl_symbol_type->name->n_uninitialized = 0;
     jl_symbol_type->name->names = jl_emptysvec;
     jl_symbol_type->types = jl_emptysvec;
     jl_symbol_type->size = 0;
-    jl_symbol_type->abstract = 0;
-    jl_symbol_type->mutabl = 1;
-    jl_symbol_type->ninitialized = 0;
     jl_precompute_memoized_dt(jl_symbol_type, 1);
 
-    jl_simplevector_type->name = jl_new_typename_in(jl_symbol("SimpleVector"), core);
+    jl_simplevector_type->name = jl_new_typename_in(jl_symbol("SimpleVector"), core, 0, 1);
     jl_simplevector_type->name->wrapper = (jl_value_t*)jl_simplevector_type;
     jl_simplevector_type->name->mt = jl_nonfunction_mt;
     jl_simplevector_type->super = jl_any_type;
     jl_simplevector_type->parameters = jl_emptysvec;
+    jl_simplevector_type->name->n_uninitialized = 0;
     jl_simplevector_type->name->names = jl_emptysvec;
     jl_simplevector_type->types = jl_emptysvec;
-    jl_simplevector_type->abstract = 0;
-    jl_simplevector_type->mutabl = 1;
-    jl_simplevector_type->ninitialized = 0;
     jl_precompute_memoized_dt(jl_simplevector_type, 1);
 
     // now they can be used to create the remaining base kinds and types
     jl_nothing_type = jl_new_datatype(jl_symbol("Nothing"), core, jl_any_type, jl_emptysvec,
-                                      jl_emptysvec, jl_emptysvec, 0, 0, 0);
+                                      jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 0, 0);
     jl_void_type = jl_nothing_type; // deprecated alias
     jl_astaggedvalue(jl_nothing)->header = ((uintptr_t)jl_nothing_type) | GC_OLD_MARKED;
     jl_nothing_type->instance = jl_nothing;
 
     jl_datatype_t *type_type = (jl_datatype_t*)jl_type_type;
     jl_typeofbottom_type = jl_new_datatype(jl_symbol("TypeofBottom"), core, type_type, jl_emptysvec,
-                                         jl_emptysvec, jl_emptysvec, 0, 0, 0);
+                                         jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 0, 0);
     jl_bottom_type = jl_new_struct(jl_typeofbottom_type);
     jl_typeofbottom_type->instance = jl_bottom_type;
 
     jl_uniontype_type = jl_new_datatype(jl_symbol("Union"), core, type_type, jl_emptysvec,
                                         jl_perm_symsvec(2, "a", "b"),
                                         jl_svec(2, jl_any_type, jl_any_type),
-                                        0, 0, 2);
+                                        jl_emptysvec, 0, 0, 2);
 
     jl_tvar_type = jl_new_datatype(jl_symbol("TypeVar"), core, jl_any_type, jl_emptysvec,
                                    jl_perm_symsvec(3, "name", "lb", "ub"),
                                    jl_svec(3, jl_symbol_type, jl_any_type, jl_any_type),
-                                   0, 1, 3);
+                                   jl_emptysvec, 0, 1, 3);
 
     jl_unionall_type = jl_new_datatype(jl_symbol("UnionAll"), core, type_type, jl_emptysvec,
                                        jl_perm_symsvec(2, "var", "body"),
                                        jl_svec(2, jl_tvar_type, jl_any_type),
-                                       0, 0, 2);
+                                       jl_emptysvec, 0, 0, 2);
 
     jl_vararg_type = jl_new_datatype(jl_symbol("TypeofVararg"), core, jl_any_type, jl_emptysvec,
                                             jl_perm_symsvec(2, "T", "N"),
                                             jl_svec(2, jl_any_type, jl_any_type),
-                                            0, 0, 0);
+                                            jl_emptysvec, 0, 0, 0);
 
     jl_svec_t *anytuple_params = jl_svec(1, jl_wrap_vararg((jl_value_t*)jl_any_type, (jl_value_t*)NULL));
     jl_anytuple_type = jl_new_datatype(jl_symbol("Tuple"), core, jl_any_type, anytuple_params,
-                                       jl_emptysvec, anytuple_params, 0, 0, 0);
+                                       jl_emptysvec, anytuple_params, jl_emptysvec, 0, 0, 0);
     jl_tuple_typename = jl_anytuple_type->name;
     // fix some miscomputed values, since we didn't know this was going to be a Tuple in jl_precompute_memoized_dt
     jl_tuple_typename->wrapper = (jl_value_t*)jl_anytuple_type; // remove UnionAll wrappers
@@ -1966,22 +2125,26 @@ void jl_init_types(void) JL_GC_DISABLED
 
     jl_ssavalue_type = jl_new_datatype(jl_symbol("SSAValue"), core, jl_any_type, jl_emptysvec,
                                        jl_perm_symsvec(1, "id"),
-                                       jl_svec1(jl_long_type), 0, 0, 1);
+                                       jl_svec1(jl_long_type),
+                                       jl_emptysvec, 0, 0, 1);
 
     jl_abstractslot_type = jl_new_abstracttype((jl_value_t*)jl_symbol("Slot"), core, jl_any_type,
                                                jl_emptysvec);
 
     jl_slotnumber_type = jl_new_datatype(jl_symbol("SlotNumber"), core, jl_abstractslot_type, jl_emptysvec,
                                          jl_perm_symsvec(1, "id"),
-                                         jl_svec1(jl_long_type), 0, 0, 1);
+                                         jl_svec1(jl_long_type),
+                                         jl_emptysvec, 0, 0, 1);
 
     jl_typedslot_type = jl_new_datatype(jl_symbol("TypedSlot"), core, jl_abstractslot_type, jl_emptysvec,
                                         jl_perm_symsvec(2, "id", "typ"),
-                                        jl_svec(2, jl_long_type, jl_any_type), 0, 0, 2);
+                                        jl_svec(2, jl_long_type, jl_any_type),
+                                        jl_emptysvec, 0, 0, 2);
 
     jl_argument_type = jl_new_datatype(jl_symbol("Argument"), core, jl_any_type, jl_emptysvec,
                                        jl_perm_symsvec(1, "n"),
-                                       jl_svec1(jl_long_type), 0, 0, 1);
+                                       jl_svec1(jl_long_type),
+                                       jl_emptysvec, 0, 0, 1);
 
     jl_init_int32_int64_cache();
 
@@ -1993,7 +2156,7 @@ void jl_init_types(void) JL_GC_DISABLED
 
     jl_abstractstring_type = jl_new_abstracttype((jl_value_t*)jl_symbol("AbstractString"), core, jl_any_type, jl_emptysvec);
     jl_string_type = jl_new_datatype(jl_symbol("String"), core, jl_abstractstring_type, jl_emptysvec,
-                                     jl_emptysvec, jl_emptysvec, 0, 1, 0);
+                                     jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 1, 0);
     jl_string_type->instance = NULL;
     jl_compute_field_offsets(jl_string_type);
     jl_an_empty_string = jl_pchar_to_string("\0", 1);
@@ -2015,6 +2178,7 @@ void jl_init_types(void) JL_GC_DISABLED
                             jl_any_type,
                             jl_any_type,
                             jl_any_type),
+                        jl_emptysvec,
                         0, 1, 6);
 
     jl_typemap_entry_type =
@@ -2041,6 +2205,7 @@ void jl_init_types(void) JL_GC_DISABLED
                             jl_bool_type,
                             jl_bool_type,
                             jl_bool_type),
+                        jl_emptysvec,
                         0, 1, 4);
 
     jl_function_type = jl_new_abstracttype((jl_value_t*)jl_symbol("Function"), core, jl_any_type, jl_emptysvec);
@@ -2062,10 +2227,8 @@ void jl_init_types(void) JL_GC_DISABLED
     tv = jl_svec2(tvar("T"), tvar("N"));
     jl_array_type = (jl_unionall_t*)
         jl_new_datatype(jl_symbol("Array"), core,
-                        (jl_datatype_t*)
-                        jl_apply_type((jl_value_t*)jl_densearray_type, jl_svec_data(tv), 2),
-                        tv,
-                        jl_emptysvec, jl_emptysvec, 0, 1, 0)->name->wrapper;
+                        (jl_datatype_t*)jl_apply_type((jl_value_t*)jl_densearray_type, jl_svec_data(tv), 2),
+                        tv, jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 1, 0)->name->wrapper;
     jl_array_typename = ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_array_type))->name;
     jl_compute_field_offsets((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_array_type));
 
@@ -2082,11 +2245,11 @@ void jl_init_types(void) JL_GC_DISABLED
                         jl_any_type, jl_emptysvec,
                         jl_perm_symsvec(2, "head", "args"),
                         jl_svec(2, jl_symbol_type, jl_array_any_type),
-                        0, 1, 2);
+                        jl_emptysvec, 0, 1, 2);
 
     jl_module_type =
         jl_new_datatype(jl_symbol("Module"), core, jl_any_type, jl_emptysvec,
-                        jl_emptysvec, jl_emptysvec, 0, 1, 0);
+                        jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 1, 0);
     jl_module_type->instance = NULL;
     jl_compute_field_offsets(jl_module_type);
 
@@ -2094,63 +2257,74 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_linenumbernode_type =
         jl_new_datatype(jl_symbol("LineNumberNode"), core, jl_any_type, jl_emptysvec,
                         jl_perm_symsvec(2, "line", "file"),
-                        jl_svec(2, jl_long_type, jl_type_union(symornothing, 2)), 0, 0, 2);
+                        jl_svec(2, jl_long_type, jl_type_union(symornothing, 2)),
+                        jl_emptysvec, 0, 0, 2);
 
     jl_lineinfonode_type =
         jl_new_datatype(jl_symbol("LineInfoNode"), core, jl_any_type, jl_emptysvec,
                         jl_perm_symsvec(5, "module", "method", "file", "line", "inlined_at"),
                         jl_svec(5, jl_module_type, jl_any_type, jl_symbol_type, jl_long_type, jl_long_type),
-                        0, 0, 5);
+                        jl_emptysvec, 0, 0, 5);
 
     jl_gotonode_type =
         jl_new_datatype(jl_symbol("GotoNode"), core, jl_any_type, jl_emptysvec,
                         jl_perm_symsvec(1, "label"),
-                        jl_svec(1, jl_long_type), 0, 0, 1);
+                        jl_svec(1, jl_long_type),
+                        jl_emptysvec, 0, 0, 1);
 
     jl_gotoifnot_type =
         jl_new_datatype(jl_symbol("GotoIfNot"), core, jl_any_type, jl_emptysvec,
                         jl_perm_symsvec(2, "cond", "dest"),
-                        jl_svec(2, jl_any_type, jl_long_type), 0, 0, 2);
+                        jl_svec(2, jl_any_type, jl_long_type),
+                        jl_emptysvec, 0, 0, 2);
 
     jl_returnnode_type =
         jl_new_datatype(jl_symbol("ReturnNode"), core, jl_any_type, jl_emptysvec,
                         jl_perm_symsvec(1, "val"),
-                        jl_svec(1, jl_any_type), 0, 0, 0);
+                        jl_svec(1, jl_any_type),
+                        jl_emptysvec, 0, 0, 0);
 
     jl_pinode_type =
         jl_new_datatype(jl_symbol("PiNode"), core, jl_any_type, jl_emptysvec,
                         jl_perm_symsvec(2, "val", "typ"),
-                        jl_svec(2, jl_any_type, jl_any_type), 0, 0, 2);
+                        jl_svec(2, jl_any_type, jl_any_type),
+                        jl_emptysvec, 0, 0, 2);
 
     jl_phinode_type =
         jl_new_datatype(jl_symbol("PhiNode"), core, jl_any_type, jl_emptysvec,
                         jl_perm_symsvec(2, "edges", "values"),
-                        jl_svec(2, jl_array_int32_type, jl_array_any_type), 0, 0, 2);
+                        jl_svec(2, jl_array_int32_type, jl_array_any_type),
+                        jl_emptysvec, 0, 0, 2);
 
     jl_phicnode_type =
         jl_new_datatype(jl_symbol("PhiCNode"), core, jl_any_type, jl_emptysvec,
                         jl_perm_symsvec(1, "values"),
-                        jl_svec(1, jl_array_any_type), 0, 0, 1);
+                        jl_svec(1, jl_array_any_type),
+                        jl_emptysvec, 0, 0, 1);
 
     jl_upsilonnode_type =
         jl_new_datatype(jl_symbol("UpsilonNode"), core, jl_any_type, jl_emptysvec,
                         jl_perm_symsvec(1, "val"),
-                        jl_svec(1, jl_any_type), 0, 0, 0);
+                        jl_svec(1, jl_any_type),
+                        jl_emptysvec, 0, 0, 0);
 
     jl_quotenode_type =
         jl_new_datatype(jl_symbol("QuoteNode"), core, jl_any_type, jl_emptysvec,
                         jl_perm_symsvec(1, "value"),
-                        jl_svec(1, jl_any_type), 0, 0, 1);
+                        jl_svec(1, jl_any_type),
+                        jl_emptysvec, 0, 0, 1);
 
     jl_newvarnode_type =
         jl_new_datatype(jl_symbol("NewvarNode"), core, jl_any_type, jl_emptysvec,
                         jl_perm_symsvec(1, "slot"),
-                        jl_svec(1, jl_slotnumber_type), 0, 0, 1);
+                        jl_svec(1, jl_slotnumber_type),
+                        jl_emptysvec, 0, 0, 1);
 
     jl_globalref_type =
         jl_new_datatype(jl_symbol("GlobalRef"), core, jl_any_type, jl_emptysvec,
                         jl_perm_symsvec(2, "mod", "name"),
-                        jl_svec(2, jl_module_type, jl_symbol_type), 0, 0, 2);
+                        jl_svec(2, jl_module_type, jl_symbol_type),
+                        jl_emptysvec, 0, 0, 2);
 
     jl_code_info_type =
         jl_new_datatype(jl_symbol("CodeInfo"), core,
@@ -2195,12 +2369,13 @@ void jl_init_types(void) JL_GC_DISABLED
                             jl_bool_type,
                             jl_bool_type,
                             jl_bool_type),
+                        jl_emptysvec,
                         0, 1, 19);
 
     jl_method_type =
         jl_new_datatype(jl_symbol("Method"), core,
                         jl_any_type, jl_emptysvec,
-                        jl_perm_symsvec(25,
+                        jl_perm_symsvec(26,
                             "name",
                             "module",
                             "file",
@@ -2211,6 +2386,7 @@ void jl_init_types(void) JL_GC_DISABLED
                             "specializations",
                             "speckeyset",
                             "slot_syms",
+                            "external_mt",
                             "source",
                             "unspecialized",
                             "generator",
@@ -2226,7 +2402,7 @@ void jl_init_types(void) JL_GC_DISABLED
                             "pure",
                             "is_for_opaque_closure",
                             "aggressive_constprop"),
-                        jl_svec(25,
+                        jl_svec(26,
                             jl_symbol_type,
                             jl_module_type,
                             jl_symbol_type,
@@ -2238,6 +2414,7 @@ void jl_init_types(void) JL_GC_DISABLED
                             jl_array_type,
                             jl_string_type,
                             jl_any_type,
+                            jl_any_type,
                             jl_any_type, // jl_method_instance_type
                             jl_any_type,
                             jl_array_any_type,
@@ -2252,6 +2429,7 @@ void jl_init_types(void) JL_GC_DISABLED
                             jl_bool_type,
                             jl_bool_type,
                             jl_bool_type),
+                        jl_emptysvec,
                         0, 1, 10);
 
     jl_method_instance_type =
@@ -2275,6 +2453,7 @@ void jl_init_types(void) JL_GC_DISABLED
                             jl_any_type,
                             jl_any_type,
                             jl_bool_type),
+                        jl_emptysvec,
                         0, 1, 3);
 
     jl_code_instance_type =
@@ -2304,24 +2483,29 @@ void jl_init_types(void) JL_GC_DISABLED
                             jl_bool_type,
                             jl_bool_type,
                             jl_any_type, jl_any_type), // fptrs
+                        jl_emptysvec,
                         0, 1, 1);
     jl_svecset(jl_code_instance_type->types, 1, jl_code_instance_type);
 
     jl_const_type = jl_new_datatype(jl_symbol("Const"), core, jl_any_type, jl_emptysvec,
                                        jl_perm_symsvec(1, "val"),
-                                       jl_svec1(jl_any_type), 0, 0, 1);
+                                       jl_svec1(jl_any_type),
+                                       jl_emptysvec, 0, 0, 1);
 
     jl_partial_struct_type = jl_new_datatype(jl_symbol("PartialStruct"), core, jl_any_type, jl_emptysvec,
                                        jl_perm_symsvec(2, "typ", "fields"),
-                                       jl_svec2(jl_any_type, jl_array_any_type), 0, 0, 2);
+                                       jl_svec2(jl_any_type, jl_array_any_type),
+                                       jl_emptysvec, 0, 0, 2);
 
     jl_interconditional_type = jl_new_datatype(jl_symbol("InterConditional"), core, jl_any_type, jl_emptysvec,
                                           jl_perm_symsvec(3, "slot", "vtype", "elsetype"),
-                                          jl_svec(3, jl_long_type, jl_any_type, jl_any_type), 0, 0, 3);
+                                          jl_svec(3, jl_long_type, jl_any_type, jl_any_type),
+                                          jl_emptysvec, 0, 0, 3);
 
     jl_method_match_type = jl_new_datatype(jl_symbol("MethodMatch"), core, jl_any_type, jl_emptysvec,
                                        jl_perm_symsvec(4, "spec_types", "sparams", "method", "fully_covers"),
-                                       jl_svec(4, jl_type_type, jl_simplevector_type, jl_method_type, jl_bool_type), 0, 0, 4);
+                                       jl_svec(4, jl_type_type, jl_simplevector_type, jl_method_type, jl_bool_type),
+                                       jl_emptysvec, 0, 0, 4);
 
     // all Kinds share the Type method table (not the nonfunction one)
     jl_unionall_type->name->mt = jl_uniontype_type->name->mt = jl_datatype_type->name->mt =
@@ -2361,7 +2545,7 @@ void jl_init_types(void) JL_GC_DISABLED
                                           (jl_value_t*)jl_anytuple_type);
     tv = jl_svec2(tvar("names"), ntval_var);
     jl_datatype_t *ntt = jl_new_datatype(jl_symbol("NamedTuple"), core, jl_any_type, tv,
-                                         jl_emptysvec, jl_emptysvec, 0, 0, 0);
+                                         jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 0, 0);
     jl_namedtuple_type = (jl_unionall_t*)ntt->name->wrapper;
     ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_namedtuple_type))->layout = NULL;
     jl_namedtuple_typename = ntt->name;
@@ -2371,7 +2555,7 @@ void jl_init_types(void) JL_GC_DISABLED
                         NULL,
                         jl_any_type,
                         jl_emptysvec,
-                        jl_perm_symsvec(10,
+                        jl_perm_symsvec(14,
                                         "next",
                                         "queue",
                                         "storage",
@@ -2379,10 +2563,14 @@ void jl_init_types(void) JL_GC_DISABLED
                                         "result",
                                         "logstate",
                                         "code",
+                                        "rngState0",
+                                        "rngState1",
+                                        "rngState2",
+                                        "rngState3",
                                         "_state",
                                         "sticky",
                                         "_isexception"),
-                        jl_svec(10,
+                        jl_svec(14,
                                 jl_any_type,
                                 jl_any_type,
                                 jl_any_type,
@@ -2390,19 +2578,26 @@ void jl_init_types(void) JL_GC_DISABLED
                                 jl_any_type,
                                 jl_any_type,
                                 jl_any_type,
+                                jl_uint64_type,
+                                jl_uint64_type,
+                                jl_uint64_type,
+                                jl_uint64_type,
                                 jl_uint8_type,
                                 jl_bool_type,
                                 jl_bool_type),
+                        jl_emptysvec,
                         0, 1, 6);
     jl_value_t *listt = jl_new_struct(jl_uniontype_type, jl_task_type, jl_nothing_type);
     jl_svecset(jl_task_type->types, 0, listt);
+    jl_astaggedvalue(jl_current_task)->header = (uintptr_t)jl_task_type | jl_astaggedvalue(jl_current_task)->header;
 
     jl_value_t *pointer_void = jl_apply_type1((jl_value_t*)jl_pointer_type, (jl_value_t*)jl_nothing_type);
 
     tv = jl_svec2(tvar("A"), tvar("R"));
     jl_opaque_closure_type = (jl_unionall_t*)jl_new_datatype(jl_symbol("OpaqueClosure"), core, jl_function_type, tv,
         jl_perm_symsvec(6, "captures", "isva", "world", "source", "invoke", "specptr"),
-        jl_svec(6, jl_any_type, jl_bool_type, jl_long_type, jl_any_type, pointer_void, pointer_void), 0, 0, 6)->name->wrapper;
+        jl_svec(6, jl_any_type, jl_bool_type, jl_long_type, jl_any_type, pointer_void, pointer_void),
+        jl_emptysvec, 0, 0, 6)->name->wrapper;
     jl_opaque_closure_typename = ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_opaque_closure_type))->name;
     jl_compute_field_offsets((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_opaque_closure_type));
 
@@ -2410,28 +2605,21 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_partial_opaque_type = jl_new_datatype(jl_symbol("PartialOpaque"), core, jl_any_type, jl_emptysvec,
         jl_perm_symsvec(5, "typ", "env", "isva", "parent", "source"),
         jl_svec(5, jl_type_type, jl_any_type, jl_bool_type, jl_method_instance_type, jl_method_type),
-        0, 0, 5);
+        jl_emptysvec, 0, 0, 5);
 
     // complete builtin type metadata
     jl_voidpointer_type = (jl_datatype_t*)pointer_void;
     jl_uint8pointer_type = (jl_datatype_t*)jl_apply_type1((jl_value_t*)jl_pointer_type, (jl_value_t*)jl_uint8_type);
-    jl_svecset(jl_datatype_type->types, 6, jl_voidpointer_type);
+    jl_svecset(jl_datatype_type->types, 5, jl_voidpointer_type);
+    jl_svecset(jl_datatype_type->types, 6, jl_int32_type);
     jl_svecset(jl_datatype_type->types, 7, jl_int32_type);
-    jl_svecset(jl_datatype_type->types, 8, jl_int32_type);
-    jl_svecset(jl_datatype_type->types, 9, jl_int32_type);
-    jl_svecset(jl_datatype_type->types, 10, jl_bool_type);
-    jl_svecset(jl_datatype_type->types, 11, jl_bool_type);
-    jl_svecset(jl_datatype_type->types, 12, jl_bool_type);
-    jl_svecset(jl_datatype_type->types, 13, jl_bool_type);
-    jl_svecset(jl_datatype_type->types, 14, jl_bool_type);
-    jl_svecset(jl_datatype_type->types, 15, jl_bool_type);
-    jl_svecset(jl_datatype_type->types, 16, jl_bool_type);
-    jl_svecset(jl_datatype_type->types, 17, jl_bool_type);
-    jl_svecset(jl_datatype_type->types, 18, jl_bool_type);
-    jl_svecset(jl_datatype_type->types, 19, jl_bool_type);
+    jl_svecset(jl_datatype_type->types, 8, jl_uint8_type);
     jl_svecset(jl_typename_type->types, 1, jl_module_type);
-    jl_svecset(jl_typename_type->types, 6, jl_long_type);
-    jl_svecset(jl_typename_type->types, 3, jl_type_type);
+    jl_svecset(jl_typename_type->types, 3, jl_voidpointer_type);
+    jl_svecset(jl_typename_type->types, 4, jl_type_type);
+    jl_svecset(jl_typename_type->types, 9, jl_long_type);
+    jl_svecset(jl_typename_type->types, 10, jl_int32_type);
+    jl_svecset(jl_typename_type->types, 11, jl_uint8_type);
     jl_svecset(jl_methtable_type->types, 4, jl_long_type);
     jl_svecset(jl_methtable_type->types, 6, jl_module_type);
     jl_svecset(jl_methtable_type->types, 7, jl_array_any_type);
@@ -2444,7 +2632,7 @@ void jl_init_types(void) JL_GC_DISABLED
 #endif
     jl_svecset(jl_methtable_type->types, 10, jl_uint8_type);
     jl_svecset(jl_methtable_type->types, 11, jl_uint8_type);
-    jl_svecset(jl_method_type->types, 11, jl_method_instance_type);
+    jl_svecset(jl_method_type->types, 12, jl_method_instance_type);
     jl_svecset(jl_method_instance_type->types, 6, jl_code_instance_type);
     jl_svecset(jl_code_instance_type->types, 9, jl_voidpointer_type);
     jl_svecset(jl_code_instance_type->types, 10, jl_voidpointer_type);
@@ -2462,10 +2650,10 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_compute_field_offsets(jl_symbol_type);
 
     // override the preferred layout for a couple types
-    jl_lineinfonode_type->isinlinealloc = 0; // FIXME: assumed to be a pointer by codegen
+    jl_lineinfonode_type->name->mayinlinealloc = 0; // FIXME: assumed to be a pointer by codegen
     // It seems like we probably usually end up needing the box for kinds (used in an Any context)--but is that true?
-    jl_uniontype_type->isinlinealloc = 0;
-    jl_unionall_type->isinlinealloc = 0;
+    jl_uniontype_type->name->mayinlinealloc = 0;
+    jl_unionall_type->name->mayinlinealloc = 0;
 }
 
 #ifdef __cplusplus
diff --git a/src/julia-parser.scm b/src/julia-parser.scm
index 0b3f52ca61647e..dac32bdf81066e 100644
--- a/src/julia-parser.scm
+++ b/src/julia-parser.scm
@@ -4,24 +4,24 @@
 (define (add-dots ops) (append! ops (map (lambda (op) (symbol (string "." op))) ops)))
 
 (define prec-assignment
-  (append! (add-dots '(= += -= *= /= //= |\\=| ^= ÷= %= <<= >>= >>>= |\|=| &= ⊻= ≔ ⩴ ≕))
+  (append! (add-dots '(= += -= −= *= /= //= |\\=| ^= ÷= %= <<= >>= >>>= |\|=| &= ⊻= ≔ ⩴ ≕))
            (add-dots '(~))
            '(:= $=)))
 ;; comma - higher than assignment outside parentheses, lower when inside
 (define prec-pair (add-dots '(=>)))
 (define prec-conditional '(?))
 (define prec-arrow       (add-dots '(← → ↔ ↚ ↛ ↞ ↠ ↢ ↣ ↦ ↤ ↮ ⇎ ⇍ ⇏ ⇐ ⇒ ⇔ ⇴ ⇶ ⇷ ⇸ ⇹ ⇺ ⇻ ⇼ ⇽ ⇾ ⇿ ⟵ ⟶ ⟷ ⟹ ⟺ ⟻ ⟼ ⟽ ⟾ ⟿ ⤀ ⤁ ⤂ ⤃ ⤄ ⤅ ⤆ ⤇ ⤌ ⤍ ⤎ ⤏ ⤐ ⤑ ⤔ ⤕ ⤖ ⤗ ⤘ ⤝ ⤞ ⤟ ⤠ ⥄ ⥅ ⥆ ⥇ ⥈ ⥊ ⥋ ⥎ ⥐ ⥒ ⥓ ⥖ ⥗ ⥚ ⥛ ⥞ ⥟ ⥢ ⥤ ⥦ ⥧ ⥨ ⥩ ⥪ ⥫ ⥬ ⥭ ⥰ ⧴ ⬱ ⬰ ⬲ ⬳ ⬴ ⬵ ⬶ ⬷ ⬸ ⬹ ⬺ ⬻ ⬼ ⬽ ⬾ ⬿ ⭀ ⭁ ⭂ ⭃ ⭄ ⭇ ⭈ ⭉ ⭊ ⭋ ⭌ ￩ ￫ ⇜ ⇝ ↜ ↝ ↩ ↪ ↫ ↬ ↼ ↽ ⇀ ⇁ ⇄ ⇆ ⇇ ⇉ ⇋ ⇌ ⇚ ⇛ ⇠ ⇢ ↷ ↶ ↺ ↻ --> <-- <-->)))
-(define prec-lazy-or     '(|\|\||))
-(define prec-lazy-and    '(&&))
+(define prec-lazy-or     (add-dots '(|\|\||))) ; add-dots appends the dotted symbol, so .|| is listed next to ||
+(define prec-lazy-and    (add-dots '(&&))) ; likewise lists .&& next to &&
 (define prec-comparison
   (append! '(in isa)
-           (add-dots '(> < >= ≥ <= ≤ == === ≡ != ≠ !== ≢ ∈ ∉ ∋ ∌ ⊆ ⊈ ⊂ ⊄ ⊊ ∝ ∊ ∍ ∥ ∦ ∷ ∺ ∻ ∽ ∾ ≁ ≃ ≂ ≄ ≅ ≆ ≇ ≈ ≉ ≊ ≋ ≌ ≍ ≎ ≐ ≑ ≒ ≓ ≖ ≗ ≘ ≙ ≚ ≛ ≜ ≝ ≞ ≟ ≣ ≦ ≧ ≨ ≩ ≪ ≫ ≬ ≭ ≮ ≯ ≰ ≱ ≲ ≳ ≴ ≵ ≶ ≷ ≸ ≹ ≺ ≻ ≼ ≽ ≾ ≿ ⊀ ⊁ ⊃ ⊅ ⊇ ⊉ ⊋ ⊏ ⊐ ⊑ ⊒ ⊜ ⊩ ⊬ ⊮ ⊰ ⊱ ⊲ ⊳ ⊴ ⊵ ⊶ ⊷ ⋍ ⋐ ⋑ ⋕ ⋖ ⋗ ⋘ ⋙ ⋚ ⋛ ⋜ ⋝ ⋞ ⋟ ⋠ ⋡ ⋢ ⋣ ⋤ ⋥ ⋦ ⋧ ⋨ ⋩ ⋪ ⋫ ⋬ ⋭ ⋲ ⋳ ⋴ ⋵ ⋶ ⋷ ⋸ ⋹ ⋺ ⋻ ⋼ ⋽ ⋾ ⋿ ⟈ ⟉ ⟒ ⦷ ⧀ ⧁ ⧡ ⧣ ⧤ ⧥ ⩦ ⩧ ⩪ ⩫ ⩬ ⩭ ⩮ ⩯ ⩰ ⩱ ⩲ ⩳ ⩵ ⩶ ⩷ ⩸ ⩹ ⩺ ⩻ ⩼ ⩽ ⩾ ⩿ ⪀ ⪁ ⪂ ⪃ ⪄ ⪅ ⪆ ⪇ ⪈ ⪉ ⪊ ⪋ ⪌ ⪍ ⪎ ⪏ ⪐ ⪑ ⪒ ⪓ ⪔ ⪕ ⪖ ⪗ ⪘ ⪙ ⪚ ⪛ ⪜ ⪝ ⪞ ⪟ ⪠ ⪡ ⪢ ⪣ ⪤ ⪥ ⪦ ⪧ ⪨ ⪩ ⪪ ⪫ ⪬ ⪭ ⪮ ⪯ ⪰ ⪱ ⪲ ⪳ ⪴ ⪵ ⪶ ⪷ ⪸ ⪹ ⪺ ⪻ ⪼ ⪽ ⪾ ⪿ ⫀ ⫁ ⫂ ⫃ ⫄ ⫅ ⫆ ⫇ ⫈ ⫉ ⫊ ⫋ ⫌ ⫍ ⫎ ⫏ ⫐ ⫑ ⫒ ⫓ ⫔ ⫕ ⫖ ⫗ ⫘ ⫙ ⫷ ⫸ ⫹ ⫺ ⊢ ⊣ ⟂ <: >:))))
+           (add-dots '(> < >= ≥ <= ≤ == === ≡ != ≠ !== ≢ ∈ ∉ ∋ ∌ ⊆ ⊈ ⊂ ⊄ ⊊ ∝ ∊ ∍ ∥ ∦ ∷ ∺ ∻ ∽ ∾ ≁ ≃ ≂ ≄ ≅ ≆ ≇ ≈ ≉ ≊ ≋ ≌ ≍ ≎ ≐ ≑ ≒ ≓ ≖ ≗ ≘ ≙ ≚ ≛ ≜ ≝ ≞ ≟ ≣ ≦ ≧ ≨ ≩ ≪ ≫ ≬ ≭ ≮ ≯ ≰ ≱ ≲ ≳ ≴ ≵ ≶ ≷ ≸ ≹ ≺ ≻ ≼ ≽ ≾ ≿ ⊀ ⊁ ⊃ ⊅ ⊇ ⊉ ⊋ ⊏ ⊐ ⊑ ⊒ ⊜ ⊩ ⊬ ⊮ ⊰ ⊱ ⊲ ⊳ ⊴ ⊵ ⊶ ⊷ ⋍ ⋐ ⋑ ⋕ ⋖ ⋗ ⋘ ⋙ ⋚ ⋛ ⋜ ⋝ ⋞ ⋟ ⋠ ⋡ ⋢ ⋣ ⋤ ⋥ ⋦ ⋧ ⋨ ⋩ ⋪ ⋫ ⋬ ⋭ ⋲ ⋳ ⋴ ⋵ ⋶ ⋷ ⋸ ⋹ ⋺ ⋻ ⋼ ⋽ ⋾ ⋿ ⟈ ⟉ ⟒ ⦷ ⧀ ⧁ ⧡ ⧣ ⧤ ⧥ ⩦ ⩧ ⩪ ⩫ ⩬ ⩭ ⩮ ⩯ ⩰ ⩱ ⩲ ⩳ ⩵ ⩶ ⩷ ⩸ ⩹ ⩺ ⩻ ⩼ ⩽ ⩾ ⩿ ⪀ ⪁ ⪂ ⪃ ⪄ ⪅ ⪆ ⪇ ⪈ ⪉ ⪊ ⪋ ⪌ ⪍ ⪎ ⪏ ⪐ ⪑ ⪒ ⪓ ⪔ ⪕ ⪖ ⪗ ⪘ ⪙ ⪚ ⪛ ⪜ ⪝ ⪞ ⪟ ⪠ ⪡ ⪢ ⪣ ⪤ ⪥ ⪦ ⪧ ⪨ ⪩ ⪪ ⪫ ⪬ ⪭ ⪮ ⪯ ⪰ ⪱ ⪲ ⪳ ⪴ ⪵ ⪶ ⪷ ⪸ ⪹ ⪺ ⪻ ⪼ ⪽ ⪾ ⪿ ⫀ ⫁ ⫂ ⫃ ⫄ ⫅ ⫆ ⫇ ⫈ ⫉ ⫊ ⫋ ⫌ ⫍ ⫎ ⫏ ⫐ ⫑ ⫒ ⫓ ⫔ ⫕ ⫖ ⫗ ⫘ ⫙ ⫷ ⫸ ⫹ ⫺ ⊢ ⊣ ⟂ ⫪ ⫫ <: >:))))
 (define prec-pipe<       '(|.<\|| |<\||))
 (define prec-pipe>       '(|.\|>| |\|>|))
 (define prec-colon       (append! '(: |..|) (add-dots '(… ⁝ ⋮ ⋱ ⋰ ⋯))))
 (define prec-plus        (append! '($)
-                          (add-dots '(+ - ¦ |\|| ⊕ ⊖ ⊞ ⊟ |++| ∪ ∨ ⊔ ± ∓ ∔ ∸ ≏ ⊎ ⊻ ⊽ ⋎ ⋓ ⧺ ⧻ ⨈ ⨢ ⨣ ⨤ ⨥ ⨦ ⨧ ⨨ ⨩ ⨪ ⨫ ⨬ ⨭ ⨮ ⨹ ⨺ ⩁ ⩂ ⩅ ⩊ ⩌ ⩏ ⩐ ⩒ ⩔ ⩖ ⩗ ⩛ ⩝ ⩡ ⩢ ⩣))))
-(define prec-times       (add-dots '(* / ⌿ ÷ % & ⋅ ∘ × |\\| ∩ ∧ ⊗ ⊘ ⊙ ⊚ ⊛ ⊠ ⊡ ⊓ ∗ ∙ ∤ ⅋ ≀ ⊼ ⋄ ⋆ ⋇ ⋉ ⋊ ⋋ ⋌ ⋏ ⋒ ⟑ ⦸ ⦼ ⦾ ⦿ ⧶ ⧷ ⨇ ⨰ ⨱ ⨲ ⨳ ⨴ ⨵ ⨶ ⨷ ⨸ ⨻ ⨼ ⨽ ⩀ ⩃ ⩄ ⩋ ⩍ ⩎ ⩑ ⩓ ⩕ ⩘ ⩚ ⩜ ⩞ ⩟ ⩠ ⫛ ⊍ ▷ ⨝ ⟕ ⟖ ⟗ ⨟)))
+                          (add-dots '(+ - − ¦ |\|| ⊕ ⊖ ⊞ ⊟ |++| ∪ ∨ ⊔ ± ∓ ∔ ∸ ≏ ⊎ ⊻ ⊽ ⋎ ⋓ ⧺ ⧻ ⨈ ⨢ ⨣ ⨤ ⨥ ⨦ ⨧ ⨨ ⨩ ⨪ ⨫ ⨬ ⨭ ⨮ ⨹ ⨺ ⩁ ⩂ ⩅ ⩊ ⩌ ⩏ ⩐ ⩒ ⩔ ⩖ ⩗ ⩛ ⩝ ⩡ ⩢ ⩣))))
+(define prec-times       (add-dots '(* / ⌿ ÷ % & · · ⋅ ∘ × |\\| ∩ ∧ ⊗ ⊘ ⊙ ⊚ ⊛ ⊠ ⊡ ⊓ ∗ ∙ ∤ ⅋ ≀ ⊼ ⋄ ⋆ ⋇ ⋉ ⋊ ⋋ ⋌ ⋏ ⋒ ⟑ ⦸ ⦼ ⦾ ⦿ ⧶ ⧷ ⨇ ⨰ ⨱ ⨲ ⨳ ⨴ ⨵ ⨶ ⨷ ⨸ ⨻ ⨼ ⨽ ⩀ ⩃ ⩄ ⩋ ⩍ ⩎ ⩑ ⩓ ⩕ ⩘ ⩚ ⩜ ⩞ ⩟ ⩠ ⫛ ⊍ ▷ ⨝ ⟕ ⟖ ⟗ ⨟)))
 (define prec-rational    (add-dots '(//)))
 (define prec-bitshift    (add-dots '(<< >> >>>)))
 ;; `where`
@@ -101,6 +101,8 @@
 
 (define unary-op? (Set unary-ops))
 
+(define radical-op? (Set '(√ ∛ ∜))) ; predicate built the same way as unary-op?: is the token one of √ ∛ ∜
+
 ; operators that are both unary and binary
 (define unary-and-binary-ops (append! '($ & ~)
                                       (add-dots '(+ - ⋆ ± ∓))))
@@ -109,8 +111,8 @@
 
 ; operators that are special forms, not function names
 (define syntactic-operators
-  (append! (add-dots '(= += -= *= /= //= |\\=| ^= ÷= %= <<= >>= >>>= |\|=| &= ⊻=))
-           '(:= $= && |\|\|| |.| ... ->)))
+  (append! (add-dots '(&& |\|\|| = += -= *= /= //= |\\=| ^= ÷= %= <<= >>= >>>= |\|=| &= ⊻=))
+           '(:= $= |.| ... ->)))
 (define syntactic-unary-operators '($ & |::|))
 
 (define syntactic-op? (Set syntactic-operators))
@@ -232,7 +234,7 @@
   (if (and (eqv? c0 #\*) (eqv? (peek-char port) #\*))
       (error "use \"x^y\" instead of \"x**y\" for exponentiation, and \"x...\" instead of \"**x\" for splatting."))
   (if (or (eof-object? (peek-char port)) (not (op-or-sufchar? (peek-char port))))
-      (symbol (string c0)) ; 1-char operator
+      (string->normsymbol (string c0)) ; 1-char operator
       (let ((str (let loop ((str (string c0))
                             (c   (peek-char port))
                             (in-suffix? #f))
@@ -265,7 +267,7 @@
                                           (loop newop (peek-char port) sufchar?))
                                    str))
                              str))))))
-        (string->symbol str))))
+        (string->normsymbol str))))
 
 (define (accum-digits c pred port _-digit-sep)
   (let loop ((str '())
@@ -309,6 +311,9 @@
 (define (numchk n s)
   (or n (error (string "invalid numeric constant \"" s "\""))))
 
+(define (string-lastchar s) ; char one string.dec step before index (length s); presumably the last char of s
+  (string.char s (string.dec s (length s))))
+
 (define (read-number port leadingdot neg)
   (let ((str  (open-output-string))
         (pred char-numeric?)
@@ -375,13 +380,17 @@
               (and (eq? pred char-hex?) ispP)
               (memv c '(#\e #\E #\f)))
           (begin (read-char port)
-                 (let ((d (peek-char port)))
+                 (let* ((d (peek-char port))
+                        (is-minus-sign (or (eqv? d #\-) (eqv? d #\u2212))))
                    (if (and (not (eof-object? d))
-                            (or (char-numeric? d) (eqv? d #\+) (eqv? d #\-)))
+                            (or (char-numeric? d) (eqv? d #\+) is-minus-sign))
                        (begin (set! is-float32-literal (eqv? c #\f))
                               (set! is-hex-float-literal ispP)
                               (write-char c str)
-                              (write-char (read-char port) str)
+                              (if is-minus-sign
+                                  (begin (read-char port)
+                                         (write-char #\- str))
+                                  (write-char (read-char port) str))
                               (read-digs #t #f)
                               (disallow-dot))
                        (io.ungetc port c)))))
@@ -406,7 +415,7 @@
                    (string.sub s 1)
                    s)
                r is-float32-literal)))
-      (if (and (eqv? #\. (string.char s (string.dec s (length s))))
+      (if (and (eqv? #\. (string-lastchar s))
                (let ((nxt (peek-char port)))
                  (and (not (eof-object? nxt))
                       (or (identifier-start-char? nxt)
@@ -973,7 +982,7 @@
                 (not (memv t '(#\( #\[ #\{))))
            )
        (not (ts:space? s))
-       (not (operator? t))
+       (or (not (operator? t)) (radical-op? t))
        (not (closing-token? t))
        (not (newline? t))
        (or (and (not (string? expr)) (not (eqv? t #\")))
@@ -996,7 +1005,7 @@
             (begin
               #;(if (and (number? ex) (= ex 0))
                     (error "juxtaposition with literal \"0\""))
-              (let ((next (parse-factor s)))
+              (let ((next (if (radical-op? next) (parse-unary s) (parse-factor s))))
                 (loop `(call * ,ex ,next)
                       (cons next args))))
             (if (length= args 1)
@@ -1216,6 +1225,8 @@
                           (loop (list* 'typed_vcat ex (cdr al))))
                          ((comprehension)
                           (loop (list* 'typed_comprehension ex (cdr al))))
+                         ((ncat)
+                          (loop (list* 'typed_ncat ex (cdr al))))
                          (else (error "unknown parse-cat result (internal error)")))))))
             ((|.|)
              (disallow-space s ex t)
@@ -1318,7 +1329,8 @@
 (define (valid-1arg-func-sig? sig)
   (or (symbol? sig)
       (and (pair? sig) (eq? (car sig) '|::|)
-           (symbol? (cadr sig)))))
+           (or (symbol? (cadr sig)) ; a `::` form whose first operand is a symbol
+               (length= sig 2))))) ; or any two-element `::` form, i.e. (:: x) with a single operand
 
 (define (unwrap-where x)
   (if (and (pair? x) (eq? (car x) 'where))
@@ -1364,14 +1376,14 @@
             (if (eq? word 'quote)
                 (list 'quote blk)
                 blk))))
-       ((while)  (begin0 (list 'while (parse-cond s) (parse-block s))
+       ((while)  (begin0 (list 'while (parse-cond s) (append (parse-block s) (list (line-number-node s))))
                          (expect-end s word)))
        ((for)
         (let* ((ranges (parse-comma-separated-iters s))
                (body   (parse-block s)))
           (expect-end s word)
           `(for ,(if (length= ranges 1) (car ranges) (cons 'block ranges))
-                ,body)))
+                ,(append body (list (line-number-node s))))))
 
        ((let)
         (let ((binds (if (memv (peek-token s) '(#\newline #\;))
@@ -1458,7 +1470,9 @@
                                    ;; function foo  =>  syntax error
                                    (error (string "expected \"(\" in " word " definition")))
                                (if (not (valid-func-sig? paren sig))
-                                   (error (string "expected \"(\" in " word " definition"))
+                                   (if paren
+                                       (error (string "ambiguous signature in " word " definition. Try adding a comma if this is a 1-argument anonymous function."))
+                                       (error (string "expected \"(\" in " word " definition")))
                                    sig)))
                      (body (parse-block s)))
                 (expect-end s word)
@@ -1847,60 +1861,125 @@
          (take-token s))
      `(comprehension ,gen))))
 
-(define (parse-matrix s first closer gotnewline last-end-symbol)
-  (define (fix head v) (cons head (reverse v)))
-  (define (update-outer v outer)
-    (cond ((null? v)       outer)
-          ((null? (cdr v)) (cons (car v) outer))
-          (else            (cons (fix 'row v) outer))))
-  (define semicolon (eqv? (peek-token s) #\;))
+(define (parse-array s first closer gotnewline last-end-symbol) ; parse a cat expression up to closer; first seeds the element list
+  (define (fix head v) ; (head . v reversed); elements are accumulated newest-first
+    (cons head (reverse v)))
+  (define (unfixrow l) ; reopen the newest collapsed entry: its operands, re-reversed, become the current list
+    (cons (reverse (cdaar l)) (if (and (null? (cdar l)) (null? (cdr l)))
+                                  '()
+                                  (cons (cdar l) (cdr l)))))
+  (define (fixcat head d v) ; like fix, with d inserted right after head
+    (cons head (cons d (reverse v))))
+  (define (ncons a n l) ; cons a onto l n times; l is returned unchanged when n < 1
+    (if (< n 1)
+        l
+        (ncons a (1- n) (cons a l))))
+  (define (fix-level ah n) ; one element: unwrap it; else (row ...) when n = 1, (nrow n-1 ...) otherwise
+     (if (length= ah 1)
+         (car ah)
+         (if (= n 1)
+             (fix 'row ah)
+             (fixcat 'nrow (1- n) ah))))
+  (define (collapse-level n l i) ; n rounds: wrap (car l) via fix-level at level i, push it onto the next list, then i+1
+    (if (> n 0)
+        (let* ((lhfix (fix-level (car l) i))
+               (lnew (if (null? (cdr l))
+                         (list (list lhfix))
+                         (cons (cons lhfix (cadr l)) (cddr l)))))
+          (collapse-level (1- n) lnew (1+ i)))
+        l))
+  (define (parse-array-inner s a is-row-first semicolon-count max-level closer gotnewline gotlinesep) ; a: list of element lists, newest first; is-row-first: '() until decided
+    (define (process-semicolon next) ; count one more semicolon, track the deepest level seen, collapse one level of a
+      (set! semicolon-count (1+ semicolon-count))
+      (set! max-level (max max-level semicolon-count))
+      (if (and (null? is-row-first) (= semicolon-count 2) (not (eqv? next #\;)))
+          ; finding ;; that isn't a row-separator makes it column-first
+          (set! is-row-first #f))
+      (set! a (collapse-level 1 a semicolon-count)))
+    (define (restore-lower-dim-lists next) ; push semicolon-count empty lists onto a, unless after a line separator or next is one of the tokens below
+      (if (and (not gotlinesep) (not (memv next (list #\; 'for closer #\newline))))
+          (set! a (ncons '() semicolon-count a))))
+    (let ((t (if (or gotnewline (eqv? (peek-token s) #\newline))
+                 #\newline
+                 (require-token s))))
+      (if (eqv? t closer) ; closing token: collapse the remaining levels, then choose the head from max-level
+          (begin
+            (take-token s)
+            (set! a (collapse-level (- max-level semicolon-count) a (1+ semicolon-count)))
+            (cond ((= max-level 0)
+                   (if (length= (car a) 1)
+                       (fix 'vect (car a))
+                       (fix 'hcat (car a))))
+                  ((= max-level 1)
+                   (fix 'vcat (car a)))
+                  (else
+                   (fixcat 'ncat max-level (car a)))))
+      (case t
+        ((#\newline)
+         (or gotnewline (take-token s)) ; take-token is skipped when gotnewline is set
+         (let ((next (peek-token s)))
+           (if (and (> semicolon-count 0) (eqv? next #\;))
+               (error (string "semicolons may appear before or after a line break in an array expression, "
+                              "but not both")))
+           (if (and (= semicolon-count 0)
+                    (not (memv next (list #\; 'for closer #\newline))))
+               ; treat a linebreak prior to a value as a semicolon if no previous semicolons observed
+                (process-semicolon next))
+           (restore-lower-dim-lists next)
+           (parse-array-inner s a is-row-first semicolon-count max-level closer #f gotlinesep)))
+        ((#\;)
+         (or gotnewline (take-token s))
+         (if (and (> semicolon-count 0) (ts:space? s)) ; disallow [a; ;b]
+             (error "multiple semicolons must be adjacent in an array expression"))
+         (let ((next (peek-token s)))
+           (let ((is-line-sep
+                 (if (and (not (null? is-row-first)) is-row-first (= semicolon-count 1))
+                     (cond ((eqv? next #\newline) #t) ; [a b ;;<newline>...
+                           ((not (or (eof-object? next) (eqv? next #\;))) ; [a b ;;...
+                             (error (string "cannot mix space and ;; separators in an array expression, "
+                                            "except to wrap a line")))
+                           (else #f)) ; [a b ;;<eof> for REPL,  [a ;;...
+                     #f))) ; [a ; b ;; c ; d...
+             (if is-line-sep
+                 (begin (set! a (unfixrow a))
+                        (set! max-level
+                              (if (null? (cdr a))
+                                  0 ; no prior single semicolon
+                                  max-level)))
+                 (begin (process-semicolon next)
+                        (restore-lower-dim-lists next)))
+           (parse-array-inner s a is-row-first semicolon-count max-level closer #f is-line-sep))))
+        ((#\,)
+         (error "unexpected comma in array expression"))
+        ((#\] #\})
+         (error (string "unexpected \"" t "\"")))
+        ((for)
+         (if (and (length= (car a) 1)
+                  (null? (cdr a)))
+             (begin ;; if we get here, there must have been some kind of space or separator
+               ;;(expect-space-before s 'for)
+               (take-token s)
+               (parse-comprehension s (caar a) closer))
+             (error "invalid comprehension syntax")))
+        (else
+         (if (and (not gotlinesep) (pair? (car a)) (not (ts:space? s)))
+            (error (string "expected \"" closer "\" or separator in arguments to \""
+                           (if (eqv? closer #\]) #\[ #\{) " " closer
+                           "\"; got \""
+                           (deparse (caar a)) t "\"")))
+         (let ((u (parse-eq* s)))
+           (set! a (cons (cons u (car a)) (cdr a)))
+           (if (= (length (car a)) 2)
+               ; at least 2 elements separated by space found [a b...], [a; b c...]
+               (if (null? is-row-first)
+                   (set! is-row-first #t)
+                   (if (not is-row-first)
+                       (error (string "cannot mix space and \";;\" separators in an array expression, "
+                                      "except to wrap a line"))))))
+         (parse-array-inner s a is-row-first 0 max-level closer #f #f)))))) ; after an element the semicolon count restarts at 0
   ;; if a [ ] expression is a cat expression, `end` is not special
   (with-bindings ((end-symbol last-end-symbol))
-  (let loop ((vec   (list first))
-             (outer '()))
-    (let ((t  (if (or (eqv? (peek-token s) #\newline) gotnewline)
-                  #\newline
-                  (require-token s))))
-      (if (eqv? t closer)
-          (begin (take-token s)
-                 (if (pair? outer)
-                     (fix 'vcat (update-outer vec outer))
-                     (if (or (null? vec) (null? (cdr vec)))
-                         (fix 'vect vec)     ; [x]   => (vect x)
-                         (fix 'hcat vec))))  ; [x y] => (hcat x y)
-          (case t
-            ((#\;)
-             (take-token s)
-             (if (eqv? (peek-token s) #\;)
-               (parser-depwarn s (string "Multiple semicolons in an array concatenation expression currently have no effect, "
-                                  "but may have a new meaning in a future version of Julia.")
-                                 "Please remove extra semicolons to preserve forward compatibility e.g. [1;;3] => [1;3]."))
-             (set! gotnewline #f)
-             (loop '() (update-outer vec outer)))
-            ((#\newline)
-             (or gotnewline (take-token s))
-             (set! gotnewline #f)
-             (loop '() (update-outer vec outer)))
-            ((#\,)
-             (error "unexpected comma in matrix expression"))
-            ((#\] #\})
-             (error (string "unexpected \"" t "\"")))
-            ((for)
-             (if (and (not semicolon)
-                      (length= outer 1)
-                      (null? vec))
-                 (begin ;; if we get here, there must have been some kind of space or separator
-                        ;;(expect-space-before s 'for)
-                        (take-token s)
-                        (parse-comprehension s (car outer) closer))
-                 (error "invalid comprehension syntax")))
-            (else
-             (if (and (pair? vec) (not (ts:space? s)))
-                 (error (string "expected \"" closer "\" or separator in arguments to \""
-                                (if (eqv? closer #\]) #\[ #\{) " " closer
-                                "\"; got \""
-                                (deparse (car vec)) t "\"")))
-             (loop (cons (parse-eq* s) vec) outer))))))))
+    (parse-array-inner s (list (list first)) '() 0 0 closer gotnewline #f))) ; start: one level holding first, orientation undecided, no semicolons seen
 
 (define (expect-space-before s t)
   (if (not (ts:space? s))
@@ -1927,9 +2006,9 @@
                  (take-token s)
                  (if (memv (peek-token s) (list #\, closer))
                      (parse-vect s first closer)
-                     (parse-matrix s first closer #t last-end-symbol)))
+                     (parse-array s first closer #t last-end-symbol)))
                 (else
-                 (parse-matrix s first closer #f last-end-symbol)))))))
+                 (parse-array s first closer #f last-end-symbol)))))))
 
 (define (kw-to-= e) (if (kwarg? e) (cons '= (cdr e)) e))
 (define (=-to-kw e) (if (assignment? e) (cons 'kw (cdr e)) e))
@@ -2109,16 +2188,44 @@
 (define (unescape-parsed-string-literal strs)
   (map-at even? unescape-string strs))
 
+(define (strip-escaped-newline s raw) ; s is a list of pieces; it is returned untouched when raw is true
+  (if raw s (map (lambda (s)
+                   (if (string? s) (strip-escaped-newline- s) s)) ; only string elements are rewritten; others pass through
+                 s)))
+
+;; remove `\` followed by a newline, together with any spaces or tabs directly after that newline
+(define (strip-escaped-newline- s)
+  (let ((in  (open-input-string s))
+        (out (open-output-string)))
+    (define (loop preceding-backslash?) ; preceding-backslash?: the previous char was a `\` that has not been written yet
+          (let ((c (read-char in)))
+            (cond ((eof-object? c)) ; end of input: stop (a pending backslash is not written)
+                  (preceding-backslash?
+                   (if (not (eqv? c #\newline))
+                       (begin (write-char #\\ out) (write-char c out)) ; not a newline: emit the held backslash and c unchanged
+                       ((define (loop-) ; defined and called on the spot: consume following spaces and tabs
+                          (if (memv (peek-char in) '(#\space #\tab))
+                              (begin (take-char in) (loop-))))))
+                   (loop #f))
+                  ((eqv? c #\\) (loop #t)) ; hold the backslash until the next char is known
+                  (else (write-char c out) (loop #f)))))
+    (loop #f)
+    (io.tostring! out)))
+
 (define (parse-string-literal s delim raw)
-  (let ((p (ts:port s)))
-    ((if raw identity unescape-parsed-string-literal)
-     (if (eqv? (peek-char p) delim)
-         (if (eqv? (peek-char (take-char p)) delim)
-             (map-first strip-leading-newline
-                        (dedent-triplequoted-string
-                         (parse-string-literal- 2 (take-char p) s delim raw)))
-             (list ""))
-         (parse-string-literal- 0 p s delim raw)))))
+  (let* ((p (ts:port s))
+         (str (if (eqv? (peek-char p) delim) ; next char is another delimiter
+                  (if (eqv? (peek-char (take-char p)) delim) ; and so is the one after it
+                      (map-first strip-leading-newline
+                                 (dedent-triplequoted-string
+                                   (strip-escaped-newline ; non-raw only: escaped newlines are stripped before dedenting
+                                     (parse-string-literal- 2 (take-char p) s delim raw)
+                                     raw)))
+                      (list "")) ; a single extra delimiter: the result is one empty string
+                  (strip-escaped-newline
+                    (parse-string-literal- 0 p s delim raw)
+                    raw))))
+    (if raw str (unescape-parsed-string-literal str)))) ; unescaping is skipped when raw is true
 
 (define (strip-leading-newline s)
   (let ((n (sizeof s)))
@@ -2267,8 +2374,11 @@
                     (loop (read-char p) b e 0))))
            (let ((nxch (not-eof-for delim (read-char p))))
              (write-char #\\ b)
-             (write-char nxch b)
-             (loop (read-char p) b e 0))))
+             (if (eqv? nxch #\return)
+                 (loop nxch b e 0)
+                 (begin
+                   (write-char nxch b)
+                   (loop (read-char p) b e 0))))))
 
       ((and (eqv? c #\$) (not raw))
        (let* ((ex (parse-interpolate s))
diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm
index 6dfe3c514c75dd..428b0513b7e526 100644
--- a/src/julia-syntax.scm
+++ b/src/julia-syntax.scm
@@ -221,10 +221,11 @@
 
 (define (method-expr-name m)
   (let ((name (cadr m)))
+     (let ((name (if (or (length= m 2) (not (pair? name)) (not (quoted? name))) name (cadr name)))) ; unwrap a quoted name, except when m has only 2 elements
        (cond ((not (pair? name)) name)
              ((eq? (car name) 'outerref) (cadr name))
              ;((eq? (car name) 'globalref) (caddr name))
-             (else name))))
+             (else name)))))
 
 ;; extract static parameter names from a (method ...) expression
 (define (method-expr-static-parameters m)
@@ -252,6 +253,13 @@
             (pair? (caddr e)) (memq (car (caddr e)) '(quote inert))
             (symbol? (cadr (caddr e))))))
 
+(define (overlay? e) ; true when e is a pair whose head is the symbol `overlay`
+  (and (pair? e) (eq? (car e) 'overlay)))
+
+(define (sym-ref-or-overlay? e) ; accepts an (overlay ...) form or anything sym-ref? accepts
+  (or (overlay? e)
+      (sym-ref? e)))
+
 ;; convert final (... x) to (curly Vararg x)
 (define (dots->vararg a)
   (if (null? a) a
@@ -341,14 +349,15 @@
    (let* ((names (map car sparams))
           (anames (map (lambda (x) (if (underscore-symbol? x) UNUSED x)) (llist-vars argl)))
           (unused_anames (filter (lambda (x) (not (eq? x UNUSED))) anames))
-          (ename (if (nodot-sym-ref? name) name `(null))))
+          (ename (if (nodot-sym-ref? name) name
+                    (if (overlay? name) (cadr name) `(null)))))
      (if (has-dups unused_anames)
          (error (string "function argument name not unique: \"" (car (has-dups unused_anames)) "\"")))
      (if (has-dups names)
          (error "function static parameter names not unique"))
      (if (any (lambda (x) (and (not (eq? x UNUSED)) (memq x names))) anames)
          (error "function argument and static parameter names must be distinct"))
-     (if (or (and name (not (sym-ref? name))) (not (valid-name? name)))
+     (if (or (and name (not (sym-ref-or-overlay? name))) (not (valid-name? name)))
          (error (string "invalid function name \"" (deparse name) "\"")))
      (let* ((loc (maybe-remove-functionloc! body))
             (generator (if (expr-contains-p if-generated? body (lambda (x) (not (function-def? x))))
@@ -431,9 +440,14 @@
          (body  (blockify body))
          (ftype (decl-type (car pargl)))
          ;; 1-element list of vararg argument, or empty if none
-         (vararg (let ((l (if (null? pargl) '() (last pargl))))
-                   (if (or (vararg? l) (varargexpr? l))
+         (vararg (let* ((l (if (null? pargl) '() (last pargl)))
+                        ;; handle vararg with default value
+                        (l- (if (kwarg? l) (cadr l) l)))
+                   (if (or (vararg? l-) (varargexpr? l-))
                        (list l) '())))
+         ;; expression to forward varargs to another call
+         (splatted-vararg (if (null? vararg) '()
+                              (list `(... ,(arg-name (car vararg))))))
          ;; positional args with vararg
          (pargl-all pargl)
          ;; positional args without vararg
@@ -509,8 +523,7 @@
                                        ,@(if ordered-defaults keynames vals)
                                        ,@(if (null? restkw) '() `((call (top pairs) (call (core NamedTuple)))))
                                        ,@(map arg-name pargl)
-                                       ,@(if (null? vararg) '()
-                                             (list `(... ,(arg-name (car vararg)))))))))
+                                       ,@splatted-vararg))))
                (if ordered-defaults
                    (scopenest keynames vals ret)
                    ret))))
@@ -568,16 +581,13 @@
                 ,@(if (null? restkw)
                       `((if (call (top isempty) ,rkw)
                             (null)
-                            (call (top kwerr) ,kw ,@(map arg-name pargl)
-                                  ,@(if (null? vararg) '()
-                                        (list `(... ,(arg-name (car vararg))))))))
+                            (call (top kwerr) ,kw ,@(map arg-name pargl) ,@splatted-vararg)))
                       '())
                 (return (call ,mangled  ;; finally, call the core function
                               ,@keynames
                               ,@(if (null? restkw) '() (list rkw))
                               ,@(map arg-name pargl)
-                              ,@(if (null? vararg) '()
-                                    (list `(... ,(arg-name (car vararg)))))))))))
+                              ,@splatted-vararg))))))
         ;; return primary function
         ,(if (not (symbol? name))
              '(null) name)))))
@@ -664,8 +674,7 @@
   (define (throw-unassigned argname)
     `(call (core throw) (call (core UndefKeywordError) (inert ,argname))))
   (define (to-kw x)
-    (cond ((symbol? x) `(kw ,x ,(throw-unassigned x)))
-          ((decl? x) `(kw ,x ,(throw-unassigned (cadr x))))
+    (cond ((symdecl? x) `(kw ,x ,(throw-unassigned (decl-var x))))
           ((nospecialize-meta? x) `(meta ,(cadr x) ,(to-kw (caddr x))))
           (else x)))
   (if (has-parameters? argl)
@@ -882,9 +891,20 @@
 
 (define (struct-def-expr- name params bounds super fields0 mut)
   (receive
-   (fields defs) (separate (lambda (x) (or (symbol? x) (decl? x)))
+   (fields defs) (separate (lambda (x) (or (symbol? x) (eventually-decl? x)))
                            fields0)
-   (let* ((defs        (filter (lambda (x) (not (effect-free? x))) defs))
+   (let* ((attrs ())
+          (fields (let ((n 0))
+                    (map (lambda (x)
+                           (set! n (+ n 1))
+                           (if (and (pair? x) (not (decl? x)))
+                               (begin
+                                 (set! attrs (cons (quotify (car x)) (cons n attrs)))
+                                 (cadr x))
+                               x))
+                         fields)))
+          (attrs (reverse attrs))
+          (defs        (filter (lambda (x) (not (effect-free? x))) defs))
           (locs        (if (and (pair? fields0) (linenum? (car fields0)))
                            (list (car fields0))
                            '()))
@@ -912,6 +932,7 @@
          (toplevel-only struct (outerref ,name))
          (= ,name (call (core _structtype) (thismodule) (inert ,name) (call (core svec) ,@params)
                         (call (core svec) ,@(map quotify field-names))
+                        (call (core svec) ,@attrs)
                         ,mut ,min-initialized))
          (call (core _setsuper!) ,name ,super)
          (if (isdefined (outerref ,name))
@@ -1130,13 +1151,14 @@
                   (argl-stmts (lower-destructuring-args argl))
                   (argl       (car argl-stmts))
                   (name       (check-dotop (car argl)))
+                  (argname    (if (overlay? name) (caddr name) name))
                   ;; fill in first (closure) argument
                   (adj-decl (lambda (n) (if (and (decl? n) (length= n 2))
                                             `(|::| |#self#| ,(cadr n))
                                             n)))
-                  (farg    (if (decl? name)
-                               (adj-decl name)
-                               `(|::| |#self#| (call (core Typeof) ,name))))
+                  (farg    (if (decl? argname)
+                               (adj-decl argname)
+                               `(|::| |#self#| (call (core Typeof) ,argname))))
                   (body       (insert-after-meta body (cdr argl-stmts)))
                   (argl    (cdr argl))
                   (argl    (fix-arglist
@@ -1194,7 +1216,7 @@
         (if (null? binds)
             blk
             (cond
-             ((or (symbol? (car binds)) (decl? (car binds)))
+             ((symdecl? (car binds))
               ;; just symbol -> add local
               (loop (cdr binds)
                     `(scope-block
@@ -1218,8 +1240,7 @@
                                 `(local-def ,name))
                            ,(car binds)
                            ,blk)))))
-               ((or (symbol? (cadar binds))
-                    (decl?   (cadar binds)))
+               ((symdecl?   (cadar binds))
                 (let ((vname (decl-var (cadar binds))))
                   (loop (cdr binds)
                         (let ((tmp (make-ssavalue)))
@@ -1274,9 +1295,9 @@
       (if (null? f)
           '()
           (let ((x (car f)))
-            (cond ((or (symbol? x) (decl? x) (linenum? x))
+            (cond ((or (symdecl? x) (linenum? x))
                    (loop (cdr f)))
-                  ((and (assignment? x) (or (symbol? (cadr x)) (decl? (cadr x))))
+                  ((and (assignment? x) (symdecl? (cadr x)))
                    (error (string "\"" (deparse x) "\" inside type definition is reserved")))
                   (else '())))))
     (expand-forms
@@ -1362,6 +1383,9 @@
            (expand-forms (expand-decls 'const (cdr e) #f)))
           (else e)))))
 
+(define (expand-atomic-decl e) ; e is ignored: this expander unconditionally raises the error below
+  (error "unimplemented or unsupported atomic declaration"))
+
 (define (expand-local-or-global-decl e)
   (if (and (symbol? (cadr e)) (length= e 2))
       e
@@ -1432,7 +1456,7 @@
                        (cons R elts)))
                 ((vararg? L)
                  (if (null? (cdr lhss))
-                     (let ((temp (make-ssavalue)))
+                     (let ((temp (if (eventually-call? (cadr L)) (gensy) (make-ssavalue))))
                        `(block ,@(reverse stmts)
                                (= ,temp (tuple ,@rhss))
                                ,@(reverse after)
@@ -1599,7 +1623,7 @@
                    ,(expand-update-operator op op= (car e) rhs T))))
         (else
          (if (and (pair? lhs) (eq? op= '=)
-                  (not (memq (car lhs) '(|.| tuple vcat typed_hcat typed_vcat))))
+                  (not (memq (car lhs) '(|.| tuple vcat ncat typed_hcat typed_vcat typed_ncat))))
              (error (string "invalid assignment location \"" (deparse lhs) "\"")))
          (expand-update-operator- op op= lhs rhs declT))))
 
@@ -1804,6 +1828,10 @@
                      e))))
           ((and (pair? e) (eq? (car e) 'comparison))
            (dot-to-fuse (expand-compare-chain (cdr e)) top))
+          ((and (pair? e) (eq? (car e) '.&&))
+           (make-fuse '(top andand) (cdr e)))
+          ((and (pair? e) (eq? (car e) '|.\|\||))
+           (make-fuse '(top oror) (cdr e)))
           (else e)))
   (let ((e (dot-to-fuse rhs #t)) ; an expression '(fuse func args) if expr is a dot call
         (lhs-view (ref-to-view lhs))) ; x[...] expressions on lhs turn in to view(x, ...) to update x in-place
@@ -1972,6 +2000,113 @@
                           ,@(apply append rows))))
              `(call ,@vcat ,@a))))))
 
+(define (expand-ncat e (hvncat '((top hvncat)))) ; lower (ncat d . elements); hvncat (optional) is the callee prefix spliced into the call
+  (define (is-row a) (and (pair? a) ; a is a (row ...) or (nrow ...) form
+                          (or (eq? (car a) 'row)
+                              (eq? (car a) 'nrow))))
+  (define (is-1d a) (not (any is-row a))) ; no element of a is a row/nrow form
+  (define (sum xs) (foldl + 0 xs))
+  (define (get-shape a is-row-first d)
+    (define (zip xss) (apply map list xss)) ; transpose a list of lists
+    (define (get-next x) ; the sub-elements of x to descend into at level d, or (list x) when x is not split at this level
+      (cond ((or (not (is-row x))
+                 (and (eq? (car x) 'nrow) (> d (1+ (cadr x))))
+                 (and (eq? (car x) 'row) (> d 1)))
+             (list x))
+            ((eq? (car x) 'nrow) (cddr x))
+            (else (cdr x))))
+    ; describe the shape of the concatenation
+    (cond ((or (= d 0)
+               (and (not is-row-first) (= d 1)))
+           (length a))
+          ((and is-row-first (= d 3))
+           (get-shape a is-row-first (1- d)))
+          (else
+           (let ((ashape
+                 (map (lambda (x)
+                        (get-shape (get-next x) is-row-first (1- d)))
+                      a)))
+             (if (pair? (car ashape))
+                 (let ((zipashape (zip ashape)))
+                   (cons (sum (car zipashape))
+                         (cons (car zipashape)
+                               (map (lambda (x)
+                                      (apply append x))
+                                    (cdr zipashape)))))
+                 (list (sum ashape) ashape))))))
+  (define (get-dims a is-row-first d) ; dimension sizes read from lengths along the first element at each level only
+    (cond ((and (< d 2) (not (is-row (car a))))
+           (list (length a)))
+          ((= d 1)
+           (list (car (get-dims (cdar a) is-row-first 0)) (length a)))
+          ((and (= d 3) is-row-first)
+           (get-dims a is-row-first 2))
+          (else
+           (let ((anext (if (and (pair? (car a))
+                                 (eq? (caar a) 'nrow)
+                                 (= d (1+ (cadar a))))
+                            (cddar a)
+                            (list (car a)))))
+             (cons (length a) (get-dims anext is-row-first (1- d)))))))
+  (define (is-balanced s)
+    ; balanced iff every count in each axis list equals that list's first count; mismatches are counted so they cannot cancel
+    (= 0 (sum (map (lambda (x y)
+                     (sum (map (lambda (z)
+                                 (if (= z y) 0 1))
+                               x)))
+                   (cdr s) (map car (cdr s))))))
+  (define (hasrows-flatten a) ; flatten nested row/nrow forms; returns (row-flag . elements in reverse order)
+    ; (car <result>) stores if a row was observed
+    (foldl (lambda (x y)
+             (let ((r (car y))
+                   (yt (cdr y)))
+               (if (is-row x)
+                   (if (eq? (car x) 'nrow)
+                       (let* ((raflat (append (hasrows-flatten (cddr x))))
+                              (aflat (cdr raflat))
+                              (rinner (car raflat))
+                              (r (if (null? (or r rinner))
+                                     (and r rinner)
+                                     r)))
+                         (if (and (not (null? r))
+                                  (or (null? rinner) (and (not r) rinner))
+                                  (and (= (cadr x) 2) r))
+                             (error "cannot mix space and ;; separators in an array expression, except to wrap a line"))
+                         (cons (if (and (= (cadr x) 2) (null? r))
+                                   #f
+                                   r)
+                               (append aflat yt)))
+                     (if (or (null? r) r)
+                         (cons #t (append (reverse (cdr x)) yt))
+                         (error "cannot mix space and ;; separators in an array expression, except to wrap a line")))
+                 (cons r (cons x yt)))))
+           (list '()) a))
+  (define (tf a) (if a '(true) '(false))) ; scheme boolean -> (true) / (false) expression
+  (define (tuplize s) ; shape list -> nested (tuple ...) expression, with the sub-tuples in reversed order
+    (cons 'tuple (reverse (map (lambda (x)
+                                 (cons 'tuple x))
+                               (cons (list (car s)) (cdr s))))))
+  (let* ((d (cadr e)) ; second item of e: the dimension number
+         (a (cddr e)) ; remaining items: the elements
+         (raflat (hasrows-flatten a))
+         (r (car raflat))
+         (is-row-first (if (null? r) #f r)) ; an undecided flag ('()) is treated as #f
+         (aflat (reverse (cdr raflat)))) ; restore source order
+    (if (any assignment? aflat)
+        (error (string "misplaced assignment statement in \"" (deparse e) "\"")))
+    (if (has-parameters? aflat)
+        (error "unexpected parameters in array expression"))
+    (expand-forms
+      (if (is-1d a)
+          `(call ,@hvncat ,d ,@aflat) ; no rows: pass the dimension number directly
+          (if (any vararg? aflat)
+              (error (string "Splatting ... in an hvncat with multiple dimensions is not supported"))
+              (let ((shape (get-shape a is-row-first d)))
+                (if (is-balanced shape)
+                    (let ((dims `(tuple ,@(reverse (get-dims a is-row-first d))))) ; balanced: a flat tuple of dimension sizes suffices
+                     `(call ,@hvncat ,dims ,(tf is-row-first) ,@aflat))
+                    `(call ,@hvncat ,(tuplize shape) ,(tf is-row-first) ,@aflat)))))))) ; unbalanced: pass the full shape description
+
 (define (expand-property-destruct lhss x)
   (if (not (length= lhss 1))
       (error (string "invalid assignment location \"" (deparse lhs) "\"")))
@@ -2020,9 +2155,13 @@
                       ((eq? l x) #t)
                       (else (in-lhs? x (cdr lhss)))))))
         ;; in-lhs? also checks for invalid syntax, so always call it first
-        (let* ((xx  (if (or (and (not (in-lhs? x lhss)) (symbol? x))
-                            (ssavalue? x))
-                        x (make-ssavalue)))
+        (let* ((xx  (cond ((or (and (not (in-lhs? x lhss)) (symbol? x))
+                               (ssavalue? x))
+                            x)
+                          ((and (pair? lhss) (vararg? (last lhss))
+                                (eventually-call? (cadr (last lhss))))
+                           (gensy))
+                          (else (make-ssavalue))))
                (ini (if (eq? x xx) '() (list (sink-assignment xx (expand-forms x)))))
                (n   (length lhss))
                ;; skip last assignment if it is an all-underscore vararg
@@ -2032,22 +2171,40 @@
                               (- n 1)
                               n))
                         n))
-               (st  (gensy)))
+               (st  (gensy))
+               (end '()))
           `(block
             ,@(if (> n 0) `((local ,st)) '())
             ,@ini
             ,@(map (lambda (i lhs)
-                     (expand-forms
-                       (if (vararg? lhs)
-                           `(= ,(cadr lhs) (call (top rest) ,xx ,@(if (eq? i 0) '() `(,st))))
-                           (lower-tuple-assignment
-                             (if (= i (- n 1))
-                                 (list lhs)
-                                 (list lhs st))
-                             `(call (top indexed_iterate)
-                                    ,xx ,(+ i 1) ,@(if (eq? i 0) '() `(,st)))))))
+                     (let ((lhs- (cond ((or (symbol? lhs) (ssavalue? lhs))
+                                        lhs)
+                                       ((vararg? lhs)
+                                        (let ((lhs- (cadr lhs)))
+                                          (if (or (symbol? lhs-) (ssavalue? lhs-))
+                                              lhs
+                                              `(|...| ,(if (eventually-call? lhs-)
+                                                           (gensy)
+                                                           (make-ssavalue))))))
+                                       ;; can't use ssavalues if it's a function definition
+                                       ((eventually-call? lhs) (gensy))
+                                       (else (make-ssavalue)))))
+                       (if (not (eq? lhs lhs-))
+                           (if (vararg? lhs)
+                               (set! end (cons (expand-forms `(= ,(cadr lhs) ,(cadr lhs-))) end))
+                               (set! end (cons (expand-forms `(= ,lhs ,lhs-)) end))))
+                       (expand-forms
+                         (if (vararg? lhs-)
+                             `(= ,(cadr lhs-) (call (top rest) ,xx ,@(if (eq? i 0) '() `(,st))))
+                             (lower-tuple-assignment
+                               (if (= i (- n 1))
+                                   (list lhs-)
+                                   (list lhs- st))
+                               `(call (top indexed_iterate)
+                                      ,xx ,(+ i 1) ,@(if (eq? i 0) '() `(,st))))))))
                    (iota n)
                    lhss)
+            ,@(reverse end)
             (unnecessary ,xx))))))
 
 ;; move an assignment into the last statement of a block to keep more statements at top level
@@ -2125,6 +2282,11 @@
          ;; e = (|.| f x)
          (expand-fuse-broadcast '() e)))
 
+   '.&&
+   (lambda (e) (expand-fuse-broadcast '() e))
+   '|.\|\||
+   (lambda (e) (expand-fuse-broadcast '() e))
+
    '.=
    (lambda (e)
      (expand-fuse-broadcast (cadr e) (caddr e)))
@@ -2140,6 +2302,7 @@
    (lambda (e) (expand-forms (expand-wheres (cadr e) (cddr e))))
 
    'const  expand-const-decl
+   'atomic expand-atomic-decl
    'local  expand-local-or-global-decl
    'global expand-local-or-global-decl
    'local-def expand-local-or-global-decl
@@ -2211,7 +2374,7 @@
                 (expand-tuple-destruct lhss x))))
          ((typed_hcat)
           (error "invalid spacing in left side of indexed assignment"))
-         ((typed_vcat)
+         ((typed_vcat typed_ncat)
           (error "unexpected \";\" in left side of indexed assignment"))
          ((ref)
           ;; (= (ref a . idxs) rhs)
@@ -2248,7 +2411,7 @@
                `(block ,@(cdr e)
                        (decl ,(car e) ,T)
                        (= ,(car e) ,rhs))))))
-         ((vcat)
+         ((vcat ncat)
           ;; (= (vcat . args) rhs)
           (error "use \"(a, b) = ...\" to assign multiple values"))
          (else
@@ -2384,11 +2547,12 @@
    'string
    (lambda (e)
      (expand-forms
-      `(call (top string) ,@(map (lambda (s)
-                                   (if (and (pair? s) (eq? (car s) 'string))
-                                       (cadr s)
-                                       s))
-                                 (cdr e)))))
+       `(call (top string)
+              ,@(map (lambda (s)
+                       (if (and (length= s 2) (eq? (car s) 'string) (string? (cadr s)))
+                           (cadr s)
+                           s))
+                     (cdr e)))))
 
    '|::|
    (lambda (e)
@@ -2483,6 +2647,8 @@
 
    'vcat expand-vcat
 
+   'ncat expand-ncat
+
    'typed_hcat
    (lambda (e)
      (if (any assignment? (cddr e))
@@ -2495,6 +2661,12 @@
            (e (cdr e)))
        (expand-vcat e `((top typed_vcat) ,t) `((top typed_hvcat) ,t) `((top typed_hvcat_rows) ,t))))
 
+   'typed_ncat
+   (lambda (e)
+     (let ((t (cadr e))
+           (e (cdr e)))
+       (expand-ncat e `((top typed_hvncat) ,t))))
+
    '|'|  (lambda (e) (expand-forms `(call |'| ,(cadr e))))
 
    'generator
@@ -2601,15 +2773,13 @@
          ,result)))))
 
 (define (lhs-vars e)
-  (cond ((symbol? e) (list e))
-        ((decl? e)   (list (decl-var e)))
+  (cond ((symdecl? e)   (list (decl-var e)))
         ((and (pair? e) (eq? (car e) 'tuple))
          (apply append (map lhs-vars (cdr e))))
         (else '())))
 
 (define (lhs-decls e)
-  (cond ((symbol? e) (list e))
-        ((decl? e)   (list e))
+  (cond ((symdecl? e)   (list e))
         ((and (pair? e) (eq? (car e) 'tuple))
          (apply append (map lhs-decls (cdr e))))
         (else '())))
@@ -3086,6 +3256,7 @@ f(x) = yt(x)
                 ,@(map (lambda (p n) `(= ,p (call (core TypeVar) ',n (core Any)))) P names)
                 (= ,s (call (core _structtype) (thismodule) (inert ,name) (call (core svec) ,@P)
                             (call (core svec) ,@(map quotify fields))
+                            (call (core svec))
                             (false) ,(length fields)))
                 (= (outerref ,name) ,s)
                 (call (core _setsuper!) ,name ,super)
@@ -3099,6 +3270,7 @@ f(x) = yt(x)
                (block (global ,name) (const ,name)
                       (= ,s (call (core _structtype) (thismodule) (inert ,name) (call (core svec))
                                   (call (core svec) ,@(map quotify fields))
+                                  (call (core svec))
                                   (false) ,(length fields)))
                       (= (outerref ,name) ,s)
                       (call (core _setsuper!) ,name ,super)
@@ -3325,8 +3497,8 @@ f(x) = yt(x)
          meta inbounds boundscheck loopinfo decl aliasscope popaliasscope
          thunk with-static-parameters toplevel-only
          global globalref outerref const-if-global thismodule
-         const null true false ssavalue isdefined toplevel module lambda error
-         gc_preserve_begin gc_preserve_end import using export)))
+         const atomic null true false ssavalue isdefined toplevel module lambda
+         error gc_preserve_begin gc_preserve_end import using export inline noinline)))
 
 (define (local-in? s lam)
   (or (assq s (car  (lam:vinfo lam)))
@@ -3573,6 +3745,7 @@ f(x) = yt(x)
                      '(null)
                      `(newvar ,(cadr e))))))
           ((const) e)
+          ((atomic) e)
           ((const-if-global)
            (if (local-in? (cadr e) lam)
                '(null)
@@ -4067,13 +4240,14 @@ f(x) = yt(x)
                   (else #f)))
           (case (car e)
             ((call new splatnew foreigncall cfunction new_opaque_closure)
+             (define (atom-or-not-tuple-call? fptr)
+               (or (atom? fptr)
+                   (not (tuple-call? fptr))))
              (let* ((args
                      (cond ((eq? (car e) 'foreigncall)
                             ;; NOTE: 2nd to 5th arguments of ccall must be left in place
                             ;;       the 1st should be compiled if an atom.
-                            (append (if (let ((fptr (cadr e)))
-                                          (or (atom? fptr)
-                                              (not (tuple-call? fptr))))
+                            (append (if (atom-or-not-tuple-call? (cadr e))
                                         (compile-args (list (cadr e)) break-labels)
                                         (list (cadr e)))
                                     (list-head (cddr e) 4)
@@ -4092,12 +4266,15 @@ f(x) = yt(x)
                                   (compile-args (list-head (cdr e) 4) break-labels)
                                   (list (append (butlast oc_method) (list lambda)))
                                   (compile-args (list-tail (cdr e) 5) break-labels))))
-                           ;; TODO: evaluate first argument to cglobal some other way
+                           ;; NOTE: 1st argument to cglobal treated same as for ccall
                            ((and (length> e 2)
                                  (or (eq? (cadr e) 'cglobal)
                                      (equal? (cadr e) '(outerref cglobal))))
-                            (list* (cadr e) (caddr e)
-                                   (compile-args (cdddr e) break-labels)))
+                            (append (list (cadr e))
+                                    (if (atom-or-not-tuple-call? (caddr e))
+                                        (compile-args (list (caddr e)) break-labels)
+                                        (list (caddr e)))
+                                    (compile-args (cdddr e) break-labels)))
                            (else
                             (compile-args (cdr e) break-labels))))
                     (callex (cons (car e) args)))
@@ -4352,6 +4529,7 @@ f(x) = yt(x)
                      (if (not global-const-error)
                          (set! global-const-error current-loc))
                      (emit e))))
+            ((atomic) (error "misplaced atomic declaration"))
             ((isdefined) (if tail (emit-return e) e))
             ((boundscheck) (if tail (emit-return e) e))
 
@@ -4414,7 +4592,7 @@ f(x) = yt(x)
                (cons (car e) args)))
 
             ;; metadata expressions
-            ((line meta inbounds loopinfo gc_preserve_end aliasscope popaliasscope)
+            ((line meta inbounds loopinfo gc_preserve_end aliasscope popaliasscope inline noinline)
              (let ((have-ret? (and (pair? code) (pair? (car code)) (eq? (caar code) 'return))))
                (cond ((eq? (car e) 'line)
                       (set! current-loc e)
@@ -4559,7 +4737,7 @@ f(x) = yt(x)
           (begin (set! linetable (cons (make-lineinfo name file line) linetable))
                  (set! current-loc 1)))
       (if (or reachable
-              (and (pair? e) (memq (car e) '(meta inbounds gc_preserve_begin gc_preserve_end aliasscope popaliasscope))))
+              (and (pair? e) (memq (car e) '(meta inbounds gc_preserve_begin gc_preserve_end aliasscope popaliasscope inline noinline))))
           (begin (set! code (cons e code))
                  (set! i (+ i 1))
                  (set! locs (cons current-loc locs)))))
diff --git a/src/julia.expmap b/src/julia.expmap
index d9d255cc8c547a..5f03eccbfcad6c 100644
--- a/src/julia.expmap
+++ b/src/julia.expmap
@@ -37,7 +37,6 @@
     _IO_stdin_used;
     __ZN4llvm23createLowerSimdLoopPassEv;
     LLVMExtra*;
-    repl_entrypoint;
 
     /* freebsd */
     environ;
diff --git a/src/julia.h b/src/julia.h
index 3b28558fcd695c..e53b33bef674da 100644
--- a/src/julia.h
+++ b/src/julia.h
@@ -9,6 +9,7 @@
 #define STORE_ARRAY_LEN
 //** End Configuration options **//
 
+#include "julia_fasttls.h"
 #include "libsupport.h"
 #include <stdint.h>
 #include <string.h>
@@ -37,6 +38,13 @@
 #  define MAX_ALIGN 8
 #endif
 
+// Define the largest size (bytes) of a properly aligned object that the
+// processor family and compiler typically support without a lock
+// (assumed to be at least a pointer size). Since C is bad at handling 16-byte
+// types, we currently use 8 here as the default.
+#define MAX_ATOMIC_SIZE 8
+#define MAX_POINTERATOMIC_SIZE 8
+
 #ifdef _P64
 #define NWORDS(sz) (((sz)+7)>>3)
 #else
@@ -65,32 +73,12 @@
 #  define JL_THREAD_LOCAL
 #endif
 
-#if defined(__has_feature) // Clang flavor
-#if __has_feature(address_sanitizer)
-#define JL_ASAN_ENABLED
-#endif
-#if __has_feature(memory_sanitizer)
-#define JL_MSAN_ENABLED
-#endif
-#if __has_feature(thread_sanitizer)
-#if __clang_major__ < 11
-#error Thread sanitizer runtime libraries in clang < 11 leak memory and cannot be used
-#endif
-#define JL_TSAN_ENABLED
-#endif
-#else // GCC flavor
-#if defined(__SANITIZE_ADDRESS__)
-#define JL_ASAN_ENABLED
-#endif
-#endif // __has_feature
-
 #define container_of(ptr, type, member) \
     ((type *) ((char *)(ptr) - offsetof(type, member)))
 
 typedef struct _jl_taggedvalue_t jl_taggedvalue_t;
 
 #include "atomics.h"
-#include "tls.h"
 #include "julia_threads.h"
 #include "julia_assert.h"
 
@@ -267,7 +255,8 @@ typedef struct _jl_code_info_t {
     jl_value_t *ssavaluetypes; // types of ssa values (or count of them)
     jl_array_t *ssaflags; // flags associated with each statement:
         // 0 = inbounds
-        // 1,2 = <reserved> inlinehint,always-inline,noinline
+        // 1 = inline
+        // 2 = noinline
         // 3 = <reserved> strict-ieee (strictfp)
         // 4 = effect-free (may be deleted if unused)
         // 5-6 = <unused>
@@ -311,6 +300,7 @@ typedef struct _jl_method_t {
     jl_array_t *speckeyset; // index lookup by hash into specializations
 
     jl_value_t *slot_syms; // compacted list of slot names (String)
+    jl_value_t *external_mt; // reference to the method table this method is part of, null if part of the internal table
     jl_value_t *source;  // original code template (jl_code_info_t, but may be compressed), null for builtins
     struct _jl_method_instance_t *unspecialized;  // unspecialized executable method instance, or null
     jl_value_t *generator;  // executable code-generating function if available
@@ -423,14 +413,21 @@ typedef struct {
     jl_sym_t *name;
     struct _jl_module_t *module;
     jl_svec_t *names;  // field names
+    const uint32_t *atomicfields; // if any fields are atomic, we record them here
+    //const uint32_t *constfields; // if any fields are const, we record them here
     // `wrapper` is either the only instantiation of the type (if no parameters)
     // or a UnionAll accepting parameters to make an instantiation.
     jl_value_t *wrapper;
     jl_svec_t *cache;        // sorted array
     jl_svec_t *linearcache;  // unsorted array
-    intptr_t hash;
     struct _jl_methtable_t *mt;
     jl_array_t *partial;     // incomplete instantiations of this type
+    intptr_t hash;
+    int32_t n_uninitialized;
+    // type properties
+    uint8_t abstract:1;
+    uint8_t mutabl:1;
+    uint8_t mayinlinealloc:1;
 } jl_typename_t;
 
 typedef struct {
@@ -483,23 +480,18 @@ typedef struct _jl_datatype_t {
     struct _jl_datatype_t *super;
     jl_svec_t *parameters;
     jl_svec_t *types;
-    jl_svec_t *names;
     jl_value_t *instance;  // for singletons
     const jl_datatype_layout_t *layout;
     int32_t size; // TODO: move to _jl_datatype_layout_t
-    int32_t ninitialized;
-    uint32_t hash;
-    uint8_t abstract;
-    uint8_t mutabl;
     // memoized properties
-    uint8_t hasfreetypevars; // majority part of isconcrete computation
-    uint8_t isconcretetype; // whether this type can have instances
-    uint8_t isdispatchtuple; // aka isleaftupletype
-    uint8_t isbitstype; // relevant query for C-api and type-parameters
-    uint8_t zeroinit; // if one or more fields requires zero-initialization
-    uint8_t isinlinealloc; // if this is allocated inline
-    uint8_t has_concrete_subtype; // If clear, no value will have this datatype
-    uint8_t cached_by_hash; // stored in hash-based set cache (instead of linear cache)
+    uint32_t hash;
+    uint8_t hasfreetypevars:1; // majority part of isconcrete computation
+    uint8_t isconcretetype:1; // whether this type can have instances
+    uint8_t isdispatchtuple:1; // aka isleaftupletype
+    uint8_t isbitstype:1; // relevant query for C-api and type-parameters
+    uint8_t zeroinit:1; // if one or more fields requires zero-initialization
+    uint8_t has_concrete_subtype:1; // If clear, no value will have this datatype
+    uint8_t cached_by_hash:1; // stored in hash-based set cache (instead of linear cache)
 } jl_datatype_t;
 
 typedef struct _jl_vararg_t {
@@ -676,6 +668,7 @@ extern JL_DLLIMPORT jl_datatype_t *jl_initerror_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_typeerror_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_methoderror_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_undefvarerror_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_atomicerror_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_lineinfonode_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_value_t *jl_stackovf_exception JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_value_t *jl_memory_exception JL_GLOBALLY_ROOTED;
@@ -715,6 +708,7 @@ extern JL_DLLIMPORT jl_typename_t *jl_llvmpointer_typename JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_typename_t *jl_namedtuple_typename JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_unionall_t *jl_namedtuple_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_task_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_value_t *jl_pair_type JL_GLOBALLY_ROOTED;
 
 extern JL_DLLIMPORT jl_value_t *jl_array_uint8_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_value_t *jl_array_any_type JL_GLOBALLY_ROOTED;
@@ -745,11 +739,11 @@ extern JL_DLLIMPORT jl_value_t *jl_nothing JL_GLOBALLY_ROOTED;
 
 // gc -------------------------------------------------------------------------
 
-typedef struct _jl_gcframe_t {
+struct _jl_gcframe_t {
     size_t nroots;
     struct _jl_gcframe_t *prev;
     // actual roots go here
-} jl_gcframe_t;
+};
 
 // NOTE: it is the caller's responsibility to make sure arguments are
 // rooted such that the gc can see them on the stack.
@@ -760,7 +754,7 @@ typedef struct _jl_gcframe_t {
 // jl_value_t *x=NULL, *y=NULL; JL_GC_PUSH2(&x, &y);
 // x = f(); y = g(); foo(x, y)
 
-#define jl_pgcstack (jl_get_ptls_states()->pgcstack)
+#define jl_pgcstack (jl_current_task->gcstack)
 
 #define JL_GC_ENCODE_PUSHARGS(n)   (((size_t)(n))<<2)
 #define JL_GC_ENCODE_PUSH(n)       ((((size_t)(n))<<2)|1)
@@ -838,7 +832,8 @@ typedef enum {
 
 JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t);
 
-JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f);
+JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_finalize(jl_value_t *o);
 JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value);
 JL_DLLEXPORT jl_value_t *jl_gc_alloc_0w(void);
@@ -853,10 +848,10 @@ JL_DLLEXPORT void jl_gc_use(jl_value_t *a);
 JL_DLLEXPORT void jl_clear_malloc_data(void);
 
 // GC write barriers
-JL_DLLEXPORT void jl_gc_queue_root(jl_value_t *root) JL_NOTSAFEPOINT;
-JL_DLLEXPORT void jl_gc_queue_multiroot(jl_value_t *root, jl_value_t *stored) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *root) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_gc_queue_multiroot(const jl_value_t *root, const jl_value_t *stored) JL_NOTSAFEPOINT;
 
-STATIC_INLINE void jl_gc_wb(void *parent, void *ptr) JL_NOTSAFEPOINT
+STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT
 {
     // parent and ptr isa jl_value_t*
     if (__unlikely(jl_astaggedvalue(parent)->bits.gc == 3 && // parent is old and not in remset
@@ -864,7 +859,7 @@ STATIC_INLINE void jl_gc_wb(void *parent, void *ptr) JL_NOTSAFEPOINT
         jl_gc_queue_root((jl_value_t*)parent);
 }
 
-STATIC_INLINE void jl_gc_wb_back(void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t*
+STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t*
 {
     // if ptr is old
     if (__unlikely(jl_astaggedvalue(ptr)->bits.gc == 3)) {
@@ -872,7 +867,7 @@ STATIC_INLINE void jl_gc_wb_back(void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_
     }
 }
 
-STATIC_INLINE void jl_gc_multi_wb(void *parent, jl_value_t *ptr) JL_NOTSAFEPOINT
+STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT
 {
     // ptr is an immutable object
     if (__likely(jl_astaggedvalue(parent)->bits.gc != 3))
@@ -1023,14 +1018,7 @@ JL_DLLEXPORT jl_svec_t *jl_compute_fieldtypes(jl_datatype_t *st JL_PROPAGATES_RO
 #define jl_get_fieldtypes(st) ((st)->types ? (st)->types : jl_compute_fieldtypes((st), NULL))
 STATIC_INLINE jl_svec_t *jl_field_names(jl_datatype_t *st) JL_NOTSAFEPOINT
 {
-    jl_svec_t *names = st->names;
-    if (!names)
-        names = st->name->names;
-    return names;
-}
-STATIC_INLINE jl_sym_t *jl_field_name(jl_datatype_t *st, size_t i) JL_NOTSAFEPOINT
-{
-    return (jl_sym_t*)jl_svecref(jl_field_names(st), i);
+    return st->name->names;
 }
 STATIC_INLINE jl_value_t *jl_field_type(jl_datatype_t *st JL_PROPAGATES_ROOT, size_t i)
 {
@@ -1046,7 +1034,6 @@ STATIC_INLINE jl_value_t *jl_field_type_concrete(jl_datatype_t *st JL_PROPAGATES
 #define jl_datatype_align(t)   (((jl_datatype_t*)t)->layout->alignment)
 #define jl_datatype_nbits(t)   ((((jl_datatype_t*)t)->size)*8)
 #define jl_datatype_nfields(t) (((jl_datatype_t*)(t))->layout->nfields)
-#define jl_datatype_isinlinealloc(t) (((jl_datatype_t *)(t))->isinlinealloc)
 
 JL_DLLEXPORT void *jl_symbol_name(jl_sym_t *s);
 // inline version with strong type check to detect typos in a `->name` chain
@@ -1123,6 +1110,17 @@ static inline uint32_t jl_ptr_offset(jl_datatype_t *st, int i) JL_NOTSAFEPOINT
     }
 }
 
+// Return 1 if field `i` of `st` has its bit set in the typename's `atomicfields`
+// bitmask (32 fields per uint32_t word), else 0; a NULL mask means no bits set.
+static inline int jl_field_isatomic(jl_datatype_t *st, int i) JL_NOTSAFEPOINT
+{
+    // if (!st->name->mutabl) return 0; // TODO: is this fast-path helpful?
+    const uint32_t *atomicfields = st->name->atomicfields;
+    if (atomicfields == NULL)
+        return 0;
+    return (atomicfields[i / 32] & ((uint32_t)1 << (i % 32))) != 0; // unsigned shift: `1 << 31` is UB on signed int
+}
+
 static inline int jl_is_layout_opaque(const jl_datatype_layout_t *l) JL_NOTSAFEPOINT
 {
     return l->nfields == 0 && l->npointers > 0;
@@ -1135,10 +1133,10 @@ static inline int jl_is_layout_opaque(const jl_datatype_layout_t *l) JL_NOTSAFEP
 #define jl_is_svec(v)        jl_typeis(v,jl_simplevector_type)
 #define jl_is_simplevector(v) jl_is_svec(v)
 #define jl_is_datatype(v)    jl_typeis(v,jl_datatype_type)
-#define jl_is_mutable(t)     (((jl_datatype_t*)t)->mutabl)
-#define jl_is_mutable_datatype(t) (jl_is_datatype(t) && (((jl_datatype_t*)t)->mutabl))
-#define jl_is_immutable(t)   (!((jl_datatype_t*)t)->mutabl)
-#define jl_is_immutable_datatype(t) (jl_is_datatype(t) && (!((jl_datatype_t*)t)->mutabl))
+#define jl_is_mutable(t)     (((jl_datatype_t*)t)->name->mutabl)
+#define jl_is_mutable_datatype(t) (jl_is_datatype(t) && (((jl_datatype_t*)t)->name->mutabl))
+#define jl_is_immutable(t)   (!((jl_datatype_t*)t)->name->mutabl)
+#define jl_is_immutable_datatype(t) (jl_is_datatype(t) && (!((jl_datatype_t*)t)->name->mutabl))
 #define jl_is_uniontype(v)   jl_typeis(v,jl_uniontype_type)
 #define jl_is_typevar(v)     jl_typeis(v,jl_tvar_type)
 #define jl_is_unionall(v)    jl_typeis(v,jl_unionall_type)
@@ -1207,7 +1205,7 @@ STATIC_INLINE int jl_is_primitivetype(void *v) JL_NOTSAFEPOINT
 STATIC_INLINE int jl_is_structtype(void *v) JL_NOTSAFEPOINT
 {
     return (jl_is_datatype(v) &&
-            !((jl_datatype_t*)(v))->abstract &&
+            !((jl_datatype_t*)(v))->name->abstract &&
             !jl_is_primitivetype(v));
 }
 
@@ -1223,7 +1221,7 @@ STATIC_INLINE int jl_is_datatype_singleton(jl_datatype_t *d) JL_NOTSAFEPOINT
 
 STATIC_INLINE int jl_is_abstracttype(void *v) JL_NOTSAFEPOINT
 {
-    return (jl_is_datatype(v) && ((jl_datatype_t*)(v))->abstract);
+    return (jl_is_datatype(v) && ((jl_datatype_t*)(v))->name->abstract);
 }
 
 STATIC_INLINE int jl_is_array_type(void *t) JL_NOTSAFEPOINT
@@ -1302,25 +1300,31 @@ STATIC_INLINE int jl_is_array_zeroinit(jl_array_t *a) JL_NOTSAFEPOINT
 }
 
 // object identity
-JL_DLLEXPORT int jl_egal(jl_value_t *a JL_MAYBE_UNROOTED, jl_value_t *b JL_MAYBE_UNROOTED) JL_NOTSAFEPOINT;
-JL_DLLEXPORT int jl_egal__bits(jl_value_t *a JL_MAYBE_UNROOTED, jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT;
-JL_DLLEXPORT int jl_egal__special(jl_value_t *a JL_MAYBE_UNROOTED, jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_egal(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_egal__bits(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_egal__special(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_egal__unboxed(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT;
 JL_DLLEXPORT uintptr_t jl_object_id(jl_value_t *v) JL_NOTSAFEPOINT;
 
-STATIC_INLINE int jl_egal_(jl_value_t *a JL_MAYBE_UNROOTED, jl_value_t *b JL_MAYBE_UNROOTED) JL_NOTSAFEPOINT
+STATIC_INLINE int jl_egal__unboxed_(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT
 {
-    if (a == b)
-        return 1;
-    jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(a);
-    if (dt != (jl_datatype_t*)jl_typeof(b))
-        return 0;
-    if (dt->mutabl) {
+    if (dt->name->mutabl) {
         if (dt == jl_simplevector_type || dt == jl_string_type || dt == jl_datatype_type)
             return jl_egal__special(a, b, dt);
         return 0;
     }
     return jl_egal__bits(a, b, dt);
 }
+
+STATIC_INLINE int jl_egal_(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED) JL_NOTSAFEPOINT
+{ // inline egal check: pointer identity first, then runtime type, then a type-directed comparison
+    if (a == b) // same pointer: report equal without inspecting the values
+        return 1;
+    jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(a);
+    if (dt != (jl_datatype_t*)jl_typeof(b)) // differing runtime types: report not equal
+        return 0;
+    return jl_egal__unboxed_(a, b, dt); // same type: result comes from the helper that dispatches on `dt`
+}
 #define jl_egal(a, b) jl_egal_((a), (b))
 
 // type predicates and basic operations
@@ -1354,19 +1358,23 @@ STATIC_INLINE int jl_is_concrete_type(jl_value_t *v) JL_NOTSAFEPOINT
 JL_DLLEXPORT int jl_isa_compileable_sig(jl_tupletype_t *type, jl_method_t *definition);
 
 // type constructors
-JL_DLLEXPORT jl_typename_t *jl_new_typename_in(jl_sym_t *name, jl_module_t *inmodule);
+JL_DLLEXPORT jl_typename_t *jl_new_typename_in(jl_sym_t *name, jl_module_t *inmodule, int abstract, int mutabl);
 JL_DLLEXPORT jl_tvar_t *jl_new_typevar(jl_sym_t *name, jl_value_t *lb, jl_value_t *ub);
 JL_DLLEXPORT jl_value_t *jl_instantiate_unionall(jl_unionall_t *u, jl_value_t *p);
 JL_DLLEXPORT jl_value_t *jl_apply_type(jl_value_t *tc, jl_value_t **params, size_t n);
 JL_DLLEXPORT jl_value_t *jl_apply_type1(jl_value_t *tc, jl_value_t *p1);
 JL_DLLEXPORT jl_value_t *jl_apply_type2(jl_value_t *tc, jl_value_t *p1, jl_value_t *p2);
+JL_DLLEXPORT jl_datatype_t *jl_apply_modify_type(jl_value_t *dt);
+JL_DLLEXPORT jl_datatype_t *jl_apply_cmpswap_type(jl_value_t *dt);
 JL_DLLEXPORT jl_tupletype_t *jl_apply_tuple_type(jl_svec_t *params);
 JL_DLLEXPORT jl_tupletype_t *jl_apply_tuple_type_v(jl_value_t **p, size_t np);
 JL_DLLEXPORT jl_datatype_t *jl_new_datatype(jl_sym_t *name,
                                             jl_module_t *module,
                                             jl_datatype_t *super,
                                             jl_svec_t *parameters,
-                                            jl_svec_t *fnames, jl_svec_t *ftypes,
+                                            jl_svec_t *fnames,
+                                            jl_svec_t *ftypes,
+                                            jl_svec_t *fattrs,
                                             int abstract, int mutabl,
                                             int ninitialized);
 JL_DLLEXPORT jl_datatype_t *jl_new_primitivetype(jl_value_t *name,
@@ -1375,7 +1383,12 @@ JL_DLLEXPORT jl_datatype_t *jl_new_primitivetype(jl_value_t *name,
                                                  jl_svec_t *parameters, size_t nbits);
 
 // constructors
-JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *bt, void *data);
+JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *bt, const void *src);
+JL_DLLEXPORT jl_value_t *jl_atomic_new_bits(jl_value_t *dt, const char *src);
+JL_DLLEXPORT void jl_atomic_store_bits(char *dst, const jl_value_t *src, int nb);
+JL_DLLEXPORT jl_value_t *jl_atomic_swap_bits(jl_value_t *dt, char *dst, const jl_value_t *src, int nb);
+JL_DLLEXPORT int jl_atomic_bool_cmpswap_bits(char *dst, const jl_value_t *expected, const jl_value_t *src, int nb);
+JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t *rettype, char *dst, const jl_value_t *expected, const jl_value_t *src, int nb);
 JL_DLLEXPORT jl_value_t *jl_new_struct(jl_datatype_t *type, ...);
 JL_DLLEXPORT jl_value_t *jl_new_structv(jl_datatype_t *type, jl_value_t **args, uint32_t na);
 JL_DLLEXPORT jl_value_t *jl_new_structt(jl_datatype_t *type, jl_value_t *tup);
@@ -1399,7 +1412,7 @@ JL_DLLEXPORT jl_value_t *jl_generic_function_def(jl_sym_t *name,
                                                  jl_module_t *module,
                                                  jl_value_t **bp, jl_value_t *bp_owner,
                                                  jl_binding_t *bnd);
-JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata, jl_code_info_t *f, jl_module_t *module);
+JL_DLLEXPORT jl_method_t *jl_method_def(jl_svec_t *argdata, jl_methtable_t *mt, jl_code_info_t *f, jl_module_t *module);
 JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo);
 JL_DLLEXPORT jl_code_info_t *jl_copy_code_info(jl_code_info_t *src);
 JL_DLLEXPORT size_t jl_get_world_counter(void) JL_NOTSAFEPOINT;
@@ -1462,8 +1475,7 @@ JL_DLLEXPORT jl_value_t *jl_get_nth_field(jl_value_t *v, size_t i);
 // Like jl_get_nth_field above, but asserts if it needs to allocate
 JL_DLLEXPORT jl_value_t *jl_get_nth_field_noalloc(jl_value_t *v JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_get_nth_field_checked(jl_value_t *v, size_t i);
-JL_DLLEXPORT void        jl_set_nth_field(jl_value_t *v, size_t i,
-                                          jl_value_t *rhs) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void        jl_set_nth_field(jl_value_t *v, size_t i, jl_value_t *rhs) JL_NOTSAFEPOINT;
 JL_DLLEXPORT int         jl_field_isdefined(jl_value_t *v, size_t i) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_get_field(jl_value_t *o, const char *fld);
 JL_DLLEXPORT jl_value_t *jl_value_ptr(jl_value_t *a);
@@ -1531,11 +1543,10 @@ JL_DLLEXPORT jl_binding_t *jl_get_binding_or_error(jl_module_t *m, jl_sym_t *var
 JL_DLLEXPORT jl_value_t *jl_module_globalref(jl_module_t *m, jl_sym_t *var);
 // get binding for assignment
 JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, int error);
-JL_DLLEXPORT jl_binding_t *jl_get_binding_for_method_def(jl_module_t *m JL_PROPAGATES_ROOT,
-                                                         jl_sym_t *var);
+JL_DLLEXPORT jl_binding_t *jl_get_binding_for_method_def(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var);
 JL_DLLEXPORT int jl_boundp(jl_module_t *m, jl_sym_t *var);
-JL_DLLEXPORT int jl_defines_or_exports_p(jl_module_t *m, jl_sym_t *var) JL_NOTSAFEPOINT;
-JL_DLLEXPORT int jl_binding_resolved_p(jl_module_t *m, jl_sym_t *var) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_defines_or_exports_p(jl_module_t *m, jl_sym_t *var);
+JL_DLLEXPORT int jl_binding_resolved_p(jl_module_t *m, jl_sym_t *var);
 JL_DLLEXPORT int jl_is_const(jl_module_t *m, jl_sym_t *var);
 JL_DLLEXPORT jl_value_t *jl_get_global(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var);
 JL_DLLEXPORT void jl_set_global(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT);
@@ -1549,7 +1560,7 @@ JL_DLLEXPORT void jl_module_import(jl_module_t *to, jl_module_t *from, jl_sym_t
 JL_DLLEXPORT void jl_module_import_as(jl_module_t *to, jl_module_t *from, jl_sym_t *s, jl_sym_t *asname);
 JL_DLLEXPORT void jl_module_export(jl_module_t *from, jl_sym_t *s);
 JL_DLLEXPORT int jl_is_imported(jl_module_t *m, jl_sym_t *s);
-JL_DLLEXPORT int jl_module_exports_p(jl_module_t *m, jl_sym_t *var) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_module_exports_p(jl_module_t *m, jl_sym_t *var);
 JL_DLLEXPORT void jl_add_standard_imports(jl_module_t *m);
 STATIC_INLINE jl_function_t *jl_get_function(jl_module_t *m, const char *name)
 {
@@ -1593,6 +1604,7 @@ JL_DLLEXPORT void JL_NORETURN jl_type_error_rt(const char *fname,
                                                jl_value_t *ty JL_MAYBE_UNROOTED,
                                                jl_value_t *got JL_MAYBE_UNROOTED);
 JL_DLLEXPORT void JL_NORETURN jl_undefined_var_error(jl_sym_t *var);
+JL_DLLEXPORT void JL_NORETURN jl_atomic_error(char *str);
 JL_DLLEXPORT void JL_NORETURN jl_bounds_error(jl_value_t *v JL_MAYBE_UNROOTED,
                                               jl_value_t *t JL_MAYBE_UNROOTED);
 JL_DLLEXPORT void JL_NORETURN jl_bounds_error_v(jl_value_t *v JL_MAYBE_UNROOTED,
@@ -1639,10 +1651,6 @@ typedef enum {
     JL_IMAGE_JULIA_HOME = 1,
     //JL_IMAGE_LIBJULIA = 2,
 } JL_IMAGE_SEARCH;
-// this helps turn threading compilation mismatches into linker errors
-#define julia_init julia_init__threading
-#define jl_init jl_init__threading
-#define jl_init_with_image jl_init_with_image__threading
 
 JL_DLLEXPORT const char *jl_get_libdir(void);
 JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel);
@@ -1703,7 +1711,7 @@ enum JL_RTLD_CONSTANT {
 #define JL_RTLD_DEFAULT (JL_RTLD_LAZY | JL_RTLD_DEEPBIND)
 
 typedef void *jl_uv_libhandle; // compatible with dlopen (void*) / LoadLibrary (HMODULE)
-JL_DLLEXPORT jl_uv_libhandle jl_load_dynamic_library(const char *fname, unsigned flags, int throw_err) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_uv_libhandle jl_load_dynamic_library(const char *fname, unsigned flags, int throw_err);
 JL_DLLEXPORT jl_uv_libhandle jl_dlopen(const char *filename, unsigned flags) JL_NOTSAFEPOINT;
 JL_DLLEXPORT int jl_dlclose(jl_uv_libhandle handle) JL_NOTSAFEPOINT;
 JL_DLLEXPORT int jl_dlsym(jl_uv_libhandle handle, const char *symbol, void ** value, int throw_err) JL_NOTSAFEPOINT;
@@ -1806,6 +1814,10 @@ typedef struct _jl_task_t {
     jl_value_t *result;
     jl_value_t *logstate;
     jl_function_t *start;
+    uint64_t rngState0; // really rngState[4], but more convenient to split
+    uint64_t rngState1;
+    uint64_t rngState2;
+    uint64_t rngState3;
     uint8_t _state;
     uint8_t sticky; // record whether this Task can be migrated to a new thread
     uint8_t _isexception; // set if `result` is an exception to throw or that we exited with
@@ -1815,6 +1827,11 @@ typedef struct _jl_task_t {
     int16_t tid;
     // multiqueue priority
     int16_t prio;
+    // saved gc stack top for context switches
+    jl_gcframe_t *gcstack;
+    size_t world_age;
+    // quick lookup for current ptls
+    jl_tls_states_t *ptls; // == jl_all_tls_states[tid]
     // saved exception stack
     jl_excstack_t *excstack;
     // current exception handler
@@ -1828,16 +1845,13 @@ typedef struct _jl_task_t {
         struct jl_stack_context_t copy_stack_ctx;
 #endif
     };
-#if defined(JL_TSAN_ENABLED)
+#if defined(_COMPILER_TSAN_ENABLED_)
     void *tsan_state;
 #endif
     void *stkbuf; // malloc'd memory (either copybuf or stack)
     size_t bufsz; // actual sizeof stkbuf
     unsigned int copy_stack:31; // sizeof stack for copybuf
     unsigned int started:1;
-
-    // saved gc stack top for context switches
-    jl_gcframe_t *gcstack;
 } jl_task_t;
 
 #define JL_TASK_STATE_RUNNABLE 0
@@ -1846,11 +1860,14 @@ typedef struct _jl_task_t {
 
 JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t*, jl_value_t*, size_t);
 JL_DLLEXPORT void jl_switchto(jl_task_t **pt);
+JL_DLLEXPORT int jl_set_task_tid(jl_task_t *task, int tid) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void JL_NORETURN jl_throw(jl_value_t *e JL_MAYBE_UNROOTED);
 JL_DLLEXPORT void JL_NORETURN jl_rethrow(void);
 JL_DLLEXPORT void JL_NORETURN jl_sig_throw(void);
 JL_DLLEXPORT void JL_NORETURN jl_rethrow_other(jl_value_t *e JL_MAYBE_UNROOTED);
 JL_DLLEXPORT void JL_NORETURN jl_no_exc_handler(jl_value_t *e);
+JL_DLLEXPORT JL_CONST_FUNC jl_gcframe_t **(jl_get_pgcstack)(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT;
+#define jl_current_task (container_of(jl_get_pgcstack(), jl_task_t, gcstack))
 
 #include "locks.h"   // requires jl_task_t definition
 
@@ -1965,7 +1982,8 @@ JL_DLLEXPORT jl_value_t *jl_stdout_obj(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_stderr_obj(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT size_t jl_static_show(JL_STREAM *out, jl_value_t *v) JL_NOTSAFEPOINT;
 JL_DLLEXPORT size_t jl_static_show_func_sig(JL_STREAM *s, jl_value_t *type) JL_NOTSAFEPOINT;
-JL_DLLEXPORT void jlbacktrace(void) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jlbacktrace(void) JL_NOTSAFEPOINT; // deprecated
 // Mainly for debugging, use `void*` so that no type cast is needed in C++.
 JL_DLLEXPORT void jl_(void *jl_value) JL_NOTSAFEPOINT;
 
@@ -1991,6 +2009,7 @@ typedef struct {
     int8_t code_coverage;
     int8_t malloc_log;
     int8_t opt_level;
+    int8_t opt_level_min;
     int8_t debug_level;
     int8_t check_bounds;
     int8_t depwarn;
@@ -2113,13 +2132,13 @@ typedef struct {
     float value;
 } jl_nullable_float32_t;
 
-#define jl_current_task (jl_get_ptls_states()->current_task)
-#define jl_root_task (jl_get_ptls_states()->root_task)
+#define jl_root_task (jl_current_task->ptls->root_task)
 
-JL_DLLEXPORT jl_value_t *jl_get_current_task(void);
+JL_DLLEXPORT jl_task_t *jl_get_current_task(void) JL_NOTSAFEPOINT;
 
-JL_DLLEXPORT jl_jmp_buf *jl_get_safe_restore(void);
-JL_DLLEXPORT void jl_set_safe_restore(jl_jmp_buf *);
+// TODO: we need to pin the task while using this (set pure bit)
+JL_DLLEXPORT jl_jmp_buf *jl_get_safe_restore(void) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_set_safe_restore(jl_jmp_buf *) JL_NOTSAFEPOINT;
 
 // codegen interface ----------------------------------------------------------
 // The root propagation here doesn't have to be literal, but callers should
@@ -2146,23 +2165,6 @@ typedef struct {
 extern JL_DLLEXPORT jl_cgparams_t jl_default_cgparams;
 extern JL_DLLEXPORT int jl_default_debug_info_kind;
 
-#if !defined(_OS_DARWIN_) && !defined(_OS_WINDOWS_)
-#define JULIA_DEFINE_FAST_TLS()                                                             \
-JL_DLLEXPORT JL_CONST_FUNC jl_ptls_t jl_get_ptls_states_static(void)                        \
-{                                                                                           \
-    static __attribute__((tls_model("local-exec"))) __thread jl_tls_states_t tls_states;    \
-    return &tls_states;                                                                     \
-}                                                                                           \
-__attribute__((constructor)) void jl_register_ptls_states_getter(void)                      \
-{                                                                                           \
-    /* We need to make sure this function is called before any reference to */              \
-    /* TLS variables. */                                                                    \
-    jl_set_ptls_states_getter(jl_get_ptls_states_static);                                   \
-}
-#else
-#define JULIA_DEFINE_FAST_TLS()
-#endif
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/julia_assert.h b/src/julia_assert.h
index 6cf89d0e470a59..4b120fd9e845b3 100644
--- a/src/julia_assert.h
+++ b/src/julia_assert.h
@@ -21,7 +21,10 @@
 #  endif
 #else
 #  ifdef JL_NDEBUG
-#    undef JL_NDEBUG
+#    define NDEBUG
+#    include <assert.h>
+#    undef NDEBUG
+#  else
+#    include <assert.h>
 #  endif
-#  include <assert.h>
 #endif
diff --git a/src/julia_fasttls.h b/src/julia_fasttls.h
new file mode 100644
index 00000000000000..3930a6d8a2d3ce
--- /dev/null
+++ b/src/julia_fasttls.h
@@ -0,0 +1,44 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#ifndef JL_FASTTLS_H
+#define JL_FASTTLS_H
+
+// Thread-local storage access
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Bring in definitions for `_OS_X_`, `PATH_MAX` and `PATHSEPSTRING`, `jl_ptls_t`, etc... */
+#include "platform.h"
+#include "dirpath.h"
+
+typedef struct _jl_gcframe_t jl_gcframe_t;
+
+#if defined(_OS_DARWIN_)
+#include <pthread.h>
+typedef void *(jl_get_pgcstack_func)(pthread_key_t); // aka typeof(pthread_getspecific)
+#else
+typedef jl_gcframe_t **(jl_get_pgcstack_func)(void);
+#endif
+
+#if !defined(_OS_DARWIN_) && !defined(_OS_WINDOWS_)
+#define JULIA_DEFINE_FAST_TLS                                                                   \
+static __attribute__((tls_model("local-exec"))) __thread jl_gcframe_t **jl_pgcstack_localexec;  \
+JL_DLLEXPORT jl_gcframe_t **jl_get_pgcstack_static(void)                                        \
+{                                                                                               \
+    return jl_pgcstack_localexec;                                                               \
+}                                                                                               \
+JL_DLLEXPORT jl_gcframe_t ***jl_pgcstack_addr_static(void)                                      \
+{                                                                                               \
+    return &jl_pgcstack_localexec;                                                              \
+}
+#else // _OS_DARWIN_ or _OS_WINDOWS_: the macro expands to nothing
+#define JULIA_DEFINE_FAST_TLS
+#endif // !_OS_DARWIN_ && !_OS_WINDOWS_
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/julia_gcext.h b/src/julia_gcext.h
index 3ca729e722dba4..6787dafb4b7ee6 100644
--- a/src/julia_gcext.h
+++ b/src/julia_gcext.h
@@ -49,6 +49,8 @@ JL_DLLEXPORT jl_datatype_t *jl_new_foreign_type(
         int haspointers,
         int large);
 
+JL_DLLEXPORT int jl_is_foreign_type(jl_datatype_t *dt);
+
 JL_DLLEXPORT size_t jl_gc_max_internal_obj_size(void);
 JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void);
 
diff --git a/src/julia_internal.h b/src/julia_internal.h
index 96b01f153760f4..c0d492241cf0dd 100644
--- a/src/julia_internal.h
+++ b/src/julia_internal.h
@@ -15,11 +15,11 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
-#ifdef JL_ASAN_ENABLED
+#ifdef _COMPILER_ASAN_ENABLED_
 void __sanitizer_start_switch_fiber(void**, const void*, size_t);
 void __sanitizer_finish_switch_fiber(void*, const void**, size_t*);
 #endif
-#ifdef JL_TSAN_ENABLED
+#ifdef _COMPILER_TSAN_ENABLED_
 void *__tsan_create_fiber(unsigned flags);
 void *__tsan_get_current_fiber(void);
 void __tsan_destroy_fiber(void *fiber);
@@ -154,8 +154,9 @@ static inline uint64_t cycleclock(void)
 
 #include "timing.h"
 
-extern uint8_t *jl_measure_compile_time;
-extern uint64_t *jl_cumulative_compile_time;
+// Global *atomic* integers controlling *process-wide* measurement of compilation time.
+extern uint8_t jl_measure_compile_time_enabled;
+extern uint64_t jl_cumulative_compile_time;
 
 #ifdef _COMPILER_MICROSOFT_
 #  define jl_return_address() ((uintptr_t)_ReturnAddress())
@@ -175,6 +176,26 @@ STATIC_INLINE uint32_t jl_int32hash_fast(uint32_t a)
 }
 
 
+// this is a version of memmove that preserves atomic memory ordering
+// which makes it safe to use for objects that can contain memory references
+// without risk of creating pointers out of thin air
+// TODO: replace with LLVM's llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32
+//       aka `__llvm_memmove_element_unordered_atomic_8` (for 64 bit)
+static inline void memmove_refs(void **dstp, void *const *srcp, size_t n) JL_NOTSAFEPOINT
+{
+    size_t k;
+    if (srcp <= dstp && dstp <= srcp + n) {
+        // dstp starts within [srcp, srcp + n]: walk from the last slot down to the first
+        for (k = n; k > 0; k--) {
+            jl_atomic_store_relaxed(&dstp[k - 1], jl_atomic_load_relaxed(&srcp[k - 1]));
+        }
+    }
+    else {
+        for (k = 0; k < n; k++)
+            jl_atomic_store_relaxed(&dstp[k], jl_atomic_load_relaxed(&srcp[k]));
+    }
+}
+
 // -- gc.c -- //
 
 #define GC_CLEAN  0 // freshly allocated
@@ -227,12 +248,15 @@ static const int jl_gc_sizeclasses[] = {
     4, 8, 12,
 #endif
 
-    // 16 pools at 16-byte spacing
-    16, 32, 48, 64, 80, 96, 112, 128,
+    // 16 pools at 8-byte spacing
+    // the 8-byte aligned pools are only used for Strings
+    16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136,
+    // 8 pools at 16-byte spacing
     144, 160, 176, 192, 208, 224, 240, 256,
 
     // the following tables are computed for maximum packing efficiency via the formula:
-    // sz=(div(2^14-8,rng)÷16)*16; hcat(sz, (2^14-8)÷sz, 2^14-(2^14-8)÷sz.*sz)'
+    // pg = 2^14
+    // sz = (div.(pg-8, rng).÷16)*16; hcat(sz, (pg-8).÷sz, pg .- (pg-8).÷sz.*sz)'
 
     // rng = 60:-4:32 (8 pools)
     272, 288, 304, 336, 368, 400, 448, 496,
@@ -273,15 +297,14 @@ STATIC_INLINE int jl_gc_alignment(size_t sz)
 }
 JL_DLLEXPORT int jl_alignment(size_t sz);
 
-// the following table is computed from jl_gc_sizeclasses via the formula:
-// [searchsortedfirst(TABLE, i) for i = 0:16:table[end]]
-static const uint8_t szclass_table[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 23, 24, 24, 24, 25, 25, 25, 26, 26, 27, 27, 27, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 36, 36, 36, 36, 36, 36, 36, 37, 37, 37, 37, 37, 37, 37, 37, 38, 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40};
+// the following table is computed as:
+// [searchsortedfirst(jl_gc_sizeclasses, i) - 1 for i = 0:16:jl_gc_sizeclasses[end]]
+static const uint8_t szclass_table[] = {0, 1, 3, 5, 7, 9, 11, 13, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 28, 29, 29, 30, 30, 31, 31, 31, 32, 32, 32, 33, 33, 33, 34, 34, 35, 35, 35, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 42, 42, 42, 42, 42, 43, 43, 43, 43, 43, 44, 44, 44, 44, 44, 44, 44, 45, 45, 45, 45, 45, 45, 45, 45, 46, 46, 46, 46, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48};
 static_assert(sizeof(szclass_table) == 128, "");
 
 STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass(unsigned sz)
 {
     assert(sz <= 2032);
-    uint8_t klass = szclass_table[(sz + 15) / 16];
 #ifdef _P64
     if (sz <= 8)
         return 0;
@@ -295,14 +318,31 @@ STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass(unsigned sz)
         return (sz >= 8 ? 2 : (sz >= 4 ? 1 : 0));
     const int N = 2;
 #endif
+    uint8_t klass = szclass_table[(sz + 15) / 16];
     return klass + N;
 }
 
+STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass_align8(unsigned sz)
+{
+    // sizes outside 16..152 use the ordinary size-class lookup
+    if (sz < 16 || sz > 152)
+        return jl_gc_szclass(sz);
+#ifdef _P64
+    const int first = 0;
+#elif MAX_ALIGN == 8
+    const int first = 1;
+#else
+    const int first = 2;
+#endif
+    return (sz + 7) / 8 - 1 + first;
+}
+
 #define JL_SMALL_BYTE_ALIGNMENT 16
 #define JL_CACHE_BYTE_ALIGNMENT 64
 // JL_HEAP_ALIGNMENT is the maximum alignment that the GC can provide
 #define JL_HEAP_ALIGNMENT JL_SMALL_BYTE_ALIGNMENT
 #define GC_MAX_SZCLASS (2032-sizeof(void*))
+static_assert(ARRAY_CACHE_ALIGN_THRESHOLD > GC_MAX_SZCLASS, "");
 
 STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty)
 {
@@ -349,7 +389,7 @@ STATIC_INLINE jl_gc_tracked_buffer_t *jl_gc_alloc_buf(jl_ptls_t ptls, size_t sz)
     return jl_gc_alloc(ptls, sz, (void*)jl_buff_tag);
 }
 
-STATIC_INLINE jl_value_t *jl_gc_permobj(size_t sz, void *ty)
+STATIC_INLINE jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT
 {
     const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
     unsigned align = (sz == 0 ? sizeof(void*) : (allocsz <= sizeof(void*) * 2 ?
@@ -367,7 +407,7 @@ jl_value_t *jl_permbox64(jl_datatype_t *t, int64_t x);
 jl_svec_t *jl_perm_symsvec(size_t n, ...);
 
 // this sizeof(__VA_ARGS__) trick can't be computed until C11, but that only matters to Clang in some situations
-#if !defined(__clang_analyzer__) && !(defined(JL_ASAN_ENABLED) || defined(JL_TSAN_ENABLED))
+#if !defined(__clang_analyzer__) && !(defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_TSAN_ENABLED_))
 #ifdef __GNUC__
 #define jl_perm_symsvec(n, ...) \
     (jl_perm_symsvec)(__extension__({                                         \
@@ -396,7 +436,7 @@ JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT int64_t jl_gc_sync_total_bytes(int64_t offset) JL_NOTSAFEPOINT;
 void jl_gc_track_malloced_array(jl_ptls_t ptls, jl_array_t *a) JL_NOTSAFEPOINT;
 void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT;
-void jl_gc_run_all_finalizers(jl_ptls_t ptls);
+void jl_gc_run_all_finalizers(jl_task_t *ct);
 void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task);
 
 void gc_queue_binding(jl_binding_t *bnd) JL_NOTSAFEPOINT;
@@ -413,8 +453,8 @@ STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) JL_NOT
 {
     // if parent is marked and buf is not
     if (__unlikely(jl_astaggedvalue(parent)->bits.gc & 1)) {
-        jl_ptls_t ptls = jl_get_ptls_states();
-        gc_setmark_buf(ptls, bufptr, 3, minsz);
+        jl_task_t *ct = jl_current_task;
+        gc_setmark_buf(ct->ptls, bufptr, 3, minsz);
     }
 }
 
@@ -430,7 +470,7 @@ STATIC_INLINE jl_value_t *undefref_check(jl_datatype_t *dt, jl_value_t *v) JL_NO
      if (dt->layout->first_ptr >= 0) {
         jl_value_t *nullp = ((jl_value_t**)v)[dt->layout->first_ptr];
         if (__unlikely(nullp == NULL))
-            jl_throw(jl_undefref_exception);
+            return NULL;
     }
     return v;
 }
@@ -499,6 +539,9 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method
 jl_datatype_t *jl_mk_builtin_func(jl_datatype_t *dt, const char *name, jl_fptr_args_t fptr) JL_GC_DISABLED;
 int jl_obviously_unequal(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_array_t *jl_find_free_typevars(jl_value_t *v);
+int jl_has_fixed_layout(jl_datatype_t *t);
+int jl_struct_try_layout(jl_datatype_t *dt);
+int jl_type_mappable_to_c(jl_value_t *ty);
 jl_svec_t *jl_outer_unionall_vars(jl_value_t *u);
 jl_value_t *jl_type_intersection_env_s(jl_value_t *a, jl_value_t *b, jl_svec_t **penv, int *issubty);
 jl_value_t *jl_type_intersection_env(jl_value_t *a, jl_value_t *b, jl_svec_t **penv);
@@ -523,8 +566,10 @@ jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n);
 void jl_reinstantiate_inner_types(jl_datatype_t *t);
 jl_datatype_t *jl_lookup_cache_type_(jl_datatype_t *type);
 void jl_cache_type_(jl_datatype_t *type);
-void jl_assign_bits(void *dest, jl_value_t *bits) JL_NOTSAFEPOINT;
-void set_nth_field(jl_datatype_t *st, void *v, size_t i, jl_value_t *rhs) JL_NOTSAFEPOINT;
+void set_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *rhs, int isatomic) JL_NOTSAFEPOINT;
+jl_value_t *swap_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *rhs, int isatomic);
+jl_value_t *modify_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *op, jl_value_t *rhs, int isatomic);
+jl_value_t *replace_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *expected, jl_value_t *rhs, int isatomic);
 jl_expr_t *jl_exprn(jl_sym_t *head, size_t n);
 jl_function_t *jl_new_generic_function(jl_sym_t *name, jl_module_t *module);
 jl_function_t *jl_new_generic_function_with_supertype(jl_sym_t *name, jl_module_t *module, jl_datatype_t *st);
@@ -532,6 +577,7 @@ void jl_foreach_reachable_mtable(void (*visit)(jl_methtable_t *mt, void *env), v
 void jl_init_main_module(void);
 int jl_is_submodule(jl_module_t *child, jl_module_t *parent) JL_NOTSAFEPOINT;
 jl_array_t *jl_get_loaded_modules(void);
+int jl_datatype_isinlinealloc(jl_datatype_t *ty, int pointerfree);
 
 jl_value_t *jl_toplevel_eval_flex(jl_module_t *m, jl_value_t *e, int fast, int expanded);
 
@@ -548,13 +594,15 @@ jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t nargs, size_t w
 
 jl_value_t *jl_gf_invoke_by_method(jl_method_t *method, jl_value_t *gf, jl_value_t **args, size_t nargs);
 jl_value_t *jl_gf_invoke(jl_value_t *types, jl_value_t *f, jl_value_t **args, size_t nargs);
-JL_DLLEXPORT jl_value_t *jl_matching_methods(jl_tupletype_t *types, int lim, int include_ambiguous,
+JL_DLLEXPORT jl_value_t *jl_matching_methods(jl_tupletype_t *types, jl_value_t *mt, int lim, int include_ambiguous,
                                              size_t world, size_t *min_valid, size_t *max_valid, int *ambig);
 
 JL_DLLEXPORT jl_datatype_t *jl_first_argument_datatype(jl_value_t *argtypes JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_argument_datatype(jl_value_t *argt JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_methtable_t *jl_method_table_for(
     jl_value_t *argtypes JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_methtable_t *jl_method_get_table(
+    jl_method_t *method) JL_NOTSAFEPOINT;
 jl_methtable_t *jl_argument_method_table(jl_value_t *argt JL_PROPAGATES_ROOT);
 
 int jl_pointer_egal(jl_value_t *t);
@@ -563,7 +611,7 @@ void jl_compute_field_offsets(jl_datatype_t *st);
 jl_array_t *jl_new_array_for_deserialization(jl_value_t *atype, uint32_t ndims, size_t *dims,
                                              int isunboxed, int hasptr, int isunion, int elsz);
 void jl_module_run_initializer(jl_module_t *m);
-jl_binding_t *jl_get_module_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var) JL_NOTSAFEPOINT;
+jl_binding_t *jl_get_module_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var);
 void jl_binding_deprecation_warning(jl_module_t *m, jl_binding_t *b);
 extern jl_array_t *jl_module_init_order JL_GLOBALLY_ROOTED;
 extern htable_t jl_current_modules JL_GLOBALLY_ROOTED;
@@ -651,7 +699,6 @@ extern char jl_using_oprofile_jitevents;
 extern char jl_using_perf_jitevents;
 #endif
 extern char jl_using_gdb_jitevents;
-extern size_t jl_arr_xtralloc_limit;
 
 // -- init.c -- //
 
@@ -666,7 +713,7 @@ void jl_init_intrinsic_functions(void);
 void jl_init_intrinsic_properties(void);
 void jl_init_tasks(void) JL_GC_DISABLED;
 void jl_init_stack_limits(int ismaster, void **stack_hi, void **stack_lo);
-void jl_init_root_task(void *stack_lo, void *stack_hi);
+void jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi);
 void jl_init_serializer(void);
 void jl_gc_init(void);
 void jl_init_uv(void);
@@ -676,8 +723,6 @@ void jl_init_int32_int64_cache(void);
 
 void jl_teardown_codegen(void);
 
-void _julia_init(JL_IMAGE_SEARCH rel);
-
 void jl_set_base_ctx(char *__stk);
 
 extern ssize_t jl_tls_offset;
@@ -729,27 +774,36 @@ void jl_safepoint_defer_sigint(void);
 int jl_safepoint_consume_sigint(void);
 void jl_wake_libuv(void);
 
+void jl_set_pgcstack(jl_gcframe_t **) JL_NOTSAFEPOINT;
+#if defined(_OS_DARWIN_)
+typedef pthread_key_t jl_pgcstack_key_t;
+#elif defined(_OS_WINDOWS_)
+typedef DWORD jl_pgcstack_key_t;
+#else
+typedef jl_gcframe_t ***(*jl_pgcstack_key_t)(void) JL_NOTSAFEPOINT;
+#endif
+void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k);
+
 #if !defined(__clang_analyzer__)
-jl_get_ptls_states_func jl_get_ptls_states_getter(void);
 static inline void jl_set_gc_and_wait(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     // reading own gc state doesn't need atomic ops since no one else
     // should store to it.
-    int8_t state = ptls->gc_state;
-    jl_atomic_store_release(&ptls->gc_state, JL_GC_STATE_WAITING);
+    int8_t state = ct->ptls->gc_state;
+    jl_atomic_store_release(&ct->ptls->gc_state, JL_GC_STATE_WAITING);
     jl_safepoint_wait_gc();
-    jl_atomic_store_release(&ptls->gc_state, state);
+    jl_atomic_store_release(&ct->ptls->gc_state, state);
 }
 #endif
 void jl_gc_set_permalloc_region(void *start, void *end);
 
 JL_DLLEXPORT jl_value_t *jl_dump_method_asm(jl_method_instance_t *linfo, size_t world,
-        int raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo);
+        char raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary);
 JL_DLLEXPORT void *jl_get_llvmf_defn(jl_method_instance_t *linfo, size_t world, char getwrapper, char optimize, const jl_cgparams_t params);
-JL_DLLEXPORT jl_value_t *jl_dump_fptr_asm(uint64_t fptr, int raw_mc, const char* asm_variant, const char *debuginfo);
-JL_DLLEXPORT jl_value_t *jl_dump_llvm_asm(void *F, const char* asm_variant, const char *debuginfo);
+JL_DLLEXPORT jl_value_t *jl_dump_fptr_asm(uint64_t fptr, char raw_mc, const char* asm_variant, const char *debuginfo, char binary);
 JL_DLLEXPORT jl_value_t *jl_dump_function_ir(void *f, char strip_ir_metadata, char dump_module, const char *debuginfo);
+JL_DLLEXPORT jl_value_t *jl_dump_function_asm(void *F, char raw_mc, const char* asm_variant, const char *debuginfo, char binary);
 
 void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int policy);
 void jl_dump_native(void *native_code,
@@ -1043,10 +1097,9 @@ extern void *jl_crtdll_handle;
 extern void *jl_winsock_handle;
 #endif
 
-void *jl_get_library_(const char *f_lib, int throw_err) JL_NOTSAFEPOINT;
+void *jl_get_library_(const char *f_lib, int throw_err);
 #define jl_get_library(f_lib) jl_get_library_(f_lib, 1)
-JL_DLLEXPORT void *jl_load_and_lookup(const char *f_lib, const char *f_name,
-                                      void **hnd) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void *jl_load_and_lookup(const char *f_lib, const char *f_name, void **hnd);
 JL_DLLEXPORT void *jl_lazy_load_and_lookup(jl_value_t *lib_val, const char *f_name);
 JL_DLLEXPORT jl_value_t *jl_get_cfunction_trampoline(
     jl_value_t *fobj, jl_datatype_t *result, htable_t *cache, jl_svec_t *fill,
@@ -1076,9 +1129,19 @@ extern JL_DLLEXPORT jl_value_t *jl_segv_exception;
 JL_DLLEXPORT const char *jl_intrinsic_name(int f) JL_NOTSAFEPOINT;
 unsigned jl_intrinsic_nargs(int f) JL_NOTSAFEPOINT;
 
+STATIC_INLINE int is_valid_intrinsic_elptr(jl_value_t *ety)
+{   // nonzero when ety is jl_any_type, or passes jl_is_concrete_type and its layout is not jl_is_layout_opaque
+    return ety == (jl_value_t*)jl_any_type || (jl_is_concrete_type(ety) && !jl_is_layout_opaque(((jl_datatype_t*)ety)->layout));
+}
 JL_DLLEXPORT jl_value_t *jl_bitcast(jl_value_t *ty, jl_value_t *v);
 JL_DLLEXPORT jl_value_t *jl_pointerref(jl_value_t *p, jl_value_t *i, jl_value_t *align);
 JL_DLLEXPORT jl_value_t *jl_pointerset(jl_value_t *p, jl_value_t *x, jl_value_t *align, jl_value_t *i);
+JL_DLLEXPORT jl_value_t *jl_atomic_fence(jl_value_t *order);
+JL_DLLEXPORT jl_value_t *jl_atomic_pointerref(jl_value_t *p, jl_value_t *order);
+JL_DLLEXPORT jl_value_t *jl_atomic_pointerset(jl_value_t *p, jl_value_t *x, jl_value_t *order);
+JL_DLLEXPORT jl_value_t *jl_atomic_pointerswap(jl_value_t *p, jl_value_t *x, jl_value_t *order);
+JL_DLLEXPORT jl_value_t *jl_atomic_pointermodify(jl_value_t *p, jl_value_t *f, jl_value_t *x, jl_value_t *order);
+JL_DLLEXPORT jl_value_t *jl_atomic_pointerreplace(jl_value_t *p, jl_value_t *x, jl_value_t *expected, jl_value_t *success_order, jl_value_t *failure_order);
 JL_DLLEXPORT jl_value_t *jl_cglobal(jl_value_t *v, jl_value_t *ty);
 JL_DLLEXPORT jl_value_t *jl_cglobal_auto(jl_value_t *v);
 
@@ -1115,7 +1178,6 @@ JL_DLLEXPORT jl_value_t *jl_ne_float(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_value_t *jl_lt_float(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_value_t *jl_le_float(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_value_t *jl_fpiseq(jl_value_t *a, jl_value_t *b);
-JL_DLLEXPORT jl_value_t *jl_fpislt(jl_value_t *a, jl_value_t *b);
 
 JL_DLLEXPORT jl_value_t *jl_not_int(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_and_int(jl_value_t *a, jl_value_t *b);
@@ -1189,16 +1251,8 @@ void jl_smallintset_insert(jl_array_t **pcache, jl_value_t *parent, smallintset_
 
 // -- typemap.c -- //
 
-// a descriptor of a jl_typemap_t that gets
-// passed around as self-documentation of the parameters of the type
-struct jl_typemap_info {
-    int8_t unsorted; // whether this should be unsorted
-    jl_datatype_t **jl_contains; // the type that is being put in this
-};
-
 void jl_typemap_insert(jl_typemap_t **cache, jl_value_t *parent,
-        jl_typemap_entry_t *newrec, int8_t offs,
-        const struct jl_typemap_info *tparams);
+        jl_typemap_entry_t *newrec, int8_t offs);
 jl_typemap_entry_t *jl_typemap_alloc(
         jl_tupletype_t *type, jl_tupletype_t *simpletype, jl_svec_t *guardsigs,
         jl_value_t *newvalue, size_t min_world, size_t max_world);
@@ -1290,6 +1344,7 @@ void jl_log(int level, jl_value_t *module, jl_value_t *group, jl_value_t *id,
 int isabspath(const char *in) JL_NOTSAFEPOINT;
 
 extern jl_sym_t *call_sym;    extern jl_sym_t *invoke_sym;
+extern jl_sym_t *invoke_modify_sym;
 extern jl_sym_t *empty_sym;   extern jl_sym_t *top_sym;
 extern jl_sym_t *module_sym;  extern jl_sym_t *slot_sym;
 extern jl_sym_t *export_sym;  extern jl_sym_t *import_sym;
@@ -1331,6 +1386,17 @@ extern jl_sym_t *optlevel_sym; extern jl_sym_t *compile_sym;
 extern jl_sym_t *infer_sym;
 extern jl_sym_t *atom_sym; extern jl_sym_t *statement_sym; extern jl_sym_t *all_sym;
 
+extern jl_sym_t *atomic_sym;
+extern jl_sym_t *not_atomic_sym;
+extern jl_sym_t *unordered_sym;
+extern jl_sym_t *monotonic_sym; // or relaxed_sym?
+extern jl_sym_t *acquire_sym;
+extern jl_sym_t *release_sym;
+extern jl_sym_t *acquire_release_sym;
+extern jl_sym_t *sequentially_consistent_sym; // or strong_sym?
+enum jl_memory_order jl_get_atomic_order(jl_sym_t *order, char loading, char storing);
+enum jl_memory_order jl_get_atomic_order_checked(jl_sym_t *order, char loading, char storing);
+
 struct _jl_sysimg_fptrs_t;
 
 void jl_register_fptrs(uint64_t sysimage_base, const struct _jl_sysimg_fptrs_t *fptrs,
@@ -1384,4 +1450,39 @@ uint16_t __gnu_f2h_ieee(float param) JL_NOTSAFEPOINT;
 }
 #endif
 
+#ifdef USE_DTRACE
+#include "uprobes.h.gen"
+
+// On systems with DTrace, uprobes.h.gen is auto-generated to include
+// `JL_PROBE_{PROBE}` and `JL_PROBE_{PROBE}_ENABLED()` macros for every probe
+// defined in uprobes.d
+//
+// If the arguments to `JL_PROBE_{PROBE}` are expensive to compute, the call to
+// these functions must be guarded by a JL_PROBE_{PROBE}_ENABLED() check, to
+// minimize performance impact when probing is off. As an example:
+//
+//    if (JL_PROBE_GC_STOP_THE_WORLD_ENABLED())
+//        JL_PROBE_GC_STOP_THE_WORLD();
+
+#else
+// define a dummy version of the probe functions
+#define JL_PROBE_GC_BEGIN(collection) do ; while (0)
+#define JL_PROBE_GC_STOP_THE_WORLD() do ; while (0)
+#define JL_PROBE_GC_MARK_BEGIN() do ; while (0)
+#define JL_PROBE_GC_MARK_END(scanned_bytes, perm_scanned_bytes) do ; while (0)
+#define JL_PROBE_GC_SWEEP_BEGIN(full) do ; while (0)
+#define JL_PROBE_GC_SWEEP_END() do ; while (0)
+#define JL_PROBE_GC_END() do ; while (0)
+#define JL_PROBE_GC_FINALIZER() do ; while (0)
+
+#define JL_PROBE_GC_BEGIN_ENABLED() (0)
+#define JL_PROBE_GC_STOP_THE_WORLD_ENABLED() (0)
+#define JL_PROBE_GC_MARK_BEGIN_ENABLED() (0)
+#define JL_PROBE_GC_MARK_END_ENABLED() (0)
+#define JL_PROBE_GC_SWEEP_BEGIN_ENABLED() (0)
+#define JL_PROBE_GC_SWEEP_END_ENABLED()  (0)
+#define JL_PROBE_GC_END_ENABLED() (0)
+#define JL_PROBE_GC_FINALIZER_ENABLED() (0)
+#endif
+
 #endif
diff --git a/src/julia_threads.h b/src/julia_threads.h
index b832d5c55b440f..23fa8d1ea78648 100644
--- a/src/julia_threads.h
+++ b/src/julia_threads.h
@@ -7,6 +7,14 @@
 #include <atomics.h>
 // threading ------------------------------------------------------------------
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+JL_DLLEXPORT int16_t jl_threadid(void);
+JL_DLLEXPORT void jl_threading_profile(void);
+
 // JULIA_ENABLE_THREADING may be controlled by altering JULIA_THREADS in Make.user
 
 // When running into scheduler issues, this may help provide information on the
@@ -57,7 +65,7 @@ struct jl_stack_context_t {
 typedef struct jl_stack_context_t jl_ucontext_t;
 #endif
 #if defined(JL_HAVE_ASYNCIFY)
-#if defined(JL_TSAN_ENABLED)
+#if defined(_COMPILER_TSAN_ENABLED_)
 #error TSAN not currently supported with asyncify
 #endif
 typedef struct {
@@ -133,11 +141,11 @@ typedef struct {
 
     // variables for allocating objects from pools
 #ifdef _P64
-#  define JL_GC_N_POOLS 41
+#  define JL_GC_N_POOLS 49
 #elif MAX_ALIGN == 8
-#  define JL_GC_N_POOLS 42
+#  define JL_GC_N_POOLS 50
 #else
-#  define JL_GC_N_POOLS 43
+#  define JL_GC_N_POOLS 51
 #endif
     jl_gc_pool_t norm_pools[JL_GC_N_POOLS];
 
@@ -182,9 +190,7 @@ struct _jl_bt_element_t;
 // This includes all the thread local states we care about for a thread.
 // Changes to TLS field types must be reflected in codegen.
 #define JL_MAX_BT_SIZE 80000
-struct _jl_tls_states_t {
-    struct _jl_gcframe_t *pgcstack;
-    size_t world_age;
+typedef struct _jl_tls_states_t {
     int16_t tid;
     uint64_t rngseed;
     volatile size_t *safepoint;
@@ -197,18 +203,23 @@ struct _jl_tls_states_t {
     // gc_state = 2 means the thread is running unmanaged code that can be
     //              execute at the same time with the GC.
     int8_t gc_state; // read from foreign threads
+    // execution of certain impure
+    // statements is prohibited from certain
+    // callbacks (such as generated functions)
+    // as it may make compilation undecidable
+    int8_t in_pure_callback;
     int8_t in_finalizer;
     int8_t disable_gc;
-    jl_thread_heap_t heap;
+    // Counter to disable finalizer **on the current thread**
+    int finalizers_inhibited;
+    jl_thread_heap_t heap; // this is very large, and the offset is baked into codegen
     jl_thread_gc_num_t gc_num;
     uv_mutex_t sleep_lock;
     uv_cond_t wake_signal;
     volatile sig_atomic_t defer_signal;
     struct _jl_task_t *current_task;
     struct _jl_task_t *next_task;
-#ifdef MIGRATE_TASKS
     struct _jl_task_t *previous_task;
-#endif
     struct _jl_task_t *root_task;
     struct _jl_timing_block_t *timing_stack;
     void *stackbase;
@@ -222,7 +233,6 @@ struct _jl_tls_states_t {
         struct jl_stack_context_t copy_stack_ctx;
 #endif
     };
-    jl_jmp_buf *safe_restore;
     // Temp storage for exception thrown in signal handler. Not rooted.
     struct _jl_value_t *sig_exception;
     // Temporary backtrace buffer. Scanned for gc roots when bt_size > 0.
@@ -240,18 +250,11 @@ struct _jl_tls_states_t {
     void *signal_stack;
 #endif
     jl_thread_t system_id;
-    // execution of certain certain impure
-    // statements is prohibited from certain
-    // callbacks (such as generated functions)
-    // as it may make compilation undecidable
-    int in_pure_callback;
-    // Counter to disable finalizer **on the current thread**
-    int finalizers_inhibited;
     arraylist_t finalizers;
     jl_gc_mark_cache_t gc_cache;
     arraylist_t sweep_objs;
     jl_gc_mark_sp_t gc_mark_sp;
-    // Saved exception for previous external API call or NULL if cleared.
+    // Saved exception for previous *external* API call or NULL if cleared.
     // Access via jl_exception_occurred().
     struct _jl_value_t *previous_exception;
 
@@ -264,7 +267,9 @@ struct _jl_tls_states_t {
         uint64_t sleep_enter;
         uint64_t sleep_leave;
     )
-};
+} jl_tls_states_t;
+
+typedef jl_tls_states_t *jl_ptls_t;
 
 // Update codegen version in `ccall.cpp` after changing either `pause` or `wake`
 #ifdef __MIC__
@@ -285,10 +290,6 @@ struct _jl_tls_states_t {
 #  define JL_CPU_WAKE_NOOP 1
 #endif
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 JL_DLLEXPORT void (jl_cpu_pause)(void);
 JL_DLLEXPORT void (jl_cpu_wake)(void);
 
@@ -342,10 +343,10 @@ int8_t jl_gc_safe_leave(jl_ptls_t ptls, int8_t state); // Can be a safepoint
 #endif
 JL_DLLEXPORT void (jl_gc_safepoint)(void);
 
-JL_DLLEXPORT void jl_gc_enable_finalizers(jl_ptls_t ptls, int on);
+JL_DLLEXPORT void jl_gc_enable_finalizers(struct _jl_task_t *ct, int on);
 JL_DLLEXPORT void jl_gc_disable_finalizers_internal(void);
 JL_DLLEXPORT void jl_gc_enable_finalizers_internal(void);
-JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_ptls_t ptls);
+JL_DLLEXPORT void jl_gc_run_pending_finalizers(struct _jl_task_t *ct);
 extern JL_DLLEXPORT int jl_gc_have_pending_finalizers;
 
 JL_DLLEXPORT void jl_wakeup_thread(int16_t tid);
diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp
index 3bb11988b730e9..f7130f69044797 100644
--- a/src/llvm-alloc-opt.cpp
+++ b/src/llvm-alloc-opt.cpp
@@ -24,9 +24,7 @@
 #include <llvm/Support/Debug.h>
 #include <llvm/Transforms/Utils/PromoteMemToReg.h>
 
-#if JL_LLVM_VERSION >= 100000
 #include <llvm/InitializePasses.h>
-#endif
 
 #include "codegen_shared.h"
 #include "julia.h"
@@ -64,13 +62,8 @@ static bool hasObjref(Type *ty)
 {
     if (auto ptrty = dyn_cast<PointerType>(ty))
         return ptrty->getAddressSpace() == AddressSpace::Tracked;
-#if JL_LLVM_VERSION >= 110000
     if (isa<ArrayType>(ty) || isa<VectorType>(ty))
         return hasObjref(GetElementPtrInst::getTypeAtIndex(ty, (uint64_t)0));
-#else
-    if (auto seqty = dyn_cast<SequentialType>(ty))
-        return hasObjref(seqty->getElementType());
-#endif
     if (auto structty = dyn_cast<StructType>(ty)) {
         for (auto elty: structty->elements()) {
             if (hasObjref(elty)) {
@@ -635,6 +628,21 @@ void Optimizer::checkInst(Instruction *I)
                 use_info.hasunknownmem = true;
             return true;
         }
+        if (isa<AtomicCmpXchgInst>(inst) || isa<AtomicRMWInst>(inst)) {
+            // Any use other than as the pointer operand (e.g. as the stored value) escapes; `?:` must be parenthesized since `!=` binds tighter.
+            if (use->getOperandNo() != (isa<AtomicCmpXchgInst>(inst) ? AtomicCmpXchgInst::getPointerOperandIndex() : AtomicRMWInst::getPointerOperandIndex())) {
+                use_info.escaped = true;
+                return false;
+            }
+            use_info.hasload = true;
+            auto storev = isa<AtomicCmpXchgInst>(inst) ? cast<AtomicCmpXchgInst>(inst)->getNewValOperand() : cast<AtomicRMWInst>(inst)->getValOperand();
+            if (cur.offset == UINT32_MAX || !use_info.addMemOp(inst, use->getOperandNo(),
+                                                               cur.offset, storev->getType(),
+                                                               true, *pass.DL))
+                use_info.hasunknownmem = true;
+            use_info.refload = true;
+            return true;
+        }
         if (isa<AddrSpaceCastInst>(inst) || isa<BitCastInst>(inst)) {
             push_inst(inst);
             return true;
@@ -1156,6 +1164,7 @@ void Optimizer::optimizeTag(CallInst *orig_inst)
 {
     auto tag = orig_inst->getArgOperand(2);
     // `julia.typeof` is only legal on the original pointer, no need to scan recursively
+    size_t last_deleted = removed.size();
     for (auto user: orig_inst->users()) {
         if (auto call = dyn_cast<CallInst>(user)) {
             auto callee = call->getCalledOperand();
@@ -1168,6 +1177,8 @@ void Optimizer::optimizeTag(CallInst *orig_inst)
             }
         }
     }
+    while (last_deleted < removed.size())
+        removed[last_deleted++]->replaceUsesOfWith(orig_inst, UndefValue::get(orig_inst->getType()));
 }
 
 void Optimizer::splitOnStack(CallInst *orig_inst)
@@ -1289,11 +1300,7 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
                 val = newload;
             }
             // TODO: should we use `load->clone()`, or manually copy any other metadata?
-#if JL_LLVM_VERSION >= 100000
             newload->setAlignment(load->getAlign());
-#else
-            newload->setAlignment(load->getAlignment());
-#endif
             // since we're moving heap-to-stack, it is safe to downgrade the atomic level to NotAtomic
             newload->setOrdering(AtomicOrdering::NotAtomic);
             load->replaceAllUsesWith(val);
@@ -1333,16 +1340,28 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
                 newstore = builder.CreateStore(store_val, slot_gep(slot, offset, store_ty, builder));
             }
             // TODO: should we use `store->clone()`, or manually copy any other metadata?
-#if JL_LLVM_VERSION >= 100000
             newstore->setAlignment(store->getAlign());
-#else
-            newstore->setAlignment(store->getAlignment());
-#endif
             // since we're moving heap-to-stack, it is safe to downgrade the atomic level to NotAtomic
             newstore->setOrdering(AtomicOrdering::NotAtomic);
             store->eraseFromParent();
             return;
         }
+        else if (isa<AtomicCmpXchgInst>(user) || isa<AtomicRMWInst>(user)) {
+            auto slot_idx = find_slot(offset);
+            auto &slot = slots[slot_idx];
+            assert(slot.offset <= offset && slot.offset + slot.size >= offset);
+            IRBuilder<> builder(user);
+            Value *newptr;
+            if (slot.isref) {
+                assert(slot.offset == offset);
+                newptr = slot.slot;
+            }
+            else {
+                Value *Val = isa<AtomicCmpXchgInst>(user) ? cast<AtomicCmpXchgInst>(user)->getNewValOperand() : cast<AtomicRMWInst>(user)->getValOperand();
+                newptr = slot_gep(slot, offset, Val->getType(), builder);
+            }
+            *use = newptr;
+        }
         else if (auto call = dyn_cast<CallInst>(user)) {
             auto callee = call->getCalledOperand();
             assert(callee); // makes it clear for clang analyser that `callee` is not NULL
@@ -1383,11 +1402,7 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
                             auto sub_size = std::min(slot.offset + slot.size, offset + size) -
                                 std::max(offset, slot.offset);
                             // TODO: alignment computation
-#if JL_LLVM_VERSION >= 100000
                             builder.CreateMemSet(ptr8, val_arg, sub_size, MaybeAlign(0));
-#else
-                            builder.CreateMemSet(ptr8, val_arg, sub_size, 0);
-#endif
                         }
                         call->eraseFromParent();
                         return;
diff --git a/src/llvm-api.cpp b/src/llvm-api.cpp
deleted file mode 100644
index db8f6f94dc7813..00000000000000
--- a/src/llvm-api.cpp
+++ /dev/null
@@ -1,294 +0,0 @@
-// This file is a part of Julia. License is MIT: https://julialang.org/license
-
-// Extensions of the LLVM C API for LLVM.jl
-//
-// These are part of the Julia repository as they need to be
-// built with the same C++ toolchain Julia & LLVM are built with
-//
-// They are not to be considered a stable API, and will be removed
-// when better package build systems are available
-
-#include "llvm-version.h"
-#include <llvm-c/Core.h>
-#include <llvm-c/Types.h>
-
-#include <llvm/ADT/Triple.h>
-#include <llvm/Analysis/TargetLibraryInfo.h>
-#include <llvm/Analysis/TargetTransformInfo.h>
-#include <llvm/IR/Attributes.h>
-#if JL_LLVM_VERSION < 110000
-#include <llvm/IR/CallSite.h>
-#endif
-#include <llvm/IR/DebugInfo.h>
-#include <llvm/IR/Function.h>
-#include <llvm/IR/GlobalValue.h>
-#include <llvm/IR/Instruction.h>
-#include <llvm/IR/LegacyPassManager.h>
-#include <llvm/IR/Module.h>
-#include <llvm/Support/TargetSelect.h>
-#include <llvm/Transforms/IPO.h>
-#include <llvm/Transforms/Scalar.h>
-#include <llvm/Transforms/Vectorize.h>
-#if JL_LLVM_VERSION < 120000
-#include <llvm/Transforms/Scalar/InstSimplifyPass.h>
-#endif
-#include <llvm/Transforms/Utils/ModuleUtils.h>
-
-#include "julia.h"
-
-using namespace llvm::legacy;
-
-namespace llvm {
-
-
-// Initialization functions
-//
-// The LLVMInitialize* functions and friends are defined `static inline`
-
-extern "C" JL_DLLEXPORT void LLVMExtraInitializeAllTargetInfos()
-{
-    InitializeAllTargetInfos();
-}
-
-extern "C" JL_DLLEXPORT void LLVMExtraInitializeAllTargets()
-{
-    InitializeAllTargets();
-}
-
-extern "C" JL_DLLEXPORT void LLVMExtraInitializeAllTargetMCs()
-{
-    InitializeAllTargetMCs();
-}
-
-extern "C" JL_DLLEXPORT void LLVMExtraInitializeAllAsmPrinters()
-{
-    InitializeAllAsmPrinters();
-}
-
-extern "C" JL_DLLEXPORT void LLVMExtraInitializeAllAsmParsers()
-{
-    InitializeAllAsmParsers();
-}
-
-extern "C" JL_DLLEXPORT void LLVMExtraInitializeAllDisassemblers()
-{
-    InitializeAllDisassemblers();
-}
-
-extern "C" JL_DLLEXPORT LLVMBool LLVMExtraInitializeNativeTarget()
-{
-    return InitializeNativeTarget();
-}
-
-extern "C" JL_DLLEXPORT LLVMBool LLVMExtraInitializeNativeAsmParser()
-{
-    return InitializeNativeTargetAsmParser();
-}
-
-extern "C" JL_DLLEXPORT LLVMBool LLVMExtraInitializeNativeAsmPrinter()
-{
-    return InitializeNativeTargetAsmPrinter();
-}
-
-extern "C" JL_DLLEXPORT LLVMBool LLVMExtraInitializeNativeDisassembler()
-{
-    return InitializeNativeTargetDisassembler();
-}
-
-// Various missing passes (being upstreamed)
-
-extern "C" JL_DLLEXPORT void LLVMExtraAddBarrierNoopPass(LLVMPassManagerRef PM)
-{
-    unwrap(PM)->add(createBarrierNoopPass());
-}
-
-extern "C" JL_DLLEXPORT void LLVMExtraAddDivRemPairsPass(LLVMPassManagerRef PM) {
-    unwrap(PM)->add(createDivRemPairsPass());
-}
-
-extern "C" JL_DLLEXPORT void LLVMExtraAddLoopDistributePass(LLVMPassManagerRef PM) {
-    unwrap(PM)->add(createLoopDistributePass());
-}
-
-extern "C" JL_DLLEXPORT void LLVMExtraAddLoopFusePass(LLVMPassManagerRef PM) {
-  unwrap(PM)->add(createLoopFusePass());
-}
-
-extern "C" JL_DLLEXPORT void LLVMExtraLoopLoadEliminationPass(LLVMPassManagerRef PM) {
-  unwrap(PM)->add(createLoopLoadEliminationPass());
-}
-
-extern "C" JL_DLLEXPORT void LLVMExtraAddLoadStoreVectorizerPass(LLVMPassManagerRef PM) {
-  unwrap(PM)->add(createLoadStoreVectorizerPass());
-}
-
-extern "C" JL_DLLEXPORT void LLVMExtraAddVectorCombinePass(LLVMPassManagerRef PM) {
-  unwrap(PM)->add(createVectorCombinePass());
-}
-
-// Can be removed in LLVM 12
-extern "C" JL_DLLEXPORT void LLVMExtraAddInstructionSimplifyPass(LLVMPassManagerRef PM) {
-  unwrap(PM)->add(createInstSimplifyLegacyPass());
-}
-
-
-// Infrastructure for writing LLVM passes in Julia
-
-typedef struct LLVMOpaquePass *LLVMPassRef;
-DEFINE_STDCXX_CONVERSION_FUNCTIONS(Pass, LLVMPassRef)
-
-extern "C" JL_DLLEXPORT void
-LLVMExtraAddPass(LLVMPassManagerRef PM, LLVMPassRef P)
-{
-    unwrap(PM)->add(unwrap(P));
-}
-
-typedef LLVMBool (*LLVMPassCallback)(void* Ref, void* Data);
-
-StringMap<char *> PassIDs;
-char &CreatePassID(const char *Name)
-{
-    std::string NameStr(Name);
-    if (PassIDs.find(NameStr) != PassIDs.end())
-        return *PassIDs[NameStr];
-    else
-        return *(PassIDs[NameStr] = new char);
-}
-
-class JuliaModulePass : public ModulePass {
-public:
-    JuliaModulePass(const char *Name, LLVMPassCallback Callback, void* Data)
-        : ModulePass(CreatePassID(Name)), Callback(Callback), Data(Data)
-    {
-    }
-
-    bool runOnModule(Module &M)
-    {
-        void *Ref = (void*)wrap(&M);
-        bool Changed = Callback(Ref, Data);
-        return Changed;
-    }
-
-private:
-    LLVMPassCallback Callback;
-    void* Data;
-};
-
-extern "C" JL_DLLEXPORT LLVMPassRef
-LLVMExtraCreateModulePass2(const char *Name, LLVMPassCallback Callback, void *Data)
-{
-    return wrap(new JuliaModulePass(Name, Callback, Data));
-}
-
-class JuliaFunctionPass : public FunctionPass {
-public:
-    JuliaFunctionPass(const char *Name, LLVMPassCallback Callback, void* Data)
-        : FunctionPass(CreatePassID(Name)), Callback(Callback), Data(Data)
-    {
-    }
-
-    bool runOnFunction(Function &Fn)
-    {
-        void *Ref = (void*)wrap(&Fn);
-        bool Changed = Callback(Ref, Data);
-        return Changed;
-    }
-
-private:
-    LLVMPassCallback Callback;
-    void* Data;
-};
-
-extern "C" JL_DLLEXPORT LLVMPassRef
-LLVMExtraCreateFunctionPass2(const char *Name, LLVMPassCallback Callback, void *Data)
-{
-    return wrap(new JuliaFunctionPass(Name, Callback, Data));
-}
-
-
-// Various missing functions
-
-extern "C" JL_DLLEXPORT unsigned int LLVMExtraGetDebugMDVersion()
-{
-    return DEBUG_METADATA_VERSION;
-}
-
-extern "C" JL_DLLEXPORT LLVMContextRef LLVMExtraGetValueContext(LLVMValueRef V)
-{
-    return wrap(&unwrap(V)->getContext());
-}
-
-extern "C" JL_DLLEXPORT void
-LLVMExtraAddTargetLibraryInfoByTiple(const char *T, LLVMPassManagerRef PM)
-{
-    unwrap(PM)->add(new TargetLibraryInfoWrapperPass(Triple(T)));
-}
-
-extern "C" JL_DLLEXPORT void LLVMExtraAddInternalizePassWithExportList(
-        LLVMPassManagerRef PM, const char **ExportList, size_t Length)
-{
-    auto PreserveFobj = [=](const GlobalValue &GV) {
-        for (size_t i = 0; i < Length; i++) {
-            if (strcmp(ExportList[i], GV.getName().data()) == 0)
-                return true;
-        }
-        return false;
-    };
-    unwrap(PM)->add(createInternalizePass(PreserveFobj));
-}
-
-extern "C" JL_DLLEXPORT void LLVMExtraAppendToUsed(LLVMModuleRef Mod,
-                                                   LLVMValueRef* Values,
-                                                   size_t Count) {
-    SmallVector<GlobalValue *, 1> GlobalValues;
-    for (auto *Value : makeArrayRef(Values, Count))
-        GlobalValues.push_back(cast<GlobalValue>(unwrap(Value)));
-    appendToUsed(*unwrap(Mod), GlobalValues);
-}
-
-extern "C" JL_DLLEXPORT void LLVMExtraAppendToCompilerUsed(LLVMModuleRef Mod,
-                                                           LLVMValueRef* Values,
-                                                           size_t Count) {
-    SmallVector<GlobalValue *, 1> GlobalValues;
-    for (auto *Value : makeArrayRef(Values, Count))
-        GlobalValues.push_back(cast<GlobalValue>(unwrap(Value)));
-    appendToCompilerUsed(*unwrap(Mod), GlobalValues);
-}
-
-extern "C" JL_DLLEXPORT void LLVMExtraAddGenericAnalysisPasses(LLVMPassManagerRef PM) {
-    unwrap(PM)->add(createTargetTransformInfoWrapperPass(TargetIRAnalysis()));
-}
-
-
-// Awaiting D46627
-
-extern "C" JL_DLLEXPORT int LLVMExtraGetSourceLocation(LLVMValueRef V, int index,
-                                                        const char** Name,
-                                                        const char** Filename,
-                                                        unsigned int* Line,
-                                                        unsigned int* Column)
-{
-    if (auto I = dyn_cast<Instruction>(unwrap(V))) {
-        const DILocation* DIL = I->getDebugLoc();
-        if (!DIL)
-            return 0;
-
-        for (int i = index; i > 0; i--) {
-            DIL = DIL->getInlinedAt();
-            if (!DIL)
-                return 0;
-        }
-
-        *Name = DIL->getScope()->getName().data();
-        *Filename = DIL->getScope()->getFilename().data();
-        *Line = DIL->getLine();
-        *Column = DIL->getColumn();
-
-        return 1;
-
-    } else {
-        jl_exceptionf(jl_argumenterror_type, "Can only get source location information of instructions");
-    }
-}
-
-} // namespace llvm
diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp
index e11df11dcc9762..bc68edda2cad78 100644
--- a/src/llvm-final-gc-lowering.cpp
+++ b/src/llvm-final-gc-lowering.cpp
@@ -37,7 +37,7 @@ struct FinalLowerGC: public FunctionPass, private JuliaPassContext {
     Function *queueRootFunc;
     Function *poolAllocFunc;
     Function *bigAllocFunc;
-    CallInst *ptlsStates;
+    Instruction *pgcstack;
 
     bool doInitialization(Module &M) override;
     bool doFinalization(Module &M) override;
@@ -60,8 +60,6 @@ struct FinalLowerGC: public FunctionPass, private JuliaPassContext {
 
     // Lowers a `julia.queue_gc_root` intrinsic.
     Value *lowerQueueGCRoot(CallInst *target, Function &F);
-
-    Instruction *getPgcstack(Instruction *ptlsStates);
 };
 
 Value *FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F)
@@ -111,7 +109,6 @@ void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F)
                         T_size->getPointerTo()),
                 Align(sizeof(void*)));
     inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
-    Value *pgcstack = builder.Insert(getPgcstack(ptlsStates));
     inst = builder.CreateAlignedStore(
             builder.CreateAlignedLoad(pgcstack, Align(sizeof(void*))),
             builder.CreatePointerCast(
@@ -138,8 +135,7 @@ void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F)
     inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
     inst = builder.CreateAlignedStore(
         inst,
-        builder.CreateBitCast(
-            builder.Insert(getPgcstack(ptlsStates)),
+        builder.CreateBitCast(pgcstack,
             PointerType::get(T_prjlvalue, 0)),
         Align(sizeof(void*)));
     inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
@@ -171,16 +167,6 @@ Value *FinalLowerGC::lowerQueueGCRoot(CallInst *target, Function &F)
     return target;
 }
 
-Instruction *FinalLowerGC::getPgcstack(Instruction *ptlsStates)
-{
-    Constant *offset = ConstantInt::getSigned(T_int32, offsetof(jl_tls_states_t, pgcstack) / sizeof(void*));
-    return GetElementPtrInst::CreateInBounds(
-        T_ppjlvalue,
-        ptlsStates,
-        ArrayRef<Value*>(offset),
-        "jl_pgcstack");
-}
-
 Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
 {
     assert(target->getNumArgOperands() == 2);
@@ -282,13 +268,13 @@ bool FinalLowerGC::runOnFunction(Function &F)
     LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Processing function " << F.getName() << "\n");
     // Check availability of functions again since they might have been deleted.
     initFunctions(*F.getParent());
-    if (!ptls_getter)
-        return true;
+    if (!pgcstack_getter)
+        return false;
 
-    // Look for a call to 'julia.ptls_states'.
-    ptlsStates = getPtls(F);
-    if (!ptlsStates)
-        return true;
+    // Look for a call to 'julia.get_pgcstack'.
+    pgcstack = getPGCstack(F);
+    if (!pgcstack)
+        return false;
 
     // Acquire intrinsic functions.
     auto newGCFrameFunc = getOrNull(jl_intrinsics::newGCFrame);
diff --git a/src/llvm-gc-invariant-verifier.cpp b/src/llvm-gc-invariant-verifier.cpp
index c1386efef8e726..29b8c9ac4e60ce 100644
--- a/src/llvm-gc-invariant-verifier.cpp
+++ b/src/llvm-gc-invariant-verifier.cpp
@@ -19,9 +19,6 @@
 #include <llvm/IR/Instructions.h>
 #include <llvm/IR/IntrinsicInst.h>
 #include <llvm/IR/InstVisitor.h>
-#if JL_LLVM_VERSION < 110000
-#include <llvm/IR/CallSite.h>
-#endif
 #include <llvm/IR/Module.h>
 #include <llvm/IR/IRBuilder.h>
 #include <llvm/IR/Verifier.h>
@@ -58,13 +55,17 @@ struct GCInvariantVerifier : public FunctionPass, public InstVisitor<GCInvariant
 
     bool runOnFunction(Function &F) override;
     void visitAddrSpaceCastInst(AddrSpaceCastInst &I);
-    void visitStoreInst(StoreInst &SI);
     void visitLoadInst(LoadInst &LI);
+    void visitStoreInst(StoreInst &SI);
+    void visitAtomicCmpXchgInst(AtomicCmpXchgInst &SI);
+    void visitAtomicRMWInst(AtomicRMWInst &SI);
     void visitReturnInst(ReturnInst &RI);
     void visitGetElementPtrInst(GetElementPtrInst &GEP);
     void visitIntToPtrInst(IntToPtrInst &IPI);
     void visitPtrToIntInst(PtrToIntInst &PII);
     void visitCallInst(CallInst &CI);
+
+    void checkStoreInst(Type *VTy, unsigned AS, Value &SI);
 };
 
 void GCInvariantVerifier::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
@@ -83,8 +84,7 @@ void GCInvariantVerifier::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
           "Illegal address space cast from decayed ptr", &I);
 }
 
-void GCInvariantVerifier::visitStoreInst(StoreInst &SI) {
-    Type *VTy = SI.getValueOperand()->getType();
+void GCInvariantVerifier::checkStoreInst(Type *VTy, unsigned AS, Value &SI) {
     if (VTy->isPointerTy()) {
         /* We currently don't obey this for arguments. That's ok - they're
            externally rooted. */
@@ -93,12 +93,23 @@ void GCInvariantVerifier::visitStoreInst(StoreInst &SI) {
               AS != AddressSpace::Derived,
               "Illegal store of decayed value", &SI);
     }
-    VTy = SI.getPointerOperand()->getType();
-    if (VTy->isPointerTy()) {
-        unsigned AS = cast<PointerType>(VTy)->getAddressSpace();
-        Check(AS != AddressSpace::CalleeRooted,
-              "Illegal store to callee rooted value", &SI);
-    }
+    Check(AS != AddressSpace::CalleeRooted,
+          "Illegal store to callee rooted value", &SI);
+}
+
+void GCInvariantVerifier::visitStoreInst(StoreInst &SI) {
+    // Check the stored value's type against the pointer operand's address space.
+    checkStoreInst(SI.getValueOperand()->getType(), SI.getPointerAddressSpace(), SI);
+}
+
+void GCInvariantVerifier::visitAtomicRMWInst(AtomicRMWInst &SI) {
+    // An atomicrmw writes its value operand, so it is checked like a store.
+    checkStoreInst(SI.getValOperand()->getType(), SI.getPointerAddressSpace(), SI);
+}
+
+void GCInvariantVerifier::visitAtomicCmpXchgInst(AtomicCmpXchgInst &SI) {
+    // A cmpxchg may write its new-value operand, so it is checked like a store.
+    checkStoreInst(SI.getNewValOperand()->getType(), SI.getPointerAddressSpace(), SI);
 }
 
 void GCInvariantVerifier::visitLoadInst(LoadInst &LI) {
diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp
index 99ff45cf618159..0aac7638be0d22 100644
--- a/src/llvm-late-gc-lowering.cpp
+++ b/src/llvm-late-gc-lowering.cpp
@@ -16,9 +16,6 @@
 #include <llvm/IR/Function.h>
 #include <llvm/IR/Instructions.h>
 #include <llvm/IR/IntrinsicInst.h>
-#if JL_LLVM_VERSION < 110000
-#include <llvm/IR/CallSite.h>
-#endif
 #include <llvm/IR/LegacyPassManager.h>
 #include <llvm/IR/MDBuilder.h>
 #include <llvm/IR/Module.h>
@@ -29,9 +26,7 @@
 #include <llvm/Transforms/Utils/BasicBlockUtils.h>
 #include <llvm/Transforms/Utils/ModuleUtils.h>
 
-#if JL_LLVM_VERSION >= 100000
 #include <llvm/InitializePasses.h>
-#endif
 
 #include "codegen_shared.h"
 #include "julia.h"
@@ -326,7 +321,7 @@ struct LateLowerGCFrame: public FunctionPass, private JuliaPassContext {
     }
 
 private:
-    CallInst *ptlsStates;
+    CallInst *pgcstack;
 
     void MaybeNoteDef(State &S, BBState &BBS, Value *Def, const std::vector<int> &SafepointsSoFar, SmallVector<int, 1> &&RefinedPtr = SmallVector<int, 1>());
     void NoteUse(State &S, BBState &BBS, Value *V, BitVector &Uses);
@@ -401,8 +396,14 @@ CountTrackedPointers::CountTrackedPointers(Type *T) {
         }
         if (isa<ArrayType>(T))
             count *= cast<ArrayType>(T)->getNumElements();
-        else if (isa<VectorType>(T))
+        else if (isa<VectorType>(T)) {
+#if JL_LLVM_VERSION >= 120000
+            ElementCount EC = cast<VectorType>(T)->getElementCount();
+            count *= EC.getKnownMinValue();
+#else
             count *= cast<VectorType>(T)->getNumElements();
+#endif
+        }
     }
     if (count == 0)
         all = false;
@@ -413,8 +414,14 @@ unsigned getCompositeNumElements(Type *T) {
         return ST->getNumElements();
     else if (auto *AT = dyn_cast<ArrayType>(T))
         return AT->getNumElements();
-    else
+    else {
+#if JL_LLVM_VERSION >= 120000
+        ElementCount EC = cast<VectorType>(T)->getElementCount();
+        return EC.getKnownMinValue();
+#else
         return cast<VectorType>(T)->getNumElements();
+#endif
+    }
 }
 
 // Walk through a Type, and record the element path to every tracked value inside
@@ -427,11 +434,7 @@ void TrackCompositeType(Type *T, std::vector<unsigned> &Idxs, std::vector<std::v
         unsigned Idx, NumEl = getCompositeNumElements(T);
         for (Idx = 0; Idx < NumEl; Idx++) {
             Idxs.push_back(Idx);
-#if JL_LLVM_VERSION >= 110000
             Type *ElT = GetElementPtrInst::getTypeAtIndex(T, Idx);
-#else
-            Type *ElT = cast<CompositeType>(T)->getTypeAtIndex(Idx);
-#endif
             TrackCompositeType(ElT, Idxs, Numberings);
             Idxs.pop_back();
         }
@@ -505,6 +508,16 @@ static std::pair<Value*,int> FindBaseValue(const State &S, Value *V, bool UseCac
             // In general a load terminates a walk
             break;
         }
+        else if (auto LI = dyn_cast<AtomicCmpXchgInst>(CurrentV)) {
+            // Like a load, an atomic cmpxchg terminates the walk
+            (void)LI;
+            break;
+        }
+        else if (auto LI = dyn_cast<AtomicRMWInst>(CurrentV)) {
+            // Like a load, an atomicrmw terminates the walk
+            (void)LI;
+            break;
+        }
         else if (auto II = dyn_cast<IntrinsicInst>(CurrentV)) {
             // Some intrinsics behave like LoadInst followed by a SelectInst
             // This should never happen in a derived addrspace (since those cannot be stored to memory)
@@ -547,6 +560,7 @@ static std::pair<Value*,int> FindBaseValue(const State &S, Value *V, bool UseCac
         }
     }
     assert(isa<LoadInst>(CurrentV) || isa<CallInst>(CurrentV) ||
+           isa<AtomicCmpXchgInst>(CurrentV) || isa<AtomicRMWInst>(CurrentV) ||
            isa<Argument>(CurrentV) || isa<SelectInst>(CurrentV) ||
            isa<PHINode>(CurrentV) || isa<AddrSpaceCastInst>(CurrentV) ||
            isa<Constant>(CurrentV) || isa<AllocaInst>(CurrentV) ||
@@ -576,11 +590,7 @@ Value *LateLowerGCFrame::MaybeExtractScalar(State &S, std::pair<Value*,int> ValE
         Type *FinalT = ExtractValueInst::getIndexedType(V->getType(), IdxsNotVec);
         bool IsVector = isa<VectorType>(FinalT);
         PointerType *T = cast<PointerType>(
-#if JL_LLVM_VERSION >= 110000
             GetElementPtrInst::getTypeAtIndex(FinalT, Idxs.back()));
-#else
-            cast<CompositeType>(FinalT)->getTypeAtIndex(Idxs.back()));
-#endif
         if (T->getAddressSpace() != AddressSpace::Tracked) {
             // if V isn't tracked, get the shadow def
             auto Numbers = NumberAllBase(S, V);
@@ -638,8 +648,14 @@ void LateLowerGCFrame::LiftSelect(State &S, SelectInst *SI) {
     }
     std::vector<int> Numbers;
     unsigned NumRoots = 1;
-    if (auto VTy = dyn_cast<VectorType>(SI->getType()))
+    if (auto VTy = dyn_cast<VectorType>(SI->getType())) {
+#if JL_LLVM_VERSION >= 120000
+        ElementCount EC = VTy->getElementCount();
+        Numbers.resize(EC.getKnownMinValue(), -1);
+#else
         Numbers.resize(VTy->getNumElements(), -1);
+#endif
+    }
     else
         assert(isa<PointerType>(SI->getType()) && "unimplemented");
     assert(!isTrackedValue(SI));
@@ -699,7 +715,12 @@ void LateLowerGCFrame::LiftSelect(State &S, SelectInst *SI) {
             assert(NumRoots == 1);
             int Number = Numbers[0];
             Numbers.resize(0);
+#if JL_LLVM_VERSION >= 120000
+            ElementCount EC = VTy->getElementCount();
+            Numbers.resize(EC.getKnownMinValue(), Number);
+#else
             Numbers.resize(VTy->getNumElements(), Number);
+#endif
         }
     }
     if (!isa<PointerType>(SI->getType()))
@@ -896,7 +917,8 @@ std::vector<int> LateLowerGCFrame::NumberAllBase(State &S, Value *CurrentV) {
             Numbers = S.AllCompositeNumbering.at(CurrentV);
         }
     } else {
-        assert((isa<LoadInst>(CurrentV) || isa<CallInst>(CurrentV) || isa<PHINode>(CurrentV) || isa<SelectInst>(CurrentV))
+        assert((isa<LoadInst>(CurrentV) || isa<CallInst>(CurrentV) || isa<PHINode>(CurrentV) || isa<SelectInst>(CurrentV) ||
+                isa<AtomicCmpXchgInst>(CurrentV) || isa<AtomicRMWInst>(CurrentV))
                 && "unexpected def expression");
         // This is simple, we can just number them sequentially
         for (unsigned i = 0; i < tracked.count; ++i) {
@@ -1055,7 +1077,8 @@ void RecursivelyVisit(callback f, Value *V) {
             f(VU);
         if (isa<CallInst>(TheUser) || isa<LoadInst>(TheUser) ||
             isa<SelectInst>(TheUser) || isa<PHINode>(TheUser) ||
-            isa<StoreInst>(TheUser) || isa<PtrToIntInst>(TheUser))
+            isa<StoreInst>(TheUser) || isa<PtrToIntInst>(TheUser) ||
+            isa<AtomicCmpXchgInst>(TheUser) || isa<AtomicRMWInst>(TheUser))
             continue;
         if (isa<GetElementPtrInst>(TheUser) || isa<BitCastInst>(TheUser) || isa<AddrSpaceCastInst>(TheUser)) {
             RecursivelyVisit<VisitInst, callback>(f, TheUser);
@@ -1161,7 +1184,7 @@ static bool isLoadFromConstGV(Value *v, bool &task_local)
         if (callee && callee->getName() == "julia.typeof") {
             return true;
         }
-        if (callee && callee->getName() == "julia.ptls_states") {
+        if (callee && callee->getName() == "julia.get_pgcstack") {
             task_local = true;
             return true;
         }
@@ -1527,7 +1550,8 @@ State LateLowerGCFrame::LocalScan(Function &F) {
                     // Known functions emitted in codegen that are not safepoints
                     if (callee == pointer_from_objref_func || callee == gc_preserve_begin_func ||
                         callee == gc_preserve_end_func || callee == typeof_func ||
-                        callee == ptls_getter ||
+                        callee == pgcstack_getter || callee->getName() == "jl_egal__unboxed" ||
+                        callee->getName() == "jl_lock_value" || callee->getName() == "jl_unlock_value" ||
                         callee == write_barrier_func || callee->getName() == "memcmp") {
                         continue;
                     }
@@ -1549,7 +1573,7 @@ State LateLowerGCFrame::LocalScan(Function &F) {
                 for (Use &U : CI->arg_operands()) {
                     // Find all callee rooted arguments.
                     // Record them instead of simply remove them from live values here
-                    // since they can be useful during refinment
+                    // since they can be useful during refinement
                     // (e.g. to remove roots of objects that are refined to these)
                     Value *V = U;
                     if (isa<Constant>(V) || !isa<PointerType>(V->getType()) ||
@@ -1595,6 +1619,20 @@ State LateLowerGCFrame::LocalScan(Function &F) {
                     MaybeNoteDef(S, BBS, LI, BBS.Safepoints, std::move(RefinedPtr));
                 }
                 NoteOperandUses(S, BBS, I);
+            } else if (auto *LI = dyn_cast<AtomicCmpXchgInst>(&I)) {
+                Type *Ty = LI->getNewValOperand()->getType()->getScalarType();
+                if (!Ty->isPointerTy() || Ty->getPointerAddressSpace() != AddressSpace::Loaded) {
+                    MaybeNoteDef(S, BBS, LI, BBS.Safepoints);
+                }
+                NoteOperandUses(S, BBS, I);
+                // TODO: do we need MaybeTrackStore(S, LI)?
+            } else if (auto *LI = dyn_cast<AtomicRMWInst>(&I)) {
+                Type *Ty = LI->getType()->getScalarType();
+                if (!Ty->isPointerTy() || Ty->getPointerAddressSpace() != AddressSpace::Loaded) {
+                    MaybeNoteDef(S, BBS, LI, BBS.Safepoints);
+                }
+                NoteOperandUses(S, BBS, I);
+                // TODO: do we need MaybeTrackStore(S, LI)?
             } else if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
                 auto tracked = CountTrackedPointers(SI->getType());
                 if (tracked.count && !tracked.derived) {
@@ -1670,11 +1708,7 @@ State LateLowerGCFrame::LocalScan(Function &F) {
     return S;
 }
 
-#if JL_LLVM_VERSION >= 110000
 static Value *ExtractScalar(Value *V, Type *VTy, bool isptr, ArrayRef<unsigned> Idxs, IRBuilder<> &irbuilder) {
-#else
-static Value *ExtractScalar(Value *V, Type *VTy, bool isptr, ArrayRef<unsigned> Idxs, IRBuilder<> irbuilder) {
-#endif
     Type *T_int32 = Type::getInt32Ty(V->getContext());
     if (isptr) {
         std::vector<Value*> IdxList{Idxs.size() + 1};
@@ -1717,11 +1751,7 @@ static unsigned getFieldOffset(const DataLayout &DL, Type *STy, ArrayRef<unsigne
     return (unsigned)offset;
 }
 
-#if JL_LLVM_VERSION >= 110000
 std::vector<Value*> ExtractTrackedValues(Value *Src, Type *STy, bool isptr, IRBuilder<> &irbuilder, ArrayRef<unsigned> perm_offsets) {
-#else
-std::vector<Value*> ExtractTrackedValues(Value *Src, Type *STy, bool isptr, IRBuilder<> irbuilder, ArrayRef<unsigned> perm_offsets) {
-#endif
     auto Tracked = TrackCompositeType(STy);
     std::vector<Value*> Ptrs;
     unsigned perm_idx = 0;
@@ -1754,11 +1784,7 @@ std::vector<Value*> ExtractTrackedValues(Value *Src, Type *STy, bool isptr, IRBu
     return Ptrs;
 }
 
-#if JL_LLVM_VERSION >= 110000
 unsigned TrackWithShadow(Value *Src, Type *STy, bool isptr, Value *Dst, IRBuilder<> &irbuilder) {
-#else
-unsigned TrackWithShadow(Value *Src, Type *STy, bool isptr, Value *Dst, IRBuilder<> irbuilder) {
-#endif
     auto Ptrs = ExtractTrackedValues(Src, STy, isptr, irbuilder);
     for (unsigned i = 0; i < Ptrs.size(); ++i) {
         Value *Elem = Ptrs[i];
@@ -2275,11 +2301,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S) {
                 // to remove write barrier because of it.
                 // We pretty much only load using `T_size` so try our best to strip
                 // as many cast as possible.
-#if JL_LLVM_VERSION >= 100000
                 auto tag = CI->getArgOperand(2)->stripPointerCastsAndAliases();
-#else
-                auto tag = CI->getArgOperand(2)->stripPointerCasts();
-#endif
                 if (auto C = dyn_cast<ConstantExpr>(tag)) {
                     if (C->getOpcode() == Instruction::IntToPtr) {
                         tag = C->getOperand(0);
@@ -2305,13 +2327,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S) {
                 auto tag_type = tag->getType();
                 if (tag_type->isPointerTy()) {
                     auto &DL = CI->getModule()->getDataLayout();
-#if JL_LLVM_VERSION >= 110000
                     auto align = tag->getPointerAlignment(DL).value();
-#elif JL_LLVM_VERSION >= 100000
-                    auto align = tag->getPointerAlignment(DL).valueOrOne().value();
-#else
-                    auto align = tag->getPointerAlignment(DL);
-#endif
                     if (align < 16) {
                         // On 5 <= LLVM < 12, it is illegal to call this on
                         // non-integral pointer. This relies on stripping the
@@ -2554,7 +2570,7 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector<int> &Colors, State
         auto pushGcframe = CallInst::Create(
             getOrDeclare(jl_intrinsics::pushGCFrame),
             {gcframe, ConstantInt::get(T_int32, 0)});
-        pushGcframe->insertAfter(ptlsStates);
+        pushGcframe->insertAfter(pgcstack);
 
         // Replace Allocas
         unsigned AllocaSlot = 2; // first two words are metadata
@@ -2651,11 +2667,11 @@ bool LateLowerGCFrame::runOnFunction(Function &F) {
     LLVM_DEBUG(dbgs() << "GC ROOT PLACEMENT: Processing function " << F.getName() << "\n");
     // Check availability of functions again since they might have been deleted.
     initFunctions(*F.getParent());
-    if (!ptls_getter)
+    if (!pgcstack_getter)
         return CleanupIR(F);
 
-    ptlsStates = getPtls(F);
-    if (!ptlsStates)
+    pgcstack = getPGCstack(F);
+    if (!pgcstack)
         return CleanupIR(F);
 
     State S = LocalScan(F);
diff --git a/src/llvm-lower-handlers.cpp b/src/llvm-lower-handlers.cpp
index 7382f3b74c0801..ae5a6f3b0c11d0 100644
--- a/src/llvm-lower-handlers.cpp
+++ b/src/llvm-lower-handlers.cpp
@@ -176,7 +176,7 @@ bool LowerExcHandlers::runOnFunction(Function &F) {
 
     /* Step 2: EH Frame lowering */
     // Allocate stack space for each handler. We allocate these as separate
-    // allocas so the optimizer can later merge and reaarange them if it wants
+    // allocas so the optimizer can later merge and rearrange them if it wants
     // to.
     Value *handler_sz = ConstantInt::get(Type::getInt32Ty(F.getContext()),
                                          sizeof(jl_handler_t));
diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp
index d594408a20992a..0eed7aec98f0bc 100644
--- a/src/llvm-pass-helpers.cpp
+++ b/src/llvm-pass-helpers.cpp
@@ -24,7 +24,7 @@ JuliaPassContext::JuliaPassContext()
     : T_size(nullptr), T_int8(nullptr), T_int32(nullptr),
         T_pint8(nullptr), T_jlvalue(nullptr), T_prjlvalue(nullptr),
         T_ppjlvalue(nullptr), T_pjlvalue(nullptr), T_pjlvalue_der(nullptr),
-        T_ppjlvalue_der(nullptr), ptls_getter(nullptr), gc_flush_func(nullptr),
+        T_ppjlvalue_der(nullptr), pgcstack_getter(nullptr), gc_flush_func(nullptr),
         gc_preserve_begin_func(nullptr), gc_preserve_end_func(nullptr),
         pointer_from_objref_func(nullptr), alloc_obj_func(nullptr),
         typeof_func(nullptr), write_barrier_func(nullptr), module(nullptr)
@@ -40,7 +40,7 @@ void JuliaPassContext::initFunctions(Module &M)
 {
     module = &M;
 
-    ptls_getter = M.getFunction("julia.ptls_states");
+    pgcstack_getter = M.getFunction("julia.get_pgcstack");
     gc_flush_func = M.getFunction("julia.gcroot_flush");
     gc_preserve_begin_func = M.getFunction("llvm.julia.gc_preserve_begin");
     gc_preserve_end_func = M.getFunction("llvm.julia.gc_preserve_end");
@@ -69,14 +69,15 @@ void JuliaPassContext::initAll(Module &M)
     T_ppjlvalue = PointerType::get(T_pjlvalue, 0);
     T_pjlvalue_der = PointerType::get(T_jlvalue, AddressSpace::Derived);
     T_ppjlvalue_der = PointerType::get(T_prjlvalue, AddressSpace::Derived);
+    T_pppjlvalue = PointerType::get(T_ppjlvalue, 0);
 }
 
-llvm::CallInst *JuliaPassContext::getPtls(llvm::Function &F) const
+llvm::CallInst *JuliaPassContext::getPGCstack(llvm::Function &F) const
 {
     for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end();
-         ptls_getter && I != E; ++I) {
+         pgcstack_getter && I != E; ++I) {
         if (CallInst *callInst = dyn_cast<CallInst>(&*I)) {
-            if (callInst->getCalledOperand() == ptls_getter) {
+            if (callInst->getCalledOperand() == pgcstack_getter) {
                 return callInst;
             }
         }
diff --git a/src/llvm-pass-helpers.h b/src/llvm-pass-helpers.h
index 71cab27e76ceba..f80786d1e71499 100644
--- a/src/llvm-pass-helpers.h
+++ b/src/llvm-pass-helpers.h
@@ -49,6 +49,7 @@ struct JuliaPassContext {
     // Types derived from 'jl_value_t'.
     llvm::Type *T_jlvalue;
     llvm::PointerType *T_prjlvalue;
+    llvm::PointerType *T_pppjlvalue;
     llvm::PointerType *T_ppjlvalue;
     llvm::PointerType *T_pjlvalue;
     llvm::PointerType *T_pjlvalue_der;
@@ -59,7 +60,7 @@ struct JuliaPassContext {
     llvm::MDNode *tbaa_tag;
 
     // Intrinsics.
-    llvm::Function *ptls_getter;
+    llvm::Function *pgcstack_getter;
     llvm::Function *gc_flush_func;
     llvm::Function *gc_preserve_begin_func;
     llvm::Function *gc_preserve_end_func;
@@ -86,10 +87,10 @@ struct JuliaPassContext {
         return module->getContext();
     }
 
-    // Gets a call to the `julia.ptls_states` intrinisc in the entry
+    // Gets a call to the `julia.get_pgcstack` intrinsic in the entry
     // point of the given function, if there exists such a call.
     // Otherwise, `nullptr` is returned.
-    llvm::CallInst *getPtls(llvm::Function &F) const;
+    llvm::CallInst *getPGCstack(llvm::Function &F) const;
 
     // Gets the intrinsic or well-known function that conforms to
     // the given description if it exists in the module. If not,
diff --git a/src/llvm-propagate-addrspaces.cpp b/src/llvm-propagate-addrspaces.cpp
index 95182f144d6ec9..a6afcda8709115 100644
--- a/src/llvm-propagate-addrspaces.cpp
+++ b/src/llvm-propagate-addrspaces.cpp
@@ -16,9 +16,6 @@
 #include <llvm/IR/Instructions.h>
 #include <llvm/IR/IntrinsicInst.h>
 #include <llvm/IR/InstVisitor.h>
-#if JL_LLVM_VERSION < 110000
-#include <llvm/IR/CallSite.h>
-#endif
 #include <llvm/IR/Module.h>
 #include <llvm/IR/IRBuilder.h>
 #include <llvm/IR/Verifier.h>
@@ -54,8 +51,11 @@ struct PropagateJuliaAddrspaces : public FunctionPass, public InstVisitor<Propag
 public:
     bool runOnFunction(Function &F) override;
     Value *LiftPointer(Value *V, Type *LocTy = nullptr, Instruction *InsertPt=nullptr);
-    void visitStoreInst(StoreInst &SI);
+    void visitMemop(Instruction &I, Type *T, unsigned OpIndex);
     void visitLoadInst(LoadInst &LI);
+    void visitStoreInst(StoreInst &SI);
+    void visitAtomicCmpXchgInst(AtomicCmpXchgInst &SI);
+    void visitAtomicRMWInst(AtomicRMWInst &SI);
     void visitMemSetInst(MemSetInst &MI);
     void visitMemTransferInst(MemTransferInst &MTI);
 
@@ -232,24 +232,31 @@ Value *PropagateJuliaAddrspaces::LiftPointer(Value *V, Type *LocTy, Instruction
     return CollapseCastsAndLift(V, InsertPt);
 }
 
-void PropagateJuliaAddrspaces::visitLoadInst(LoadInst &LI) {
-    unsigned AS = LI.getPointerAddressSpace();
+void PropagateJuliaAddrspaces::visitMemop(Instruction &I, Type *T, unsigned OpIndex) {
+    Value *Original = I.getOperand(OpIndex);
+    unsigned AS = Original->getType()->getPointerAddressSpace();
     if (!isSpecialAS(AS))
         return;
-    Value *Replacement = LiftPointer(LI.getPointerOperand(), LI.getType(), &LI);
+    Value *Replacement = LiftPointer(Original, T, &I);
     if (!Replacement)
         return;
-    LI.setOperand(LoadInst::getPointerOperandIndex(), Replacement);
+    I.setOperand(OpIndex, Replacement);
+}
+
+void PropagateJuliaAddrspaces::visitLoadInst(LoadInst &LI) {
+    visitMemop(LI, LI.getType(), LoadInst::getPointerOperandIndex());
 }
 
 void PropagateJuliaAddrspaces::visitStoreInst(StoreInst &SI) {
-    unsigned AS = SI.getPointerAddressSpace();
-    if (!isSpecialAS(AS))
-        return;
-    Value *Replacement = LiftPointer(SI.getPointerOperand(), SI.getValueOperand()->getType(), &SI);
-    if (!Replacement)
-        return;
-    SI.setOperand(StoreInst::getPointerOperandIndex(), Replacement);
+    visitMemop(SI, SI.getValueOperand()->getType(), StoreInst::getPointerOperandIndex());
+}
+
+void PropagateJuliaAddrspaces::visitAtomicCmpXchgInst(AtomicCmpXchgInst &SI) {
+    visitMemop(SI, SI.getNewValOperand()->getType(), AtomicCmpXchgInst::getPointerOperandIndex());
+}
+
+void PropagateJuliaAddrspaces::visitAtomicRMWInst(AtomicRMWInst &SI) {
+    visitMemop(SI, SI.getType(), AtomicRMWInst::getPointerOperandIndex());
 }
 
 void PropagateJuliaAddrspaces::visitMemSetInst(MemSetInst &MI) {
diff --git a/src/llvm-ptls.cpp b/src/llvm-ptls.cpp
index 6fbc40ceff0c4b..9cecceac9a1875 100644
--- a/src/llvm-ptls.cpp
+++ b/src/llvm-ptls.cpp
@@ -47,33 +47,36 @@ struct LowerPTLS: public ModulePass {
 private:
     const bool imaging_mode;
     Module *M;
-    Function *ptls_getter;
+    Function *pgcstack_getter;
     LLVMContext *ctx;
     MDNode *tbaa_const;
-    PointerType *T_ptls_getter;
+    FunctionType *FT_pgcstack_getter;
+    PointerType *T_pgcstack_getter;
     PointerType *T_ppjlvalue;
     PointerType *T_pppjlvalue;
     Type *T_int8;
     Type *T_size;
     PointerType *T_pint8;
-    GlobalVariable *ptls_slot{nullptr};
-    GlobalVariable *ptls_offset{nullptr};
-    void set_ptls_attrs(CallInst *ptlsStates) const;
-    Instruction *emit_ptls_tp(Value *offset, Instruction *insertBefore) const;
+    GlobalVariable *pgcstack_func_slot{nullptr};
+    GlobalVariable *pgcstack_key_slot{nullptr};
+    GlobalVariable *pgcstack_offset{nullptr};
+    void set_pgcstack_attrs(CallInst *pgcstack) const;
+    Instruction *emit_pgcstack_tp(Value *offset, Instruction *insertBefore) const;
     template<typename T> T *add_comdat(T *G) const;
     GlobalVariable *create_aliased_global(Type *T, StringRef name) const;
-    void fix_ptls_use(CallInst *ptlsStates);
+    void fix_pgcstack_use(CallInst *pgcstack);
     bool runOnModule(Module &M) override;
 };
 
-void LowerPTLS::set_ptls_attrs(CallInst *ptlsStates) const
+void LowerPTLS::set_pgcstack_attrs(CallInst *pgcstack) const
 {
-    ptlsStates->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
-    ptlsStates->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
+    pgcstack->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
+    pgcstack->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
 }
 
-Instruction *LowerPTLS::emit_ptls_tp(Value *offset, Instruction *insertBefore) const
+Instruction *LowerPTLS::emit_pgcstack_tp(Value *offset, Instruction *insertBefore) const
 {
+    Value *tls;
 #if defined(_CPU_X86_64_) || defined(_CPU_X86_)
     if (insertBefore->getFunction()->callsFunctionThatReturnsTwice()) {
         // Workaround LLVM bug by hiding the offset computation
@@ -95,47 +98,49 @@ Instruction *LowerPTLS::emit_ptls_tp(Value *offset, Instruction *insertBefore) c
 #  endif
 
         // The add instruction clobbers flags
-        Value *tls;
         if (offset) {
             std::vector<Type*> args(0);
             args.push_back(offset->getType());
             auto tp = InlineAsm::get(FunctionType::get(T_pint8, args, false),
                                      dyn_asm_str, "=&r,r,~{dirflag},~{fpsr},~{flags}", false);
-            tls = CallInst::Create(tp, offset, "ptls_i8", insertBefore);
+            tls = CallInst::Create(tp, offset, "pgcstack_i8", insertBefore);
         }
         else {
             auto tp = InlineAsm::get(FunctionType::get(T_pint8, false),
                                      const_asm_str.c_str(), "=r,~{dirflag},~{fpsr},~{flags}",
                                      false);
-            tls = CallInst::Create(tp, "ptls_i8", insertBefore);
+            tls = CallInst::Create(tp, "pgcstack_i8", insertBefore);
         }
-        return new BitCastInst(tls, T_pppjlvalue, "ptls", insertBefore);
     }
+    else
 #endif
-    // AArch64/ARM doesn't seem to have this issue.
-    // (Possibly because there are many more registers and the offset is
-    // positive and small)
-    // It's also harder to emit the offset in a generic way on ARM/AArch64
-    // (need to generate one or two `add` with shift) so let llvm emit
-    // the add for now.
+    {
+        // AArch64/ARM doesn't seem to have this issue.
+        // (Possibly because there are many more registers and the offset is
+        // positive and small)
+        // It's also harder to emit the offset in a generic way on ARM/AArch64
+        // (need to generate one or two `add` with shift) so let llvm emit
+        // the add for now.
 #if defined(_CPU_AARCH64_)
-    const char *asm_str = "mrs $0, tpidr_el0";
+        const char *asm_str = "mrs $0, tpidr_el0";
 #elif defined(__ARM_ARCH) && __ARM_ARCH >= 7
-    const char *asm_str = "mrc p15, 0, $0, c13, c0, 3";
+        const char *asm_str = "mrc p15, 0, $0, c13, c0, 3";
 #elif defined(_CPU_X86_64_)
-    const char *asm_str = "movq %fs:0, $0";
+        const char *asm_str = "movq %fs:0, $0";
 #elif defined(_CPU_X86_)
-    const char *asm_str = "movl %gs:0, $0";
+        const char *asm_str = "movl %gs:0, $0";
 #else
-    const char *asm_str = nullptr;
-    assert(0 && "Cannot emit thread pointer for this architecture.");
+        const char *asm_str = nullptr;
+        assert(0 && "Cannot emit thread pointer for this architecture.");
 #endif
-    if (!offset)
-        offset = ConstantInt::getSigned(T_size, jl_tls_offset);
-    auto tp = InlineAsm::get(FunctionType::get(T_pint8, false), asm_str, "=r", false);
-    Value *tls = CallInst::Create(tp, "thread_ptr", insertBefore);
-    tls = GetElementPtrInst::Create(T_int8, tls, {offset}, "ptls_i8", insertBefore);
-    return new BitCastInst(tls, T_pppjlvalue, "ptls", insertBefore);
+        if (!offset)
+            offset = ConstantInt::getSigned(T_size, jl_tls_offset);
+        auto tp = InlineAsm::get(FunctionType::get(T_pint8, false), asm_str, "=r", false);
+        tls = CallInst::Create(tp, "thread_ptr", insertBefore);
+        tls = GetElementPtrInst::Create(T_int8, tls, {offset}, "ppgcstack_i8", insertBefore);
+    }
+    tls = new BitCastInst(tls, T_pppjlvalue->getPointerTo(), "ppgcstack", insertBefore);
+    return new LoadInst(T_pppjlvalue, tls, "pgcstack", false, insertBefore);
 }
 
 GlobalVariable *LowerPTLS::create_aliased_global(Type *T, StringRef name) const
@@ -173,98 +178,128 @@ inline T *LowerPTLS::add_comdat(T *G) const
     return G;
 }
 
-void LowerPTLS::fix_ptls_use(CallInst *ptlsStates)
+void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack)
 {
-    if (ptlsStates->use_empty()) {
-        ptlsStates->eraseFromParent();
+    if (pgcstack->use_empty()) {
+        pgcstack->eraseFromParent();
         return;
     }
 
     if (imaging_mode) {
         if (jl_tls_elf_support) {
             // if (offset != 0)
-            //     ptls = tp + offset;
+            //     pgcstack = tp + offset;
             // else
-            //     ptls = getter();
-            auto offset = new LoadInst(T_size, ptls_offset, "", false, ptlsStates);
+            //     pgcstack = getter();
+            auto offset = new LoadInst(T_size, pgcstack_offset, "", false, pgcstack);
             offset->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
             offset->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(*ctx, None));
-            auto cmp = new ICmpInst(ptlsStates, CmpInst::ICMP_NE, offset,
+            auto cmp = new ICmpInst(pgcstack, CmpInst::ICMP_NE, offset,
                                     Constant::getNullValue(offset->getType()));
             MDBuilder MDB(*ctx);
             SmallVector<uint32_t, 2> Weights{9, 1};
             TerminatorInst *fastTerm;
             TerminatorInst *slowTerm;
-            SplitBlockAndInsertIfThenElse(cmp, ptlsStates, &fastTerm, &slowTerm,
+            SplitBlockAndInsertIfThenElse(cmp, pgcstack, &fastTerm, &slowTerm,
                                           MDB.createBranchWeights(Weights));
 
-            auto fastTLS = emit_ptls_tp(offset, fastTerm);
-            auto phi = PHINode::Create(T_pppjlvalue, 2, "", ptlsStates);
-            ptlsStates->replaceAllUsesWith(phi);
-            ptlsStates->moveBefore(slowTerm);
-            auto getter = new LoadInst(T_ptls_getter, ptls_slot, "", false, ptlsStates);
+            auto fastTLS = emit_pgcstack_tp(offset, fastTerm);
+            auto phi = PHINode::Create(T_pppjlvalue, 2, "", pgcstack);
+            pgcstack->replaceAllUsesWith(phi);
+            pgcstack->moveBefore(slowTerm);
+            auto getter = new LoadInst(T_pgcstack_getter, pgcstack_func_slot, "", false, pgcstack);
             getter->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
             getter->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(*ctx, None));
-            ptlsStates->setCalledFunction(ptlsStates->getFunctionType(), getter);
-            set_ptls_attrs(ptlsStates);
+            pgcstack->setCalledFunction(pgcstack->getFunctionType(), getter);
+            set_pgcstack_attrs(pgcstack);
 
             phi->addIncoming(fastTLS, fastTLS->getParent());
-            phi->addIncoming(ptlsStates, ptlsStates->getParent());
+            phi->addIncoming(pgcstack, pgcstack->getParent());
 
             return;
         }
         // In imaging mode, we emit the function address as a load of a static
         // variable to be filled (in `staticdata.c`) at initialization time of the sysimg.
-        // This way we can by pass the extra indirection in `jl_get_ptls_states`
+        // This way we can bypass the extra indirection in `jl_get_pgcstack`
         // since we may not know which getter function to use ahead of time.
-        auto getter = new LoadInst(T_ptls_getter, ptls_slot, "", false, ptlsStates);
+        auto getter = new LoadInst(T_pgcstack_getter, pgcstack_func_slot, "", false, pgcstack);
         getter->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
         getter->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(*ctx, None));
-        ptlsStates->setCalledFunction(ptlsStates->getFunctionType(), getter);
-        set_ptls_attrs(ptlsStates);
+#if defined(_OS_DARWIN_)
+        auto key = new LoadInst(T_size, pgcstack_key_slot, "", false, pgcstack);
+        key->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
+        key->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(*ctx, None));
+        auto new_pgcstack = CallInst::Create(FT_pgcstack_getter, getter, {key}, "", pgcstack);
+        new_pgcstack->takeName(pgcstack);
+        pgcstack->replaceAllUsesWith(new_pgcstack);
+        pgcstack->eraseFromParent();
+        pgcstack = new_pgcstack;
+#else
+        pgcstack->setCalledFunction(pgcstack->getFunctionType(), getter);
+#endif
+        set_pgcstack_attrs(pgcstack);
     }
     else if (jl_tls_offset != -1) {
-        ptlsStates->replaceAllUsesWith(emit_ptls_tp(nullptr, ptlsStates));
-        ptlsStates->eraseFromParent();
+        pgcstack->replaceAllUsesWith(emit_pgcstack_tp(nullptr, pgcstack));
+        pgcstack->eraseFromParent();
     }
     else {
         // use the address of the actual getter function directly
-        auto val = ConstantInt::get(T_size, (uintptr_t)jl_get_ptls_states_getter());
-        ptlsStates->setCalledFunction(ptlsStates->getFunctionType(), ConstantExpr::getIntToPtr(val, T_ptls_getter));
-        set_ptls_attrs(ptlsStates);
+        jl_get_pgcstack_func *f;
+        jl_pgcstack_key_t k;
+        jl_pgcstack_getkey(&f, &k);
+        Constant *val = ConstantInt::get(T_size, (uintptr_t)f);
+        val = ConstantExpr::getIntToPtr(val, T_pgcstack_getter);
+#if defined(_OS_DARWIN_)
+        assert(sizeof(k) == sizeof(uintptr_t));
+        Constant *key = ConstantInt::get(T_size, (uintptr_t)k);
+        auto new_pgcstack = CallInst::Create(FT_pgcstack_getter, val, {key}, "", pgcstack);
+        new_pgcstack->takeName(pgcstack);
+        pgcstack->replaceAllUsesWith(new_pgcstack);
+        pgcstack->eraseFromParent();
+        pgcstack = new_pgcstack;
+#else
+        pgcstack->setCalledFunction(pgcstack->getFunctionType(), val);
+#endif
+        set_pgcstack_attrs(pgcstack);
     }
 }
 
 bool LowerPTLS::runOnModule(Module &_M)
 {
     M = &_M;
-    ptls_getter = M->getFunction("julia.ptls_states");
-    if (!ptls_getter)
+    pgcstack_getter = M->getFunction("julia.get_pgcstack");
+    if (!pgcstack_getter)
         return false;
 
     ctx = &M->getContext();
     tbaa_const = tbaa_make_child("jtbaa_const", nullptr, true).first;
 
-    auto FT_ptls_getter = ptls_getter->getFunctionType();
-    T_ptls_getter = FT_ptls_getter->getPointerTo();
-    T_pppjlvalue = cast<PointerType>(FT_ptls_getter->getReturnType());
-    T_ppjlvalue = cast<PointerType>(T_pppjlvalue->getElementType());
     T_int8 = Type::getInt8Ty(*ctx);
     T_size = sizeof(size_t) == 8 ? Type::getInt64Ty(*ctx) : Type::getInt32Ty(*ctx);
     T_pint8 = T_int8->getPointerTo();
+    FT_pgcstack_getter = pgcstack_getter->getFunctionType();
+#if defined(_OS_DARWIN_)
+    assert(sizeof(jl_pgcstack_key_t) == sizeof(uintptr_t));
+    FT_pgcstack_getter = FunctionType::get(FT_pgcstack_getter->getReturnType(), {T_size}, false);
+#endif
+    T_pgcstack_getter = FT_pgcstack_getter->getPointerTo();
+    T_pppjlvalue = cast<PointerType>(FT_pgcstack_getter->getReturnType());
+    T_ppjlvalue = cast<PointerType>(T_pppjlvalue->getElementType());
     if (imaging_mode) {
-        ptls_slot = create_aliased_global(T_ptls_getter, "jl_get_ptls_states_slot");
-        ptls_offset = create_aliased_global(T_size, "jl_tls_offset");
+        pgcstack_func_slot = create_aliased_global(T_pgcstack_getter, "jl_pgcstack_func_slot");
+        pgcstack_key_slot = create_aliased_global(T_size, "jl_pgcstack_key_slot"); // >= sizeof(jl_pgcstack_key_t)
+        pgcstack_offset = create_aliased_global(T_size, "jl_tls_offset");
     }
 
-    for (auto it = ptls_getter->user_begin(); it != ptls_getter->user_end();) {
+    for (auto it = pgcstack_getter->user_begin(); it != pgcstack_getter->user_end();) {
         auto call = cast<CallInst>(*it);
         ++it;
-        assert(call->getCalledOperand() == ptls_getter);
-        fix_ptls_use(call);
+        assert(call->getCalledOperand() == pgcstack_getter);
+        fix_pgcstack_use(call);
     }
-    assert(ptls_getter->use_empty());
-    ptls_getter->eraseFromParent();
+    assert(pgcstack_getter->use_empty());
+    pgcstack_getter->eraseFromParent();
     return true;
 }
 
diff --git a/src/llvm-remove-addrspaces.cpp b/src/llvm-remove-addrspaces.cpp
index de5633b9057623..ada10c8d5f1f94 100644
--- a/src/llvm-remove-addrspaces.cpp
+++ b/src/llvm-remove-addrspaces.cpp
@@ -92,14 +92,7 @@ class AddrspaceRemoveTypeRemapper : public ValueMapTypeRemapper {
             DstTy = ArrayType::get(
                     remapType(Ty->getElementType()), Ty->getNumElements());
         else if (auto Ty = dyn_cast<VectorType>(SrcTy))
-            DstTy = VectorType::get(remapType(Ty->getElementType()),
-#if JL_LLVM_VERSION >= 110000
-                     Ty
-#else
-                     Ty->getNumElements(),
-                     Ty->isScalable()
-#endif
-                    );
+            DstTy = VectorType::get(remapType(Ty->getElementType()), Ty);
 
         if (DstTy != SrcTy)
             LLVM_DEBUG(
diff --git a/src/llvm-version.h b/src/llvm-version.h
index 0b8a086e60b400..f59f7826c334dc 100644
--- a/src/llvm-version.h
+++ b/src/llvm-version.h
@@ -9,11 +9,8 @@
 #define JL_LLVM_VERSION (LLVM_VERSION_MAJOR * 10000 + LLVM_VERSION_MINOR * 100 \
                         + LLVM_VERSION_PATCH)
 
-#if JL_LLVM_VERSION < 90000
-    #error Only LLVM versions >= 9.0.0 are supported by Julia
-#endif
-#if JL_LLVM_VERSION < 100000
-#define Align(a) (a)
+#if JL_LLVM_VERSION < 110000
+    #error Only LLVM versions >= 11.0.0 are supported by Julia
 #endif
 
 #ifndef LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING
diff --git a/src/locks.h b/src/locks.h
index 262390bb718f31..0605cefbd12180 100644
--- a/src/locks.h
+++ b/src/locks.h
@@ -22,19 +22,18 @@ static inline void jl_mutex_wait(jl_mutex_t *lock, int safepoint)
 {
     jl_thread_t self = jl_thread_self();
     jl_thread_t owner = jl_atomic_load_relaxed(&lock->owner);
+    jl_task_t *ct = jl_current_task;
     if (owner == self) {
         lock->count++;
         return;
     }
     while (1) {
-        if (owner == 0 &&
-            jl_atomic_compare_exchange(&lock->owner, 0, self) == 0) {
+        if (owner == 0 && jl_atomic_cmpswap(&lock->owner, &owner, self)) {
             lock->count = 1;
             return;
         }
         if (safepoint) {
-            jl_ptls_t ptls = jl_get_ptls_states();
-            jl_gc_safepoint_(ptls);
+            jl_gc_safepoint_(ct->ptls);
         }
         jl_cpu_pause();
         owner = jl_atomic_load_relaxed(&lock->owner);
@@ -53,7 +52,7 @@ static inline void jl_mutex_lock_nogc(jl_mutex_t *lock) JL_NOTSAFEPOINT
 
 static inline void jl_lock_frame_push(jl_mutex_t *lock)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     small_arraylist_t *locks = &ptls->locks;
     uint32_t len = locks->len;
     if (__unlikely(len >= locks->max)) {
@@ -66,19 +65,19 @@ static inline void jl_lock_frame_push(jl_mutex_t *lock)
 }
 static inline void jl_lock_frame_pop(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     assert(ptls->locks.len > 0);
     ptls->locks.len--;
 }
 
 #define JL_SIGATOMIC_BEGIN() do {               \
-        jl_get_ptls_states()->defer_signal++;   \
+        jl_current_task->ptls->defer_signal++;  \
         jl_signal_fence();                      \
     } while (0)
 #define JL_SIGATOMIC_END() do {                                 \
         jl_signal_fence();                                      \
-        if (--jl_get_ptls_states()->defer_signal == 0) {        \
-            jl_sigint_safepoint(jl_get_ptls_states());          \
+        if (--jl_current_task->ptls->defer_signal == 0) {       \
+            jl_sigint_safepoint(jl_current_task->ptls);         \
         }                                                       \
     } while (0)
 
@@ -97,8 +96,7 @@ static inline int jl_mutex_trylock_nogc(jl_mutex_t *lock)
         lock->count++;
         return 1;
     }
-    if (owner == 0 &&
-        jl_atomic_compare_exchange(&lock->owner, 0, self) == 0) {
+    if (owner == 0 && jl_atomic_cmpswap(&lock->owner, &owner, self)) {
         lock->count = 1;
         return 1;
     }
@@ -128,12 +126,11 @@ static inline void jl_mutex_unlock_nogc(jl_mutex_t *lock) JL_NOTSAFEPOINT
 
 static inline void jl_mutex_unlock(jl_mutex_t *lock)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
     jl_mutex_unlock_nogc(lock);
     jl_lock_frame_pop();
     JL_SIGATOMIC_END();
     if (jl_gc_have_pending_finalizers) {
-        jl_gc_run_pending_finalizers(ptls); // may GC
+        jl_gc_run_pending_finalizers(jl_current_task); // may GC
     }
 }
 
diff --git a/src/macroexpand.scm b/src/macroexpand.scm
index 882fe88a2e648f..f17f4d3510dc60 100644
--- a/src/macroexpand.scm
+++ b/src/macroexpand.scm
@@ -210,7 +210,8 @@
         ((atom? v) '())
         (else
          (case (car v)
-           ((... kw |::| =) (try-arg-name (cadr v)))
+           ((|::|) (if (length= v 2) '() (try-arg-name (cadr v))))
+           ((... kw =) (try-arg-name (cadr v)))
            ((escape) (list v))
            ((hygienic-scope) (try-arg-name (cadr v)))
            ((meta)  ;; allow certain per-argument annotations
@@ -324,7 +325,7 @@
    m parent-scope inarg))
 
 (define (resolve-expansion-vars- e env m parent-scope inarg)
-  (cond ((or (eq? e 'end) (eq? e 'ccall) (eq? e 'cglobal))
+  (cond ((or (eq? e 'begin) (eq? e 'end) (eq? e 'ccall) (eq? e 'cglobal) (underscore-symbol? e))
          e)
         ((symbol? e)
          (let ((a (assq e env)))
@@ -351,7 +352,7 @@
                                    ,(resolve-expansion-vars-with-new-env (caddr arg) env m parent-scope inarg))))
                              (else
                               `(global ,(resolve-expansion-vars-with-new-env arg env m parent-scope inarg))))))
-           ((using import export meta line inbounds boundscheck loopinfo) (map unescape e))
+           ((using import export meta line inbounds boundscheck loopinfo inline noinline) (map unescape e))
            ((macrocall) e) ; invalid syntax anyways, so just act like it's quoted.
            ((symboliclabel) e)
            ((symbolicgoto) e)
diff --git a/src/method.c b/src/method.c
index 1d3a593e638ed5..c17566ac6f9361 100644
--- a/src/method.c
+++ b/src/method.c
@@ -21,6 +21,25 @@ extern jl_value_t *jl_builtin_tuple;
 jl_method_t *jl_make_opaque_closure_method(jl_module_t *module, jl_value_t *name,
     jl_value_t *nargs, jl_value_t *functionloc, jl_code_info_t *ci);
 
+static void check_c_types(const char *where, jl_value_t *rt, jl_value_t *at)
+{
+    if (jl_is_svec(rt))
+        jl_errorf("%s: missing return type", where);
+    JL_TYPECHKS(where, type, rt);
+    if (!jl_type_mappable_to_c(rt))
+        jl_errorf("%s: return type doesn't correspond to a C type", where);
+    JL_TYPECHKS(where, simplevector, at);
+    int i, l = jl_svec_len(at);
+    for (i = 0; i < l; i++) {
+        jl_value_t *ati = jl_svecref(at, i);
+        if (jl_is_vararg(ati))
+            jl_errorf("%s: Vararg not allowed for argument list", where);
+        JL_TYPECHKS(where, type, ati);
+        if (!jl_type_mappable_to_c(ati))
+            jl_errorf("%s: argument %d type doesn't correspond to a C type", where, i + 1);
+    }
+}
+
 // Resolve references to non-locally-defined variables to become references to global
 // variables in `module` (unless the rvalue is one of the type parameters in `sparam_vals`).
 static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_svec_t *sparam_vals,
@@ -46,7 +65,7 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
             intptr_t label = jl_gotoifnot_label(expr);
             JL_GC_PUSH1(&cond);
             expr = jl_new_struct_uninit(jl_gotoifnot_type);
-            set_nth_field(jl_gotoifnot_type, expr, 0, cond);
+            set_nth_field(jl_gotoifnot_type, expr, 0, cond, 0);
             jl_gotoifnot_label(expr) = label;
             JL_GC_POP();
         }
@@ -65,7 +84,8 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
             e->head == quote_sym || e->head == inert_sym ||
             e->head == meta_sym || e->head == inbounds_sym ||
             e->head == boundscheck_sym || e->head == loopinfo_sym ||
-            e->head == aliasscope_sym || e->head == popaliasscope_sym) {
+            e->head == aliasscope_sym || e->head == popaliasscope_sym ||
+            e->head == inline_sym || e->head == noinline_sym) {
             // ignore these
         }
         else {
@@ -120,10 +140,7 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
                     }
                     jl_exprargset(e, 3, at);
                 }
-                if (jl_is_svec(rt))
-                    jl_error("cfunction: missing return type");
-                JL_TYPECHK(cfunction method definition, type, rt);
-                JL_TYPECHK(cfunction method definition, simplevector, at);
+                check_c_types("cfunction method definition", rt, at);
                 JL_TYPECHK(cfunction method definition, quotenode, jl_exprarg(e, 4));
                 JL_TYPECHK(cfunction method definition, symbol, *(jl_value_t**)jl_exprarg(e, 4));
                 return expr;
@@ -156,10 +173,7 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
                     }
                     jl_exprargset(e, 2, at);
                 }
-                if (jl_is_svec(rt))
-                    jl_error("ccall: missing return type");
-                JL_TYPECHK(ccall method definition, type, rt);
-                JL_TYPECHK(ccall method definition, simplevector, at);
+                check_c_types("ccall method definition", rt, at);
                 JL_TYPECHK(ccall method definition, long, jl_exprarg(e, 3));
                 JL_TYPECHK(ccall method definition, quotenode, jl_exprarg(e, 4));
                 JL_TYPECHK(ccall method definition, symbol, *(jl_value_t**)jl_exprarg(e, 4));
@@ -230,7 +244,7 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
     return expr;
 }
 
-void jl_resolve_globals_in_ir(jl_array_t *stmts, jl_module_t *m, jl_svec_t *sparam_vals,
+JL_DLLEXPORT void jl_resolve_globals_in_ir(jl_array_t *stmts, jl_module_t *m, jl_svec_t *sparam_vals,
                               int binding_effects)
 {
     size_t i, l = jl_array_len(stmts);
@@ -240,6 +254,11 @@ void jl_resolve_globals_in_ir(jl_array_t *stmts, jl_module_t *m, jl_svec_t *spar
     }
 }
 
+jl_value_t *expr_arg1(jl_value_t *expr) { // return element 0 of the Expr's args array; NOTE(review): external linkage without JL_DLLEXPORT - consider `static` if only used in this file
+    jl_array_t *args = ((jl_expr_t*)expr)->args; // unchecked cast: the caller must already know `expr` is a jl_expr_t
+    return jl_array_ptr_ref(args, 0); // index 0 is read without a length check, so the Expr must have at least one argument
+}
+
 // copy a :lambda Expr into its CodeInfo representation,
 // including popping of known meta nodes
 static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir)
@@ -261,8 +280,17 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir)
     jl_gc_wb(li, li->code);
     size_t n = jl_array_len(body);
     jl_value_t **bd = (jl_value_t**)jl_array_ptr_data((jl_array_t*)li->code);
+    li->ssaflags = jl_alloc_array_1d(jl_array_uint8_type, n);
+    jl_gc_wb(li, li->ssaflags);
+    int inbounds_depth = 0; // number of stacked inbounds
+    // isempty(inline_flags): no user annotation
+    // last(inline_flags) == 1: inline region
+    // last(inline_flags) == 0: noinline region
+    arraylist_t *inline_flags = arraylist_new((arraylist_t*)malloc_s(sizeof(arraylist_t)), 0);
     for (j = 0; j < n; j++) {
         jl_value_t *st = bd[j];
+        int is_flag_stmt = 0;
+        // check :meta expression
         if (jl_is_expr(st) && ((jl_expr_t*)st)->head == meta_sym) {
             size_t k, ins = 0, na = jl_expr_nargs(st);
             jl_array_t *meta = ((jl_expr_t*)st)->args;
@@ -284,10 +312,60 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir)
             else
                 jl_array_del_end(meta, na - ins);
         }
+        // check other flag expressions
+        else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == inbounds_sym) {
+            is_flag_stmt = 1;
+            jl_value_t *arg1 = expr_arg1(st);
+            if (arg1 == (jl_value_t*)jl_true)       // push
+                inbounds_depth += 1;
+            else if (arg1 == (jl_value_t*)jl_false) // clear
+                inbounds_depth = 0;
+            else if (inbounds_depth > 0)            // pop
+                inbounds_depth -= 1;
+            bd[j] = jl_nothing;
+        }
+        else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == inline_sym) {
+            is_flag_stmt = 1;
+            jl_value_t *arg1 = expr_arg1(st);
+            if (arg1 == (jl_value_t*)jl_true) // enter inline region
+                arraylist_push(inline_flags, (void*)1);
+            else {                            // exit inline region
+                assert(arg1 == (jl_value_t*)jl_false);
+                arraylist_pop(inline_flags);
+            }
+            bd[j] = jl_nothing;
+        }
+        else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == noinline_sym) {
+            is_flag_stmt = 1;
+            jl_value_t *arg1 = expr_arg1(st);
+            if (arg1 == (jl_value_t*)jl_true) // enter noinline region
+                arraylist_push(inline_flags, (void*)0);
+            else {                             // exit noinline region
+                assert(arg1 == (jl_value_t*)jl_false);
+                arraylist_pop(inline_flags);
+            }
+            bd[j] = jl_nothing;
+        }
         else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == return_sym) {
             jl_array_ptr_set(body, j, jl_new_struct(jl_returnnode_type, jl_exprarg(st, 0)));
         }
+
+        if (is_flag_stmt)
+            jl_array_uint8_set(li->ssaflags, j, 0);
+        else {
+            uint8_t flag = 0;
+            if (inbounds_depth > 0)
+                flag |= 1 << 0;
+            if (inline_flags->len > 0) {
+                void* inline_flag = inline_flags->items[inline_flags->len - 1];
+                flag |= 1 << (inline_flag ? 1 : 2);
+            }
+            jl_array_uint8_set(li->ssaflags, j, flag);
+        }
     }
+    assert(inline_flags->len == 0); // malformed otherwise
+    arraylist_free(inline_flags);
+    free(inline_flags);
     jl_array_t *vinfo = (jl_array_t*)jl_exprarg(ir, 1);
     jl_array_t *vis = (jl_array_t*)jl_array_ptr_ref(vinfo, 0);
     size_t nslots = jl_array_len(vis);
@@ -300,7 +378,6 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir)
     jl_gc_wb(li, li->slotflags);
     li->ssavaluetypes = jl_box_long(nssavalue);
     jl_gc_wb(li, li->ssavaluetypes);
-    li->ssaflags = jl_alloc_array_1d(jl_array_uint8_type, 0);
 
     // Flags that need to be copied to slotflags
     const uint8_t vinfo_mask = 8 | 16 | 32 | 64;
@@ -327,9 +404,9 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir)
 
 JL_DLLEXPORT jl_method_instance_t *jl_new_method_instance_uninit(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     jl_method_instance_t *li =
-        (jl_method_instance_t*)jl_gc_alloc(ptls, sizeof(jl_method_instance_t),
+        (jl_method_instance_t*)jl_gc_alloc(ct->ptls, sizeof(jl_method_instance_t),
                                            jl_method_instance_type);
     li->def.value = NULL;
     li->specTypes = NULL;
@@ -344,9 +421,9 @@ JL_DLLEXPORT jl_method_instance_t *jl_new_method_instance_uninit(void)
 
 JL_DLLEXPORT jl_code_info_t *jl_new_code_info_uninit(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     jl_code_info_t *src =
-        (jl_code_info_t*)jl_gc_alloc(ptls, sizeof(jl_code_info_t),
+        (jl_code_info_t*)jl_gc_alloc(ct->ptls, sizeof(jl_code_info_t),
                                        jl_code_info_type);
     src->code = NULL;
     src->codelocs = NULL;
@@ -454,15 +531,15 @@ JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo)
     jl_code_info_t *func = NULL;
     jl_value_t *ex = NULL;
     JL_GC_PUSH2(&ex, &func);
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     int last_lineno = jl_lineno;
-    int last_in = ptls->in_pure_callback;
-    size_t last_age = jl_get_ptls_states()->world_age;
+    int last_in = ct->ptls->in_pure_callback;
+    size_t last_age = ct->world_age;
 
     JL_TRY {
-        ptls->in_pure_callback = 1;
+        ct->ptls->in_pure_callback = 1;
         // and the right world
-        ptls->world_age = def->primary_world;
+        ct->world_age = def->primary_world;
 
         // invoke code generator
         jl_tupletype_t *ttdt = (jl_tupletype_t*)jl_unwrap_unionall(tt);
@@ -479,7 +556,7 @@ JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo)
 
             if (!jl_is_code_info(func)) {
                 if (jl_is_expr(func) && ((jl_expr_t*)func)->head == error_sym) {
-                    ptls->in_pure_callback = 0;
+                    ct->ptls->in_pure_callback = 0;
                     jl_toplevel_eval(def->module, (jl_value_t*)func);
                 }
                 jl_error("The function body AST defined by this @generated function is not pure. This likely means it contains a closure, a comprehension or a generator.");
@@ -497,13 +574,13 @@ JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo)
             }
         }
 
-        ptls->in_pure_callback = last_in;
+        ct->ptls->in_pure_callback = last_in;
         jl_lineno = last_lineno;
-        ptls->world_age = last_age;
+        ct->world_age = last_age;
         jl_add_function_name_to_lineinfo(func, (jl_value_t*)def->name);
     }
     JL_CATCH {
-        ptls->in_pure_callback = last_in;
+        ct->ptls->in_pure_callback = last_in;
         jl_lineno = last_lineno;
         jl_rethrow();
     }
@@ -513,9 +590,9 @@ JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo)
 
 JL_DLLEXPORT jl_code_info_t *jl_copy_code_info(jl_code_info_t *src)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     jl_code_info_t *newsrc =
-        (jl_code_info_t*)jl_gc_alloc(ptls, sizeof(jl_code_info_t),
+        (jl_code_info_t*)jl_gc_alloc(ct->ptls, sizeof(jl_code_info_t),
                                        jl_code_info_type);
     *newsrc = *src;
     return newsrc;
@@ -642,9 +719,9 @@ static void jl_method_set_source(jl_method_t *m, jl_code_info_t *src)
 
 JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t *module)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     jl_method_t *m =
-        (jl_method_t*)jl_gc_alloc(ptls, sizeof(jl_method_t), jl_method_type);
+        (jl_method_t*)jl_gc_alloc(ct->ptls, sizeof(jl_method_t), jl_method_type);
     m->specializations = jl_emptysvec;
     m->speckeyset = (jl_array_t*)jl_an_empty_vec_any;
     m->sig = NULL;
@@ -652,6 +729,7 @@ JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t *module)
     m->roots = NULL;
     m->ccallable = NULL;
     m->module = module;
+    m->external_mt = NULL;
     m->source = NULL;
     m->unspecialized = NULL;
     m->generator = NULL;
@@ -764,6 +842,11 @@ JL_DLLEXPORT jl_methtable_t *jl_method_table_for(jl_value_t *argtypes JL_PROPAGA
     return first_methtable(argtypes, 0);
 }
 
+JL_DLLEXPORT jl_methtable_t *jl_method_get_table(jl_method_t *method JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT
+{
+    return method->external_mt ? (jl_methtable_t*)method->external_mt : jl_method_table_for(method->sig); // a non-NULL external_mt takes precedence; otherwise the table is derived from the method signature
+}
+
 // get the MethodTable implied by a single given type, or `nothing`
 JL_DLLEXPORT jl_methtable_t *jl_argument_method_table(jl_value_t *argt JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT
 {
@@ -773,6 +856,7 @@ JL_DLLEXPORT jl_methtable_t *jl_argument_method_table(jl_value_t *argt JL_PROPAG
 jl_array_t *jl_all_methods JL_GLOBALLY_ROOTED;
 
 JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata,
+                                        jl_methtable_t *mt,
                                         jl_code_info_t *f,
                                         jl_module_t *module)
 {
@@ -801,7 +885,9 @@ JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata,
         argtype = jl_new_struct(jl_unionall_type, tv, argtype);
     }
 
-    jl_methtable_t *mt = jl_method_table_for(argtype);
+    jl_methtable_t *external_mt = mt;
+    if (!mt)
+        mt = jl_method_table_for(argtype);
     if ((jl_value_t*)mt == jl_nothing)
         jl_error("Method dispatch is unimplemented currently for this method signature");
     if (mt->frozen)
@@ -809,7 +895,7 @@ JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata,
 
     // TODO: derive our debug name from the syntax instead of the type
     name = mt->name;
-    if (mt == jl_type_type_mt || mt == jl_nonfunction_mt) {
+    if (mt == jl_type_type_mt || mt == jl_nonfunction_mt || external_mt) {
         // our value for `name` is bad, try to guess what the syntax might have had,
         // like `jl_static_show_func_sig` might have come up with
         jl_datatype_t *dt = jl_first_argument_datatype(argtype);
@@ -830,6 +916,9 @@ JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata,
         f = jl_new_code_info_from_ir((jl_expr_t*)f);
     }
     m = jl_new_method_uninit(module);
+    m->external_mt = (jl_value_t*)external_mt;
+    if (external_mt)
+        jl_gc_wb(m, external_mt);
     m->sig = argtype;
     m->name = name;
     m->isva = isva;
diff --git a/src/module.c b/src/module.c
index a136095927a51a..a7db957d2d9c37 100644
--- a/src/module.c
+++ b/src/module.c
@@ -13,9 +13,9 @@ extern "C" {
 
 JL_DLLEXPORT jl_module_t *jl_new_module_(jl_sym_t *name, uint8_t default_names)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     const jl_uuid_t uuid_zero = {0, 0};
-    jl_module_t *m = (jl_module_t*)jl_gc_alloc(ptls, sizeof(jl_module_t),
+    jl_module_t *m = (jl_module_t*)jl_gc_alloc(ct->ptls, sizeof(jl_module_t),
                                                jl_module_type);
     assert(jl_is_symbol(name));
     m->name = name;
@@ -140,9 +140,9 @@ JL_DLLEXPORT uint8_t jl_istopmod(jl_module_t *mod)
 
 static jl_binding_t *new_binding(jl_sym_t *name)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     assert(jl_is_symbol(name));
-    jl_binding_t *b = (jl_binding_t*)jl_gc_alloc_buf(ptls, sizeof(jl_binding_t));
+    jl_binding_t *b = (jl_binding_t*)jl_gc_alloc_buf(ct->ptls, sizeof(jl_binding_t));
     b->name = name;
     b->value = NULL;
     b->owner = NULL;
@@ -157,7 +157,7 @@ static jl_binding_t *new_binding(jl_sym_t *name)
 // get binding for assignment
 JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, int error)
 {
-    JL_LOCK_NOGC(&m->lock);
+    JL_LOCK(&m->lock);
     jl_binding_t **bp = (jl_binding_t**)ptrhash_bp(&m->bindings, var);
     jl_binding_t *b = *bp;
 
@@ -167,7 +167,7 @@ JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT,
                 b->owner = m;
             }
             else if (error) {
-                JL_UNLOCK_NOGC(&m->lock);
+                JL_UNLOCK(&m->lock);
                 jl_errorf("cannot assign a value to variable %s.%s from module %s",
                           jl_symbol_name(b->owner->name), jl_symbol_name(var), jl_symbol_name(m->name));
             }
@@ -177,10 +177,11 @@ JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT,
         b = new_binding(var);
         b->owner = m;
         *bp = b;
+        JL_GC_PROMISE_ROOTED(b);
         jl_gc_wb_buf(m, b, sizeof(jl_binding_t));
     }
 
-    JL_UNLOCK_NOGC(&m->lock);
+    JL_UNLOCK(&m->lock);
     return b;
 }
 
@@ -215,7 +216,7 @@ JL_DLLEXPORT jl_module_t *jl_get_module_of_binding(jl_module_t *m, jl_sym_t *var
 // like jl_get_binding_wr, but has different error paths
 JL_DLLEXPORT jl_binding_t *jl_get_binding_for_method_def(jl_module_t *m, jl_sym_t *var)
 {
-    JL_LOCK_NOGC(&m->lock);
+    JL_LOCK(&m->lock);
     jl_binding_t **bp = _jl_get_module_binding_bp(m, var);
     jl_binding_t *b = *bp;
 
@@ -225,7 +226,7 @@ JL_DLLEXPORT jl_binding_t *jl_get_binding_for_method_def(jl_module_t *m, jl_sym_
                 b->owner = m;
             }
             else {
-                JL_UNLOCK_NOGC(&m->lock);
+                JL_UNLOCK(&m->lock);
                 jl_binding_t *b2 = jl_get_binding(b->owner, b->name);
                 if (b2 == NULL || b2->value == NULL)
                     jl_errorf("invalid method definition: imported function %s.%s does not exist",
@@ -246,7 +247,7 @@ JL_DLLEXPORT jl_binding_t *jl_get_binding_for_method_def(jl_module_t *m, jl_sym_
         jl_gc_wb_buf(m, b, sizeof(jl_binding_t));
     }
 
-    JL_UNLOCK_NOGC(&m->lock);
+    JL_UNLOCK(&m->lock);
     return b;
 }
 
@@ -590,33 +591,33 @@ JL_DLLEXPORT int jl_boundp(jl_module_t *m, jl_sym_t *var)
 
 JL_DLLEXPORT int jl_defines_or_exports_p(jl_module_t *m, jl_sym_t *var)
 {
-    JL_LOCK_NOGC(&m->lock);
+    JL_LOCK(&m->lock); // the module lock is held only around the bindings-table lookup
     jl_binding_t *b = (jl_binding_t*)ptrhash_get(&m->bindings, var);
-    JL_UNLOCK_NOGC(&m->lock);
+    JL_UNLOCK(&m->lock); // released before the fields of `b` are read in the return expression
     return b != HT_NOTFOUND && (b->exportp || b->owner==m);
 }
 
-JL_DLLEXPORT int jl_module_exports_p(jl_module_t *m, jl_sym_t *var) JL_NOTSAFEPOINT
+JL_DLLEXPORT int jl_module_exports_p(jl_module_t *m, jl_sym_t *var) // nonzero iff the lookup finds a binding whose exportp is set
 {
-    JL_LOCK_NOGC(&m->lock);
+    JL_LOCK(&m->lock); // the module lock is held only around the binding lookup
     jl_binding_t *b = _jl_get_module_binding(m, var);
-    JL_UNLOCK_NOGC(&m->lock);
+    JL_UNLOCK(&m->lock); // released before b->exportp is read
     return b != HT_NOTFOUND && b->exportp;
 }
 
-JL_DLLEXPORT int jl_binding_resolved_p(jl_module_t *m, jl_sym_t *var) JL_NOTSAFEPOINT
+JL_DLLEXPORT int jl_binding_resolved_p(jl_module_t *m, jl_sym_t *var) // nonzero iff the lookup finds a binding with a non-NULL owner
 {
-    JL_LOCK_NOGC(&m->lock);
+    JL_LOCK(&m->lock); // the module lock is held only around the binding lookup
     jl_binding_t *b = _jl_get_module_binding(m, var);
-    JL_UNLOCK_NOGC(&m->lock);
+    JL_UNLOCK(&m->lock); // released before b->owner is read
     return b != HT_NOTFOUND && b->owner != NULL;
 }
 
-JL_DLLEXPORT jl_binding_t *jl_get_module_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var) JL_NOTSAFEPOINT
+JL_DLLEXPORT jl_binding_t *jl_get_module_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var) // returns NULL rather than HT_NOTFOUND when the lookup misses
 {
-    JL_LOCK_NOGC(&m->lock);
+    JL_LOCK(&m->lock); // the module lock is held only around the binding lookup
     jl_binding_t *b = _jl_get_module_binding(m, var);
-    JL_UNLOCK_NOGC(&m->lock);
+    JL_UNLOCK(&m->lock); // the binding pointer is returned after the lock is dropped
     return b == HT_NOTFOUND ? NULL : b;
 }
 
@@ -633,7 +634,6 @@ JL_DLLEXPORT void jl_set_global(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *va
     JL_TYPECHK(jl_set_global, module, (jl_value_t*)m);
     JL_TYPECHK(jl_set_global, symbol, (jl_value_t*)var);
     jl_binding_t *bp = jl_get_binding_wr(m, var, 1);
-    JL_GC_PROMISE_ROOTED(bp);
     jl_checked_assignment(bp, val);
 }
 
@@ -641,8 +641,10 @@ JL_DLLEXPORT void jl_set_const(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var
 {
     jl_binding_t *bp = jl_get_binding_wr(m, var, 1);
     if (bp->value == NULL) {
-        if (jl_atomic_bool_compare_exchange(&bp->constp, 0, 1)) {
-            if (jl_atomic_bool_compare_exchange(&bp->value, NULL, val)) {
+        uint8_t constp = 0;
+        if (jl_atomic_cmpswap(&bp->constp, &constp, 1)) {
+            jl_value_t *old = NULL;
+            if (jl_atomic_cmpswap(&bp->value, &old, val)) {
                 jl_gc_wb_binding(bp, val);
                 return;
             }
@@ -766,8 +768,8 @@ void jl_binding_deprecation_warning(jl_module_t *m, jl_binding_t *b)
 JL_DLLEXPORT void jl_checked_assignment(jl_binding_t *b, jl_value_t *rhs) JL_NOTSAFEPOINT
 {
     if (b->constp) {
-        jl_value_t *old = jl_atomic_compare_exchange(&b->value, NULL, rhs);
-        if (old == NULL) {
+        jl_value_t *old = NULL;
+        if (jl_atomic_cmpswap(&b->value, &old, rhs)) {
             jl_gc_wb_binding(b, rhs);
             return;
         }
diff --git a/src/opaque_closure.c b/src/opaque_closure.c
index a7298e38052185..4a23c604b079d8 100644
--- a/src/opaque_closure.c
+++ b/src/opaque_closure.c
@@ -5,12 +5,12 @@ JL_DLLEXPORT jl_value_t *jl_invoke_opaque_closure(jl_opaque_closure_t *oc, jl_va
 {
     jl_value_t *ret = NULL;
     JL_GC_PUSH1(&ret);
-    jl_ptls_t ptls = jl_get_ptls_states();
-    size_t last_age = ptls->world_age;
-    ptls->world_age = oc->world;
+    jl_task_t *ct = jl_current_task;
+    size_t last_age = ct->world_age;
+    ct->world_age = oc->world;
     ret = jl_interpret_opaque_closure(oc, args, nargs);
     jl_typeassert(ret, jl_tparam1(jl_typeof(oc)));
-    ptls->world_age = last_age;
+    ct->world_age = last_age;
     JL_GC_POP();
     return ret;
 }
@@ -25,14 +25,14 @@ jl_opaque_closure_t *jl_new_opaque_closure(jl_tupletype_t *argt, jl_value_t *isv
     JL_TYPECHK(new_opaque_closure, type, rt_lb);
     JL_TYPECHK(new_opaque_closure, type, rt_ub);
     JL_TYPECHK(new_opaque_closure, method, source);
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     jl_value_t *oc_type JL_ALWAYS_LEAFTYPE;
     oc_type = jl_apply_type2((jl_value_t*)jl_opaque_closure_type, (jl_value_t*)argt, rt_ub);
     JL_GC_PROMISE_ROOTED(oc_type);
     jl_value_t *captures = NULL;
     JL_GC_PUSH1(&captures);
     captures = jl_f_tuple(NULL, env, nenv);
-    jl_opaque_closure_t *oc = (jl_opaque_closure_t*)jl_gc_alloc(ptls, sizeof(jl_opaque_closure_t), oc_type);
+    jl_opaque_closure_t *oc = (jl_opaque_closure_t*)jl_gc_alloc(ct->ptls, sizeof(jl_opaque_closure_t), oc_type);
     JL_GC_POP();
     oc->source = (jl_method_t*)source;
     oc->isva = jl_unbox_bool(isva);
@@ -53,7 +53,7 @@ JL_CALLABLE(jl_new_opaque_closure_jlcall)
 
 
 // check whether the specified number of arguments is compatible with the
-// specified number of paramters of the tuple type
+// specified number of parameters of the tuple type
 STATIC_INLINE int jl_tupletype_length_compat(jl_value_t *v, size_t nargs) JL_NOTSAFEPOINT
 {
     v = jl_unwrap_unionall(v);
@@ -76,7 +76,7 @@ JL_CALLABLE(jl_f_opaque_closure_call)
     jl_opaque_closure_t* oc = (jl_opaque_closure_t*)F;
     jl_value_t *argt = jl_tparam0(jl_typeof(oc));
     if (!jl_tupletype_length_compat(argt, nargs))
-        jl_error("Incorrect argument count for OpaqueClosure");
+        jl_method_error(F, args, nargs + 1, oc->world);
     argt = jl_unwrap_unionall(argt);
     assert(jl_is_datatype(argt));
     jl_svec_t *types = jl_get_fieldtypes((jl_datatype_t*)argt);
diff --git a/src/options.h b/src/options.h
index 3ffbf05b2249ff..36f34654b2bd09 100644
--- a/src/options.h
+++ b/src/options.h
@@ -1,5 +1,7 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
+#include "platform.h"
+
 #ifndef JL_OPTIONS_H
 #define JL_OPTIONS_H
 
@@ -12,10 +14,14 @@
 
 // object layout options ------------------------------------------------------
 
-// how much space we're willing to waste if an array outgrows its
-// original object
+// The data for an array this size or below will be allocated within the
+// Array object. If the array outgrows that space, it will be wasted.
 #define ARRAY_INLINE_NBYTES (2048*sizeof(void*))
 
+// Arrays at least this size will get larger alignment (JL_CACHE_BYTE_ALIGNMENT).
+// Must be bigger than GC_MAX_SZCLASS.
+#define ARRAY_CACHE_ALIGN_THRESHOLD 2048
+
 // codegen options ------------------------------------------------------------
 
 // (Experimental) Use MCJIT ELF, even where it's not the native format
@@ -114,7 +120,7 @@
 #endif
 
 // allow a suspended Task to restart on a different thread
-//#define MIGRATE_TASKS
+#define MIGRATE_TASKS
 
 // threading options ----------------------------------------------------------
 
@@ -154,23 +160,19 @@
 
 // sanitizer defaults ---------------------------------------------------------
 
-#ifndef JULIA_H
-#error "Must be included after julia.h"
-#endif
-
 // Automatically enable MEMDEBUG and KEEP_BODIES for the sanitizers
-#if defined(JL_ASAN_ENABLED) || defined(JL_MSAN_ENABLED)
+#if defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_MSAN_ENABLED_)
 #define MEMDEBUG
 #define KEEP_BODIES
 #endif
 
 // TSAN doesn't like COPY_STACKS
-#if defined(JL_TSAN_ENABLED) && defined(COPY_STACKS)
+#if defined(_COMPILER_TSAN_ENABLED_) && defined(COPY_STACKS)
 #undef COPY_STACKS
 #endif
 
 // Memory sanitizer needs TLS, which llvm only supports for the small memory model
-#if defined(JL_MSAN_ENABLED)
+#if defined(_COMPILER_MSAN_ENABLED_)
 // todo: fix the llvm MemoryManager to work with small memory model
 #endif
 
diff --git a/src/partr.c b/src/partr.c
index 782b418ebd882b..c3de56b80cc920 100644
--- a/src/partr.c
+++ b/src/partr.c
@@ -44,7 +44,7 @@ JL_DLLEXPORT int jl_set_task_tid(jl_task_t *task, int tid) JL_NOTSAFEPOINT
     if (was == tid)
         return 1;
     if (was == -1)
-        return jl_atomic_bool_compare_exchange(&task->tid, -1, tid);
+        return jl_atomic_cmpswap(&task->tid, &was, tid);
     return 0;
 }
 
@@ -126,7 +126,7 @@ static inline void sift_down(taskheap_t *heap, int32_t idx)
 
 static inline int multiq_insert(jl_task_t *task, int16_t priority)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     uint64_t rn;
 
     task->prio = priority;
@@ -153,12 +153,13 @@ static inline int multiq_insert(jl_task_t *task, int16_t priority)
 
 static inline jl_task_t *multiq_deletemin(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     uint64_t rn1 = 0, rn2;
     int32_t i;
     int16_t prio1, prio2;
     jl_task_t *task;
  retry:
+    jl_gc_safepoint();
     for (i = 0; i < heap_p; ++i) {
         rn1 = cong(heap_p, cong_unbias, &ptls->rngseed);
         rn2 = cong(heap_p, cong_unbias, &ptls->rngseed);
@@ -228,7 +229,8 @@ void jl_init_threadinginfra(void)
     /* initialize the synchronization trees pool and the multiqueue */
     multiq_init();
 
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
+    jl_install_thread_signal_handler(ptls);
     uv_mutex_init(&ptls->sleep_lock);
     uv_cond_init(&ptls->wake_signal);
 }
@@ -242,12 +244,11 @@ void jl_threadfun(void *arg)
     jl_threadarg_t *targ = (jl_threadarg_t*)arg;
 
     // initialize this thread (set tid, create heap, set up root task)
-    jl_init_threadtls(targ->tid);
+    jl_ptls_t ptls = jl_init_threadtls(targ->tid);
     void *stack_lo, *stack_hi;
     jl_init_stack_limits(0, &stack_lo, &stack_hi);
-    jl_init_root_task(stack_lo, stack_hi);
-
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_init_root_task(ptls, stack_lo, stack_hi);
+    jl_install_thread_signal_handler(ptls);
 
     // set up sleep mechanism for this thread
     uv_mutex_init(&ptls->sleep_lock);
@@ -329,7 +330,9 @@ static int sleep_check_after_threshold(uint64_t *start_cycles)
 static void wake_thread(int16_t tid)
 {
     jl_ptls_t other = jl_all_tls_states[tid];
-    if (jl_atomic_bool_compare_exchange(&other->sleep_check_state, sleeping, not_sleeping)) {
+    uint8_t state = sleeping;
+    jl_atomic_cmpswap(&other->sleep_check_state, &state, not_sleeping);
+    if (state == sleeping) {
         uv_mutex_lock(&other->sleep_lock);
         uv_cond_signal(&other->wake_signal);
         uv_mutex_unlock(&other->sleep_lock);
@@ -347,7 +350,7 @@ static void wake_libuv(void)
 /* ensure thread tid is awake if necessary */
 JL_DLLEXPORT void jl_wakeup_thread(int16_t tid)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     jl_thread_t uvlock = jl_atomic_load(&jl_uv_mutex.owner);
     int16_t self = ptls->tid;
     jl_thread_t system_self = jl_all_tls_states[self]->system_id;
@@ -391,11 +394,10 @@ static jl_task_t *get_next_task(jl_value_t *trypoptask, jl_value_t *q)
     jl_value_t *args[2] = { trypoptask, q };
     jl_task_t *task = (jl_task_t*)jl_apply(args, 2);
     if (jl_typeis(task, jl_task_type)) {
-        int self = jl_get_ptls_states()->tid;
+        int self = jl_current_task->tid;
         jl_set_task_tid(task, self);
         return task;
     }
-    jl_gc_safepoint();
     return multiq_deletemin();
 }
 
@@ -411,12 +413,11 @@ extern volatile unsigned _threadedregion;
 
 JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     uint64_t start_cycles = 0;
-    jl_task_t *task;
 
     while (1) {
-        task = get_next_task(trypoptask, q);
+        jl_task_t *task = get_next_task(trypoptask, q);
         if (task)
             return task;
 
@@ -428,6 +429,7 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q)
         }
 
         jl_cpu_pause();
+        jl_ptls_t ptls = ct->ptls;
         if (sleep_check_after_threshold(&start_cycles) || (!_threadedregion && ptls->tid == 0)) {
             jl_atomic_store(&ptls->sleep_check_state, sleeping); // acquire sleep-check lock
             if (!multiq_check_empty()) {
@@ -435,7 +437,9 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q)
                     jl_atomic_store(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us
                 continue;
             }
-            task = get_next_task(trypoptask, q);
+            task = get_next_task(trypoptask, q); // WARNING: this should not yield
+            if (ptls != ct->ptls)
+                continue;
             if (task) {
                 if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping)
                     jl_atomic_store(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us
diff --git a/src/precompile.c b/src/precompile.c
index 886a0677b18990..9f6fa1a79e8a6a 100644
--- a/src/precompile.c
+++ b/src/precompile.c
@@ -353,9 +353,9 @@ static int precompile_enq_all_specializations__(jl_typemap_entry_t *def, void *c
         jl_svec_t *specializations = def->func.method->specializations;
         size_t i, l = jl_svec_len(specializations);
         for (i = 0; i < l; i++) {
-            jl_method_instance_t *mi = (jl_method_instance_t*)jl_svecref(specializations, i);
-            if (mi != NULL)
-                precompile_enq_specialization_(mi, closure);
+            jl_value_t *mi = jl_svecref(specializations, i);
+            if (mi != jl_nothing)
+                precompile_enq_specialization_((jl_method_instance_t*)mi, closure);
         }
     }
     if (m->ccallable)
diff --git a/src/processor.cpp b/src/processor.cpp
index 9d4ac476ef3165..c5e42368412e8d 100644
--- a/src/processor.cpp
+++ b/src/processor.cpp
@@ -401,6 +401,8 @@ static inline std::vector<uint8_t> serialize_target_data(llvm::StringRef name,
 {
     std::vector<uint8_t> res;
     auto add_data = [&] (const void *data, size_t sz) {
+        if (sz == 0)
+            return;
         size_t old_sz = res.size();
         res.resize(old_sz + sz);
         memcpy(&res[old_sz], data, sz);
diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp
index f15f2051187734..a411314e34e9d7 100644
--- a/src/processor_arm.cpp
+++ b/src/processor_arm.cpp
@@ -215,7 +215,7 @@ static constexpr FeatureDep deps[] = {
     {ccdp, ccpp},
     {sve, fullfp16},
     {fp16fml, fullfp16},
-    {altnzcv, fmi},
+    {altnzcv, flagm},
     {sve2, sve},
     {sve2_aes, sve2},
     {sve2_aes, aes},
@@ -237,7 +237,7 @@ constexpr auto armv8_2a = armv8_1a | get_feature_masks(v8_2a, ccpp);
 constexpr auto armv8_2a_crypto = armv8_2a | get_feature_masks(aes, sha2);
 constexpr auto armv8_3a = armv8_2a | get_feature_masks(v8_3a, jsconv, complxnum, rcpc);
 constexpr auto armv8_3a_crypto = armv8_3a | get_feature_masks(aes, sha2);
-constexpr auto armv8_4a = armv8_3a | get_feature_masks(v8_4a, dit, rcpc_immo, fmi);
+constexpr auto armv8_4a = armv8_3a | get_feature_masks(v8_4a, dit, rcpc_immo, flagm);
 constexpr auto armv8_4a_crypto = armv8_4a | get_feature_masks(aes, sha2);
 constexpr auto armv8_5a = armv8_4a | get_feature_masks(v8_5a, sb, ccdp, altnzcv, fptoint);
 constexpr auto armv8_6a = armv8_5a | get_feature_masks(v8_6a, i8mm, bf16);
@@ -256,7 +256,7 @@ constexpr auto armv8_6a = armv8_5a | get_feature_masks(v8_6a, i8mm, bf16);
 //     .SM4: sm4
 //     .DP: dotprod
 //     .FHM: fp16fml
-//     .TS: fmi, altnzcz
+//     .TS: flagm, altnzcv
 //     .RNDR: rand
 
 // ID_AA64ISAR1_EL1
@@ -1191,7 +1191,7 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu()
     features[1] = (uint32_t)jl_getauxval(AT_HWCAP2);
 #ifdef _CPU_AARCH64_
     if (test_nbit(features, 31)) // HWCAP_PACG
-        set_bit(features, Feature::pa, true);
+        set_bit(features, Feature::pauth, true);
 #endif
     auto cpuinfo = get_cpuinfo();
     auto arch = get_elf_arch();
@@ -1419,7 +1419,7 @@ static inline void enable_depends(FeatureList<n> &features)
     if (test_nbit(features, Feature::v8_4a)) {
         set_bit(features, Feature::dit, true);
         set_bit(features, Feature::rcpc_immo, true);
-        set_bit(features, Feature::fmi, true);
+        set_bit(features, Feature::flagm, true);
     }
     if (test_nbit(features, Feature::v8_5a)) {
         set_bit(features, Feature::sb, true);
@@ -1623,10 +1623,8 @@ get_llvm_target_noext(const TargetData<feature_sz> &data)
             feature_strs.push_back(std::string("-") + fename_str);
         }
     }
-#if JL_LLVM_VERSION >= 110000
     if (test_nbit(features, Feature::v8_6a))
         feature_strs.push_back("+v8.6a");
-#endif
     if (test_nbit(features, Feature::v8_5a))
         feature_strs.push_back("+v8.5a");
     if (test_nbit(features, Feature::v8_4a))
@@ -1760,13 +1758,7 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
     auto max_feature = get_max_feature();
     static const auto res = get_llvm_target_str(TargetData<feature_sz>{host_cpu_name(),
 #ifdef _CPU_AARCH64_
-#  if JL_LLVM_VERSION > 110000
-                "+ecv,"
-#  endif
-#  if JL_LLVM_VERSION > 100000
-                "+tme,"
-#  endif
-                "+am,+specrestrict,+predres,+lor,+perfmon,+spe,+tracev8.4",
+                "+ecv,+tme,+am,+specrestrict,+predres,+lor,+perfmon,+spe,+tracev8.4",
 #else
                 "+dotprod",
 #endif
@@ -1810,6 +1802,8 @@ extern "C" int jl_test_cpu_feature(jl_cpu_feature_t feature)
 #ifdef _CPU_AARCH64_
 // FPCR FZ, bit [24]
 static constexpr uint32_t fpcr_fz_mask = 1 << 24;
+// FPCR FZ16, bit [19]
+static constexpr uint32_t fpcr_fz16_mask = 1 << 19;
 // FPCR DN, bit [25]
 static constexpr uint32_t fpcr_dn_mask = 1 << 25;
 
@@ -1833,7 +1827,8 @@ extern "C" JL_DLLEXPORT int32_t jl_get_zero_subnormals(void)
 extern "C" JL_DLLEXPORT int32_t jl_set_zero_subnormals(int8_t isZero)
 {
     uint32_t fpcr = get_fpcr_aarch64();
-    fpcr = isZero ? (fpcr | fpcr_fz_mask) : (fpcr & ~fpcr_fz_mask);
+    static uint32_t mask = fpcr_fz_mask | (jl_test_cpu_feature(JL_AArch64_fullfp16) ? fpcr_fz16_mask : 0);
+    fpcr = isZero ? (fpcr | mask) : (fpcr & ~mask);
     set_fpcr_aarch64(fpcr);
     return 0;
 }
diff --git a/src/rtutils.c b/src/rtutils.c
index 99fce51128345c..67d17c39c67ec9 100644
--- a/src/rtutils.c
+++ b/src/rtutils.c
@@ -132,6 +132,14 @@ JL_DLLEXPORT void JL_NORETURN jl_undefined_var_error(jl_sym_t *var)
     jl_throw(jl_new_struct(jl_undefvarerror_type, var));
 }
 
+JL_DLLEXPORT void JL_NORETURN jl_atomic_error(char *str) // == jl_exceptionf(jl_atomicerror_type, "%s", str)
+{ // Throws a jl_atomicerror_type instance whose only field is `str` turned into a Julia string.
+    jl_value_t *msg = jl_pchar_to_string((char*)str, strlen(str)); // strlen: str must be NUL-terminated
+    JL_GC_PUSH1(&msg); // root msg while jl_new_struct runs; no matching JL_GC_POP because this function is JL_NORETURN and leaves via jl_throw
+    jl_throw(jl_new_struct(jl_atomicerror_type, msg));
+}
+
+
 JL_DLLEXPORT void JL_NORETURN jl_bounds_error(jl_value_t *v, jl_value_t *t)
 {
     JL_GC_PUSH2(&v, &t); // root arguments so the caller doesn't need to
@@ -210,18 +218,17 @@ JL_DLLEXPORT void jl_typeassert(jl_value_t *x, jl_value_t *t)
 
 JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_task_t *current_task = ptls->current_task;
+    jl_task_t *ct = jl_current_task;
     // Must have no safepoint
-    eh->prev = current_task->eh;
-    eh->gcstack = ptls->pgcstack;
-    eh->gc_state = ptls->gc_state;
-    eh->locks_len = ptls->locks.len;
-    eh->defer_signal = ptls->defer_signal;
-    eh->world_age = ptls->world_age;
-    current_task->eh = eh;
+    eh->prev = ct->eh;
+    eh->gcstack = ct->gcstack;
+    eh->gc_state = ct->ptls->gc_state;
+    eh->locks_len = ct->ptls->locks.len;
+    eh->defer_signal = ct->ptls->defer_signal;
+    eh->world_age = ct->world_age;
+    ct->eh = eh;
 #ifdef ENABLE_TIMINGS
-    eh->timing_stack = ptls->timing_stack;
+    eh->timing_stack = ct->ptls->timing_stack;
 #endif
 }
 
@@ -232,50 +239,49 @@ JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh)
 //   there's additional cleanup required, eg pushing the exception stack.
 JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
 #ifdef _OS_WINDOWS_
-    if (ptls->needs_resetstkoflw) {
+    if (ct->ptls->needs_resetstkoflw) {
         _resetstkoflw();
-        ptls->needs_resetstkoflw = 0;
+        ct->ptls->needs_resetstkoflw = 0;
     }
 #endif
-    jl_task_t *current_task = ptls->current_task;
-    // `eh` may be not equal to `ptls->current_task->eh`. See `jl_pop_handler`
+    // `eh` may be not equal to `ct->eh`. See `jl_pop_handler`
     // This function should **NOT** have any safepoint before the ones at the
     // end.
-    sig_atomic_t old_defer_signal = ptls->defer_signal;
-    int8_t old_gc_state = ptls->gc_state;
-    current_task->eh = eh->prev;
-    ptls->pgcstack = eh->gcstack;
-    small_arraylist_t *locks = &ptls->locks;
+    sig_atomic_t old_defer_signal = ct->ptls->defer_signal;
+    int8_t old_gc_state = ct->ptls->gc_state;
+    ct->eh = eh->prev;
+    ct->gcstack = eh->gcstack;
+    small_arraylist_t *locks = &ct->ptls->locks;
     int unlocks = locks->len > eh->locks_len;
     if (unlocks) {
         for (size_t i = locks->len; i > eh->locks_len; i--)
             jl_mutex_unlock_nogc((jl_mutex_t*)locks->items[i - 1]);
         locks->len = eh->locks_len;
     }
-    ptls->world_age = eh->world_age;
-    ptls->defer_signal = eh->defer_signal;
+    ct->world_age = eh->world_age;
+    ct->ptls->defer_signal = eh->defer_signal;
     if (old_gc_state != eh->gc_state) {
-        jl_atomic_store_release(&ptls->gc_state, eh->gc_state);
+        jl_atomic_store_release(&ct->ptls->gc_state, eh->gc_state);
         if (old_gc_state) {
-            jl_gc_safepoint_(ptls);
+            jl_gc_safepoint_(ct->ptls);
         }
     }
     if (old_defer_signal && !eh->defer_signal) {
-        jl_sigint_safepoint(ptls);
+        jl_sigint_safepoint(ct->ptls);
     }
     if (jl_gc_have_pending_finalizers && unlocks && eh->locks_len == 0) {
-        jl_gc_run_pending_finalizers(ptls);
+        jl_gc_run_pending_finalizers(ct);
     }
 }
 
 JL_DLLEXPORT void jl_pop_handler(int n)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     if (__unlikely(n <= 0))
         return;
-    jl_handler_t *eh = ptls->current_task->eh;
+    jl_handler_t *eh = ct->eh;
     while (--n > 0)
         eh = eh->prev;
     jl_eh_restore_state(eh);
@@ -283,15 +289,15 @@ JL_DLLEXPORT void jl_pop_handler(int n)
 
 JL_DLLEXPORT size_t jl_excstack_state(void) JL_NOTSAFEPOINT
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_excstack_t *s = ptls->current_task->excstack;
+    jl_task_t *ct = jl_current_task;
+    jl_excstack_t *s = ct->excstack;
     return s ? s->top : 0;
 }
 
 JL_DLLEXPORT void jl_restore_excstack(size_t state) JL_NOTSAFEPOINT
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_excstack_t *s = ptls->current_task->excstack;
+    jl_task_t *ct = jl_current_task;
+    jl_excstack_t *s = ct->excstack;
     if (s) {
         assert(s->top >= state);
         s->top = state;
@@ -312,7 +318,8 @@ static void jl_reserve_excstack(jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT,
     if (s && s->reserved_size >= reserved_size)
         return;
     size_t bufsz = sizeof(jl_excstack_t) + sizeof(uintptr_t)*reserved_size;
-    jl_excstack_t *new_s = (jl_excstack_t*)jl_gc_alloc_buf(jl_get_ptls_states(), bufsz);
+    jl_task_t *ct = jl_current_task;
+    jl_excstack_t *new_s = (jl_excstack_t*)jl_gc_alloc_buf(ct->ptls, bufsz);
     new_s->top = 0;
     new_s->reserved_size = reserved_size;
     if (s)
@@ -354,15 +361,17 @@ JL_DLLEXPORT jl_value_t *jl_value_ptr(jl_value_t *a)
 JL_DLLEXPORT void jl_set_nth_field(jl_value_t *v, size_t idx0, jl_value_t *rhs)
 {
     jl_datatype_t *st = (jl_datatype_t*)jl_typeof(v);
-    if (!st->mutabl)
-        jl_errorf("setfield! immutable struct of type %s cannot be changed", jl_symbol_name(st->name->name));
+    if (!st->name->mutabl)
+        jl_errorf("setfield!: immutable struct of type %s cannot be changed", jl_symbol_name(st->name->name));
     if (idx0 >= jl_datatype_nfields(st))
         jl_bounds_error_int(v, idx0 + 1);
     //jl_value_t *ft = jl_field_type(st, idx0);
     //if (!jl_isa(rhs, ft)) {
     //    jl_type_error("setfield!", ft, rhs);
     //}
-    set_nth_field(st, (void*)v, idx0, rhs);
+    //int isatomic = jl_field_isatomic(st, idx0);
+    //if (isatomic) ...
+    set_nth_field(st, v, idx0, rhs, 0);
 }
 
 
@@ -1091,10 +1100,11 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
             size_t i = 0;
             if (vt == jl_typemap_entry_type)
                 i = 1;
+            jl_value_t *names = isnamedtuple ? jl_tparam0(vt) : (jl_value_t*)jl_field_names(vt);
             for (; i < tlen; i++) {
                 if (!istuple) {
-                    n += jl_printf(out, "%s", jl_symbol_name(jl_field_name(vt, i)));
-                    n += jl_printf(out, "=");
+                    jl_value_t *fname = isnamedtuple ? jl_fieldref_noalloc(names, i) : jl_svecref(names, i);
+                    n += jl_printf(out, "%s=", jl_symbol_name((jl_sym_t*)fname));
                 }
                 size_t offs = jl_field_offset(vt, i);
                 char *fld_ptr = (char*)v + offs;
@@ -1270,10 +1280,9 @@ JL_DLLEXPORT size_t jl_static_show_func_sig(JL_STREAM *s, jl_value_t *type) JL_N
 
 JL_DLLEXPORT void jl_(void *jl_value) JL_NOTSAFEPOINT
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_jmp_buf *old_buf = ptls->safe_restore;
+    jl_jmp_buf *old_buf = jl_get_safe_restore();
     jl_jmp_buf buf;
-    ptls->safe_restore = &buf;
+    jl_set_safe_restore(&buf);
     if (!jl_setjmp(buf, 0)) {
         jl_static_show((JL_STREAM*)STDERR_FILENO, (jl_value_t*)jl_value);
         jl_printf((JL_STREAM*)STDERR_FILENO,"\n");
@@ -1281,7 +1290,7 @@ JL_DLLEXPORT void jl_(void *jl_value) JL_NOTSAFEPOINT
     else {
         jl_printf((JL_STREAM*)STDERR_FILENO, "\n!!! ERROR in jl_ -- ABORTING !!!\n");
     }
-    ptls->safe_restore = old_buf;
+    jl_set_safe_restore(old_buf);
 }
 
 JL_DLLEXPORT void jl_breakpoint(jl_value_t *v)
diff --git a/src/runtime_ccall.cpp b/src/runtime_ccall.cpp
index 307acc9c28cd16..ba265eb67be764 100644
--- a/src/runtime_ccall.cpp
+++ b/src/runtime_ccall.cpp
@@ -27,9 +27,8 @@ using namespace llvm;
 static std::map<std::string, void*> libMap;
 static jl_mutex_t libmap_lock;
 extern "C"
-void *jl_get_library_(const char *f_lib, int throw_err) JL_NOTSAFEPOINT
+void *jl_get_library_(const char *f_lib, int throw_err)
 {
-    void *hnd;
     if (f_lib == NULL)
         return jl_RTLD_DEFAULT_handle;
 #ifdef _OS_WINDOWS_
@@ -40,23 +39,22 @@ void *jl_get_library_(const char *f_lib, int throw_err) JL_NOTSAFEPOINT
     if (strcmp(f_lib, JL_LIBJULIA_DL_LIBNAME) == 0)
         return jl_libjulia_handle;
 #endif
-    JL_LOCK_NOGC(&libmap_lock);
+    JL_LOCK(&libmap_lock);
     // This is the only operation we do on the map, which doesn't invalidate
     // any references or iterators.
     void **map_slot = &libMap[f_lib];
-    JL_UNLOCK_NOGC(&libmap_lock);
-    hnd = jl_atomic_load_acquire(map_slot);
-    if (hnd != NULL)
-        return hnd;
-    // We might run this concurrently on two threads but it doesn't matter.
-    hnd = jl_load_dynamic_library(f_lib, JL_RTLD_DEFAULT, throw_err);
-    if (hnd != NULL)
-        jl_atomic_store_release(map_slot, hnd);
+    void *hnd = *map_slot;
+    if (hnd == NULL) {
+        hnd = jl_load_dynamic_library(f_lib, JL_RTLD_DEFAULT, throw_err);
+        if (hnd != NULL)
+            *map_slot = hnd;
+    }
+    JL_UNLOCK(&libmap_lock);
     return hnd;
 }
 
 extern "C" JL_DLLEXPORT
-void *jl_load_and_lookup(const char *f_lib, const char *f_name, void **hnd) JL_NOTSAFEPOINT
+void *jl_load_and_lookup(const char *f_lib, const char *f_name, void **hnd)
 {
     void *handle = jl_atomic_load_acquire(hnd);
     if (!handle)
@@ -210,11 +208,11 @@ extern "C" JL_DLLEXPORT char *jl_format_filename(const char *output_pattern)
 }
 
 
-static jl_mutex_t trampoline_lock;          // for accesses to the cache and freelist
+static jl_mutex_t trampoline_lock; // for accesses to the cache and freelist
 
 static void *trampoline_freelist;
 
-static void *trampoline_alloc()             // lock taken by caller
+static void *trampoline_alloc() JL_NOTSAFEPOINT // lock taken by caller
 {
     const int sz = 64; // oversized for most platforms. todo: use precise value?
     if (!trampoline_freelist) {
@@ -235,6 +233,7 @@ static void *trampoline_alloc()             // lock taken by caller
 #endif
         errno = last_errno;
         void *next = NULL;
+        assert(sz < jl_page_size);
         for (size_t i = 0; i + sz <= jl_page_size; i += sz) {
             void **curr = (void**)((char*)mem + i);
             *curr = next;
@@ -272,6 +271,8 @@ static void trampoline_deleter(void **f)
     JL_UNLOCK_NOGC(&trampoline_lock);
 }
 
+typedef void *(*init_trampoline_t)(void *tramp, void **nval) JL_NOTSAFEPOINT;
+
 // Use of `cache` is not clobbered in JL_TRY
 JL_GCC_IGNORE_START("-Wclobbered")
 extern "C" JL_DLLEXPORT
@@ -282,7 +283,7 @@ jl_value_t *jl_get_cfunction_trampoline(
     // call-site constants:
     htable_t *cache, // weakref htable indexed by (fobj, vals)
     jl_svec_t *fill,
-    void *(*init_trampoline)(void *tramp, void **nval),
+    init_trampoline_t init_trampoline,
     jl_unionall_t *env,
     jl_value_t **vals)
 {
@@ -339,11 +340,8 @@ jl_value_t *jl_get_cfunction_trampoline(
             ((void**)result)[1] = (void*)fobj;
         }
         if (!permanent) {
-            void *ptr_finalizer[2] = {
-                    (void*)jl_voidpointer_type,
-                    (void*)&trampoline_deleter
-                };
-            jl_gc_add_finalizer(result, (jl_value_t*)&ptr_finalizer[1]);
+            jl_task_t *ct = jl_current_task;
+            jl_gc_add_ptr_finalizer(ct->ptls, result, (void*)(uintptr_t)&trampoline_deleter);
             ((void**)result)[2] = (void*)cache;
             ((void**)result)[3] = (void*)nval;
         }
diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c
index 2337abe7d57041..741bb5448b847d 100644
--- a/src/runtime_intrinsics.c
+++ b/src/runtime_intrinsics.c
@@ -43,7 +43,7 @@ JL_DLLEXPORT jl_value_t *jl_pointerref(jl_value_t *p, jl_value_t *i, jl_value_t
         return *pp;
     }
     else {
-        if (!jl_is_datatype(ety))
+        if (!is_valid_intrinsic_elptr(ety))
             jl_error("pointerref: invalid pointer");
         size_t nb = LLT_ALIGN(jl_datatype_size(ety), jl_datatype_align(ety));
         char *pp = (char*)jl_unbox_long(p) + (jl_unbox_long(i)-1)*nb;
@@ -56,25 +56,194 @@ JL_DLLEXPORT jl_value_t *jl_pointerset(jl_value_t *p, jl_value_t *x, jl_value_t
 {
     JL_TYPECHK(pointerset, pointer, p);
     JL_TYPECHK(pointerset, long, i);
-    JL_TYPECHK(pointerref, long, align);
+    JL_TYPECHK(pointerset, long, align);
     jl_value_t *ety = jl_tparam0(jl_typeof(p));
     if (ety == (jl_value_t*)jl_any_type) {
         jl_value_t **pp = (jl_value_t**)(jl_unbox_long(p) + (jl_unbox_long(i)-1)*sizeof(void*));
         *pp = x;
     }
     else {
-        if (!jl_is_datatype(ety))
+        if (!is_valid_intrinsic_elptr(ety))
             jl_error("pointerset: invalid pointer");
+        if (jl_typeof(x) != ety)
+            jl_type_error("pointerset", ety, x);
         size_t elsz = jl_datatype_size(ety);
         size_t nb = LLT_ALIGN(elsz, jl_datatype_align(ety));
         char *pp = (char*)jl_unbox_long(p) + (jl_unbox_long(i)-1)*nb;
-        if (jl_typeof(x) != ety)
-            jl_type_error("pointerset", ety, x);
         memcpy(pp, x, elsz);
     }
     return p;
 }
 
+JL_DLLEXPORT jl_value_t *jl_atomic_pointerref(jl_value_t *p, jl_value_t *order)
+{ // Atomically loads the element at the address held in pointer `p`; `order` is an ordering symbol.
+    JL_TYPECHK(atomic_pointerref, pointer, p);
+    JL_TYPECHK(atomic_pointerref, symbol, order)
+    (void)jl_get_atomic_order_checked((jl_sym_t*)order, 1, 0); // only validates `order`; result discarded. NOTE(review): flags (1, 0) presumably mean load/no-store -- confirm
+    jl_value_t *ety = jl_tparam0(jl_typeof(p)); // element type: first type parameter of typeof(p)
+    char *pp = (char*)jl_unbox_long(p); // raw address carried by p
+    if (ety == (jl_value_t*)jl_any_type) {
+        return jl_atomic_load((jl_value_t**)pp); // Any element: the slot is read as a jl_value_t* reference
+    }
+    else {
+        if (!is_valid_intrinsic_elptr(ety))
+            jl_error("atomic_pointerref: invalid pointer");
+        size_t nb = jl_datatype_size(ety);
+        if ((nb & (nb - 1)) != 0 || nb > MAX_POINTERATOMIC_SIZE) // reject sizes that are not a power of two (0 passes this test) or exceed the limit
+            jl_error("atomic_pointerref: invalid pointer for atomic operation");
+        return jl_atomic_new_bits(ety, pp); // presumably returns a new `ety` object filled by an atomic read of pp -- confirm
+    }
+}
+
+JL_DLLEXPORT jl_value_t *jl_atomic_pointerset(jl_value_t *p, jl_value_t *x, jl_value_t *order)
+{ // Atomically stores `x` at the address held in pointer `p` and returns `p` itself.
+    JL_TYPECHK(atomic_pointerset, pointer, p);
+    JL_TYPECHK(atomic_pointerset, symbol, order);
+    (void)jl_get_atomic_order_checked((jl_sym_t*)order, 0, 1); // only validates `order`; result discarded. NOTE(review): flags (0, 1) presumably mean no-load/store -- confirm
+    jl_value_t *ety = jl_tparam0(jl_typeof(p)); // element type: first type parameter of typeof(p)
+    char *pp = (char*)jl_unbox_long(p); // raw address carried by p
+    if (ety == (jl_value_t*)jl_any_type) {
+        jl_atomic_store((jl_value_t**)pp, x); // Any element: the reference itself is stored; typeof(x) is not checked on this path
+    }
+    else {
+        if (!is_valid_intrinsic_elptr(ety))
+            jl_error("atomic_pointerset: invalid pointer");
+        if (jl_typeof(x) != ety) // x must have exactly the element type
+            jl_type_error("atomic_pointerset", ety, x);
+        size_t nb = jl_datatype_size(ety);
+        if ((nb & (nb - 1)) != 0 || nb > MAX_POINTERATOMIC_SIZE) // reject sizes that are not a power of two (0 passes this test) or exceed the limit
+            jl_error("atomic_pointerset: invalid pointer for atomic operation");
+        jl_atomic_store_bits(pp, x, nb); // presumably copies the nb data bytes of x to pp atomically -- confirm
+    }
+    return p;
+}
+
+JL_DLLEXPORT jl_value_t *jl_atomic_pointerswap(jl_value_t *p, jl_value_t *x, jl_value_t *order)
+{ // Atomically exchanges the element at the address held in `p` with `x`; returns the result of the exchange (`y`).
+    JL_TYPECHK(atomic_pointerswap, pointer, p);
+    JL_TYPECHK(atomic_pointerswap, symbol, order);
+    (void)jl_get_atomic_order_checked((jl_sym_t*)order, 1, 1); // only validates `order`; result discarded. NOTE(review): flags (1, 1) presumably mean load+store -- confirm
+    jl_value_t *ety = jl_tparam0(jl_typeof(p)); // element type: first type parameter of typeof(p)
+    jl_value_t *y; // value handed back to the caller
+    char *pp = (char*)jl_unbox_long(p); // raw address carried by p
+    if (ety == (jl_value_t*)jl_any_type) {
+        y = jl_atomic_exchange((jl_value_t**)pp, x); // Any element: swap the reference; typeof(x) is not checked on this path
+    }
+    else {
+        if (!is_valid_intrinsic_elptr(ety))
+            jl_error("atomic_pointerswap: invalid pointer");
+        if (jl_typeof(x) != ety) // x must have exactly the element type
+            jl_type_error("atomic_pointerswap", ety, x);
+        size_t nb = jl_datatype_size(ety);
+        if ((nb & (nb - 1)) != 0 || nb > MAX_POINTERATOMIC_SIZE) // reject sizes that are not a power of two (0 passes this test) or exceed the limit
+            jl_error("atomic_pointerswap: invalid pointer for atomic operation");
+        y = jl_atomic_swap_bits(ety, pp, x, nb); // presumably returns the previous contents as a new `ety` object -- confirm
+    }
+    return y;
+}
+
+JL_DLLEXPORT jl_value_t *jl_atomic_pointermodify(jl_value_t *p, jl_value_t *f, jl_value_t *x, jl_value_t *order)
+{ // Atomic read-modify-write at the address in `p`: installs f(old, x) and returns a struct built from (old, new).
+    JL_TYPECHK(atomic_pointermodify, pointer, p);
+    JL_TYPECHK(atomic_pointermodify, symbol, order);
+    (void)jl_get_atomic_order_checked((jl_sym_t*)order, 1, 1); // only validates `order`; result discarded
+    jl_value_t *ety = jl_tparam0(jl_typeof(p)); // element type: first type parameter of typeof(p)
+    char *pp = (char*)jl_unbox_long(p); // raw address carried by p
+    jl_value_t *expected; // value last observed at pp; the compare-and-swap below tests against it
+    if (ety == (jl_value_t*)jl_any_type) {
+        expected = jl_atomic_load((jl_value_t**)pp);
+    }
+    else {
+        if (!is_valid_intrinsic_elptr(ety))
+            jl_error("atomic_pointermodify: invalid pointer");
+        size_t nb = jl_datatype_size(ety);
+        if ((nb & (nb - 1)) != 0 || nb > MAX_POINTERATOMIC_SIZE) // reject sizes that are not a power of two (0 passes this test) or exceed the limit
+            jl_error("atomic_pointermodify: invalid pointer for atomic operation");
+        expected = jl_atomic_new_bits(ety, pp);
+    }
+    jl_value_t **args; // rooted 2-slot frame: args[0] = old value, args[1] = x going into f, then y coming out
+    JL_GC_PUSHARGS(args, 2);
+    args[0] = expected;
+    while (1) { // retry until the compare-and-swap installs y
+        args[1] = x; // restore x: the previous iteration overwrote this slot with y
+        jl_value_t *y = jl_apply_generic(f, args, 2); // y = f(expected, x)
+        args[1] = y; // keep y rooted
+        if (ety == (jl_value_t*)jl_any_type) {
+            if (jl_atomic_cmpswap((jl_value_t**)pp, &expected, y)) // on failure `expected` presumably receives the observed value -- confirm
+                break;
+        }
+        else {
+            //if (!is_valid_intrinsic_elptr(ety)) // already validated before entering the loop
+            //    jl_error("atomic_pointermodify: invalid pointer");
+            if (jl_typeof(y) != ety) // f must return exactly the element type
+                jl_type_error("atomic_pointermodify", ety, y);
+            size_t nb = jl_datatype_size(ety);
+            if (jl_atomic_bool_cmpswap_bits(pp, expected, y, nb))
+                break;
+            expected = jl_atomic_new_bits(ety, pp); // swap failed: re-read the current contents for the next attempt
+        }
+        args[0] = expected; // root the refreshed old value before calling f again
+        jl_gc_safepoint();
+    }
+    // args[0] == expected (old)
+    // args[1] == y (new)
+    jl_datatype_t *rettyp = jl_apply_modify_type(ety);
+    JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE)
+    args[0] = jl_new_struct(rettyp, args[0], args[1]);
+    JL_GC_POP();
+    return args[0];
+}
+
+
+JL_DLLEXPORT jl_value_t *jl_atomic_pointerreplace(jl_value_t *p, jl_value_t *expected, jl_value_t *x, jl_value_t *success_order_sym, jl_value_t *failure_order_sym)
+{ // Atomic compare-and-swap at the address in `p`: tries to replace `expected` with `x`; returns a `rettyp` struct.
+    JL_TYPECHK(atomic_pointerreplace, pointer, p);
+    JL_TYPECHK(atomic_pointerreplace, symbol, success_order_sym);
+    JL_TYPECHK(atomic_pointerreplace, symbol, failure_order_sym);
+    enum jl_memory_order success_order = jl_get_atomic_order_checked((jl_sym_t*)success_order_sym, 1, 1);
+    enum jl_memory_order failure_order = jl_get_atomic_order_checked((jl_sym_t*)failure_order_sym, 1, 0);
+    if (failure_order > success_order) // enum comparison: the failure ordering may not rank above the success ordering
+        jl_atomic_error("atomic_pointerreplace: invalid atomic ordering");
+    // TODO: filter other invalid orderings
+    jl_value_t *ety = jl_tparam0(jl_typeof(p)); // element type: first type parameter of typeof(p)
+    char *pp = (char*)jl_unbox_long(p); // raw address carried by p
+    jl_datatype_t *rettyp = jl_apply_cmpswap_type(ety); // type of the returned struct
+    JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE)
+    if (ety == (jl_value_t*)jl_any_type) {
+        jl_value_t *result; // starts as `expected`; later holds the struct that is returned
+        JL_GC_PUSH1(&result);
+        result = expected;
+        int success;
+        while (1) {
+            success = jl_atomic_cmpswap((jl_value_t**)pp, &result, x); // on failure `result` presumably receives the observed reference -- confirm
+            if (success || !jl_egal(result, expected)) // keep retrying while the swap failed but `result` is still egal to `expected`
+                break;
+        }
+        result = jl_new_struct(rettyp, result, success ? jl_true : jl_false); // fields: (result value, success flag)
+        JL_GC_POP();
+        return result;
+    }
+    else {
+        if (!is_valid_intrinsic_elptr(ety))
+            jl_error("atomic_pointerreplace: invalid pointer");
+        if (jl_typeof(x) != ety) // x must have exactly the element type; `expected` is not type-checked here
+            jl_type_error("atomic_pointerreplace", ety, x);
+        size_t nb = jl_datatype_size(ety);
+        if ((nb & (nb - 1)) != 0 || nb > MAX_POINTERATOMIC_SIZE) // reject sizes that are not a power of two (0 passes this test) or exceed the limit
+            jl_error("atomic_pointerreplace: invalid pointer for atomic operation");
+        return jl_atomic_cmpswap_bits((jl_datatype_t*)ety, rettyp, pp, expected, x, nb); // helper builds the return value for the bits case
+    }
+}
+
+JL_DLLEXPORT jl_value_t *jl_atomic_fence(jl_value_t *order_sym)
+{ // Issues jl_fence() when the requested ordering ranks above monotonic; always returns jl_nothing.
+    JL_TYPECHK(fence, symbol, order_sym); // type errors are reported under the name "fence"
+    enum jl_memory_order order = jl_get_atomic_order_checked((jl_sym_t*)order_sym, 0, 0);
+    if (order > jl_memory_order_monotonic) // orderings at or below monotonic emit no fence
+        jl_fence();
+    return jl_nothing;
+}
+
 JL_DLLEXPORT jl_value_t *jl_cglobal(jl_value_t *v, jl_value_t *ty)
 {
     JL_TYPECHK(cglobal, type, ty);
@@ -414,7 +583,7 @@ static inline jl_value_t *jl_intrinsiclambda_ty1(jl_value_t *ty, void *pa, unsig
 
 static inline jl_value_t *jl_intrinsiclambda_u1(jl_value_t *ty, void *pa, unsigned osize, unsigned osize2, const void *voidlist)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     intrinsic_u1_t op = select_intrinsic_u1(osize2, (const intrinsic_u1_t*)voidlist);
     uint64_t cnt = op(osize * host_char_bit, pa);
     // TODO: the following assume little-endian
@@ -422,7 +591,7 @@ static inline jl_value_t *jl_intrinsiclambda_u1(jl_value_t *ty, void *pa, unsign
     if (osize <= sizeof(cnt)) {
         return jl_new_bits(ty, &cnt);
     }
-    jl_value_t *newv = jl_gc_alloc(ptls, osize, ty);
+    jl_value_t *newv = jl_gc_alloc(ct->ptls, osize, ty);
     // perform zext, if needed
     memset((char*)jl_data_ptr(newv) + sizeof(cnt), 0, osize - sizeof(cnt));
     memcpy(jl_data_ptr(newv), &cnt, sizeof(cnt));
@@ -478,13 +647,13 @@ typedef void (fintrinsic_op1)(unsigned, void*, void*);
 
 static inline jl_value_t *jl_fintrinsic_1(jl_value_t *ty, jl_value_t *a, const char *name, fintrinsic_op1 *halfop, fintrinsic_op1 *floatop, fintrinsic_op1 *doubleop)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     if (!jl_is_primitivetype(jl_typeof(a)))
         jl_errorf("%s: value is not a primitive type", name);
     if (!jl_is_primitivetype(ty))
         jl_errorf("%s: type is not a primitive type", name);
     unsigned sz2 = jl_datatype_size(ty);
-    jl_value_t *newv = jl_gc_alloc(ptls, sz2, ty);
+    jl_value_t *newv = jl_gc_alloc(ct->ptls, sz2, ty);
     void *pa = jl_data_ptr(a), *pr = jl_data_ptr(newv);
     unsigned sz = jl_datatype_size(jl_typeof(a));
     switch (sz) {
@@ -644,9 +813,9 @@ static inline jl_value_t *jl_intrinsiclambda_checked(jl_value_t *ty, void *pa, v
     params[0] = ty;
     params[1] = (jl_value_t*)jl_bool_type;
     jl_datatype_t *tuptyp = jl_apply_tuple_type_v(params, 2);
-    JL_GC_PROMISE_ROOTED(tuptyp); // (JL_ALAWYS_LEAFTYPE)
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_value_t *newv = jl_gc_alloc(ptls, ((jl_datatype_t*)tuptyp)->size, tuptyp);
+    JL_GC_PROMISE_ROOTED(tuptyp); // (JL_ALWAYS_LEAFTYPE)
+    jl_task_t *ct = jl_current_task;
+    jl_value_t *newv = jl_gc_alloc(ct->ptls, ((jl_datatype_t*)tuptyp)->size, tuptyp);
 
     intrinsic_checked_t op = select_intrinsic_checked(sz2, (const intrinsic_checked_t*)voidlist);
     int ovflw = op(sz * host_char_bit, pa, pb, jl_data_ptr(newv));
@@ -673,14 +842,14 @@ static inline jl_value_t *jl_intrinsiclambda_checkeddiv(jl_value_t *ty, void *pa
     bi_intrinsic_ctype(OP, name, 64, double) \
 JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b) \
 { \
-    jl_ptls_t ptls = jl_get_ptls_states();\
+    jl_task_t *ct = jl_current_task; \
     jl_value_t *ty = jl_typeof(a); \
     if (jl_typeof(b) != ty) \
         jl_error(#name ": types of a and b must match"); \
     if (!jl_is_primitivetype(ty)) \
         jl_error(#name ": values are not primitive types"); \
     int sz = jl_datatype_size(ty); \
-    jl_value_t *newv = jl_gc_alloc(ptls, sz, ty);          \
+    jl_value_t *newv = jl_gc_alloc(ct->ptls, sz, ty); \
     void *pa = jl_data_ptr(a), *pb = jl_data_ptr(b), *pr = jl_data_ptr(newv); \
     switch (sz) { \
     /* choose the right size c-type operation */ \
@@ -736,14 +905,14 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b) \
     ter_intrinsic_ctype(OP, name, 64, double) \
 JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b, jl_value_t *c) \
 { \
-    jl_ptls_t ptls = jl_get_ptls_states();\
+    jl_task_t *ct = jl_current_task; \
     jl_value_t *ty = jl_typeof(a); \
     if (jl_typeof(b) != ty || jl_typeof(c) != ty) \
         jl_error(#name ": types of a, b, and c must match"); \
     if (!jl_is_primitivetype(ty)) \
         jl_error(#name ": values are not primitive types"); \
-    int sz = jl_datatype_size(ty);                                      \
-    jl_value_t *newv = jl_gc_alloc(ptls, sz, ty);                       \
+    int sz = jl_datatype_size(ty); \
+    jl_value_t *newv = jl_gc_alloc(ct->ptls, sz, ty); \
     void *pa = jl_data_ptr(a), *pb = jl_data_ptr(b), *pc = jl_data_ptr(c), *pr = jl_data_ptr(newv); \
     switch (sz) { \
     /* choose the right size c-type operation */ \
@@ -834,33 +1003,11 @@ fpiseq_n(double, 64)
 #define fpiseq(a,b) \
     sizeof(a) == sizeof(float) ? fpiseq32(a, b) : fpiseq64(a, b)
 
-#define fpislt_n(c_type, nbits)                                         \
-    static inline int fpislt##nbits(c_type a, c_type b) JL_NOTSAFEPOINT \
-    {                                                                   \
-        bits##nbits ua, ub;                                             \
-        ua.f = a;                                                       \
-        ub.f = b;                                                       \
-        if (!isnan(a) && isnan(b))                                      \
-            return 1;                                                   \
-        if (isnan(a) || isnan(b))                                       \
-            return 0;                                                   \
-        if (ua.d >= 0 && ua.d < ub.d)                                   \
-            return 1;                                                   \
-        if (ua.d < 0 && ua.ud > ub.ud)                                  \
-            return 1;                                                   \
-        return 0;                                                       \
-    }
-fpislt_n(float, 32)
-fpislt_n(double, 64)
-#define fpislt(a, b) \
-    sizeof(a) == sizeof(float) ? fpislt32(a, b) : fpislt64(a, b)
-
 bool_fintrinsic(eq,eq_float)
 bool_fintrinsic(ne,ne_float)
 bool_fintrinsic(lt,lt_float)
 bool_fintrinsic(le,le_float)
 bool_fintrinsic(fpiseq,fpiseq)
-bool_fintrinsic(fpislt,fpislt)
 
 // bitwise operators
 #define and_op(a,b) a & b
diff --git a/src/safepoint.c b/src/safepoint.c
index 2f90afaf508e06..d54c7c62bec56e 100644
--- a/src/safepoint.c
+++ b/src/safepoint.c
@@ -115,13 +115,14 @@ int jl_safepoint_start_gc(void)
         return 1;
     }
     // The thread should have set this already
-    assert(jl_get_ptls_states()->gc_state == JL_GC_STATE_WAITING);
+    assert(jl_current_task->ptls->gc_state == JL_GC_STATE_WAITING);
     jl_mutex_lock_nogc(&safepoint_lock);
     // In case multiple threads enter the GC at the same time, only allow
     // one of them to actually run the collection. We can't just let the
     // master thread do the GC since it might be running unmanaged code
     // and can take arbitrarily long time before hitting a safe point.
-    if (jl_atomic_compare_exchange(&jl_gc_running, 0, 1) != 0) {
+    uint32_t running = 0;
+    if (!jl_atomic_cmpswap(&jl_gc_running, &running, 1)) {
         jl_mutex_unlock_nogc(&safepoint_lock);
         jl_safepoint_wait_gc();
         return 0;
@@ -156,7 +157,7 @@ void jl_safepoint_end_gc(void)
 void jl_safepoint_wait_gc(void)
 {
     // The thread should have set this is already
-    assert(jl_get_ptls_states()->gc_state != 0);
+    assert(jl_current_task->ptls->gc_state != 0);
     // Use normal volatile load in the loop for speed until GC finishes.
     // Then use an acquire load to make sure the GC result is visible on this thread.
     while (jl_atomic_load_relaxed(&jl_gc_running) || jl_atomic_load_acquire(&jl_gc_running)) {
diff --git a/src/signal-handling.c b/src/signal-handling.c
index aa642eeedf2a2a..4743d8b37220aa 100644
--- a/src/signal-handling.c
+++ b/src/signal-handling.c
@@ -25,16 +25,20 @@ static volatile size_t bt_size_cur = 0;
 static volatile uint64_t nsecprof = 0;
 static volatile int running = 0;
 static const    uint64_t GIGA = 1000000000ULL;
+static uint64_t profile_cong_rng_seed = 0; // RNG state used to shuffle the sampling order; seeded via seed_cong() in jl_profile_init
+static uint64_t profile_cong_rng_unbias = 0; // filled in by unbias_cong(jl_n_threads, ...) in jl_profile_init and handed to cong()
+static volatile uint64_t *profile_round_robin_thread_order = NULL; // jl_n_threads thread indices, allocated once in jl_profile_init and reshuffled by the listeners before each pass over the threads
 // Timers to take samples at intervals
 JL_DLLEXPORT void jl_profile_stop_timer(void);
 JL_DLLEXPORT int jl_profile_start_timer(void);
 void jl_lock_profile(void);
 void jl_unlock_profile(void);
+void jl_shuffle_int_array_inplace(volatile uint64_t *carray, size_t size, uint64_t *seed); // permutes carray[0..size) in place, advancing *seed
 
 JL_DLLEXPORT int jl_profile_is_buffer_full(void)
 {
-    // the latter `+ 1` is for the block terminator `0`.
-    return bt_size_cur + (JL_BT_MAX_ENTRY_SIZE + 1) + 1 > bt_size_max;
+    // the `+ 6` leaves room for the 4 per-sample metadata entries plus the two `0` block terminators
+    return bt_size_cur + (JL_BT_MAX_ENTRY_SIZE + 1) + 6 > bt_size_max;
 }
 
 static uint64_t jl_last_sigint_trigger = 0;
@@ -106,17 +110,16 @@ static uintptr_t jl_get_pc_from_ctx(const void *_ctx);
 void jl_show_sigill(void *_ctx);
 static size_t jl_safe_read_mem(const volatile char *ptr, char *out, size_t len)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_jmp_buf *old_buf = ptls->safe_restore;
+    jl_jmp_buf *old_buf = jl_get_safe_restore(); // remember the currently installed safe-restore buffer so it can be put back below
     jl_jmp_buf buf;
-    ptls->safe_restore = &buf;
+    jl_set_safe_restore(&buf); // install our own jump buffer for the duration of the copy
     volatile size_t i = 0;
     if (!jl_setjmp(buf, 0)) {
-        for (;i < len;i++) {
+        for (; i < len; i++) { // copy one byte at a time; `i` is the number of bytes copied so far and is what gets returned
             out[i] = ptr[i];
         }
     }
-    ptls->safe_restore = old_buf;
+    jl_set_safe_restore(old_buf); // reinstate the previous buffer on every path before returning
     return i;
 }
 
@@ -235,18 +238,16 @@ void jl_show_sigill(void *_ctx)
 void jl_critical_error(int sig, bt_context_t *context)
 {
 
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_bt_element_t *bt_data = ptls->bt_data;
-    size_t *bt_size = &ptls->bt_size;
+    jl_task_t *ct = jl_current_task;
+    jl_bt_element_t *bt_data = ct->ptls->bt_data;
+    size_t *bt_size = &ct->ptls->bt_size;
     size_t i, n = *bt_size;
     if (sig) {
         // kill this task, so that we cannot get back to it accidentally (via an untimely ^C or jlbacktrace in jl_exit)
-        ptls->pgcstack = NULL;
-        ptls->safe_restore = NULL;
-        if (ptls->current_task) {
-            ptls->current_task->eh = NULL;
-            ptls->current_task->excstack = NULL;
-        }
+        jl_set_safe_restore(NULL);
+        ct->gcstack = NULL;
+        ct->eh = NULL;
+        ct->excstack = NULL;
 #ifndef _OS_WINDOWS_
         sigset_t sset;
         sigemptyset(&sset);
@@ -291,6 +292,17 @@ JL_DLLEXPORT int jl_profile_init(size_t maxsize, uint64_t delay_nsec)
     nsecprof = delay_nsec;
     if (bt_data_prof != NULL)
         free((void*)bt_data_prof);
+    if (profile_round_robin_thread_order == NULL) {
+        // NOTE: We currently only allocate this once, since jl_n_threads cannot change
+        // during execution of a julia process. If/when this invariant changes in the
+        // future, this will have to be adjusted.
+        profile_round_robin_thread_order = (uint64_t*) calloc(jl_n_threads, sizeof(uint64_t));
+        for (int i = 0; i < jl_n_threads; i++) {
+            profile_round_robin_thread_order[i] = i;
+        }
+    }
+    seed_cong(&profile_cong_rng_seed);
+    unbias_cong(jl_n_threads, &profile_cong_rng_unbias);
     bt_data_prof = (jl_bt_element_t*) calloc(maxsize, sizeof(jl_bt_element_t));
     if (bt_data_prof == NULL && maxsize > 0)
         return -1;
@@ -298,6 +310,17 @@ JL_DLLEXPORT int jl_profile_init(size_t maxsize, uint64_t delay_nsec)
     return 0;
 }
 
+void jl_shuffle_int_array_inplace(volatile uint64_t *carray, size_t size, uint64_t *seed) {
+    // In-place "modern Fisher–Yates shuffle", O(n); `i-- > 1` walks size-1..1 and cannot wrap when size == 0
+    // https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm
+    for (size_t i = size; i-- > 1; ) {
+        size_t j = cong(i, profile_cong_rng_unbias, seed);
+        uint64_t tmp = carray[j];
+        carray[j] = carray[i];
+        carray[i] = tmp;
+    }
+}
+
 JL_DLLEXPORT uint8_t *jl_profile_get_data(void)
 {
     return (uint8_t*) bt_data_prof;
diff --git a/src/signals-mach.c b/src/signals-mach.c
index d7f8fcfacc944d..1fc1ff98f2e8cb 100644
--- a/src/signals-mach.c
+++ b/src/signals-mach.c
@@ -84,7 +84,6 @@ extern boolean_t exc_server(mach_msg_header_t *, mach_msg_header_t *);
 void *mach_segv_listener(void *arg)
 {
     (void)arg;
-    (void)jl_get_ptls_states();
     while (1) {
         int ret = mach_msg_server(exc_server, 2048, segv_port, MACH_MSG_TIMEOUT_NONE);
         jl_safe_printf("mach_msg_server: %s\n", mach_error_string(ret));
@@ -167,7 +166,7 @@ static void jl_call_in_state(jl_ptls_t ptls2, host_thread_state_t *state,
 #else
 #error "julia: throw-in-context not supported on this platform"
 #endif
-    if (ptls2->signal_stack == NULL || is_addr_on_sigstack(ptls2, (void*)rsp)) {
+    if (ptls2 == NULL || ptls2->signal_stack == NULL || is_addr_on_sigstack(ptls2, (void*)rsp)) {
         rsp = (rsp - 256) & ~(uintptr_t)15; // redzone and re-alignment
     }
     else {
@@ -210,10 +209,11 @@ static void jl_throw_in_thread(int tid, mach_port_t thread, jl_value_t *exceptio
     kern_return_t ret = thread_get_state(thread, THREAD_STATE, (thread_state_t)&state, &count);
     HANDLE_MACH_ERROR("thread_get_state", ret);
     jl_ptls_t ptls2 = jl_all_tls_states[tid];
-    if (!ptls2->safe_restore) {
+    if (!jl_get_safe_restore()) {
         assert(exception);
-        ptls2->bt_size = rec_backtrace_ctx(ptls2->bt_data, JL_MAX_BT_SIZE,
-                                           (bt_context_t*)&state, ptls2->pgcstack);
+        ptls2->bt_size =
+            rec_backtrace_ctx(ptls2->bt_data, JL_MAX_BT_SIZE, (bt_context_t *)&state,
+                              NULL /*current_task?*/);
         ptls2->sig_exception = exception;
     }
     jl_call_in_state(ptls2, &state, &jl_sig_throw);
@@ -223,9 +223,10 @@ static void jl_throw_in_thread(int tid, mach_port_t thread, jl_value_t *exceptio
 
 static void segv_handler(int sig, siginfo_t *info, void *context)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
     assert(sig == SIGSEGV || sig == SIGBUS);
-    if (ptls->safe_restore) { // restarting jl_ or jl_unwind_stepn
+    if (jl_get_safe_restore()) { // restarting jl_ or jl_unwind_stepn
+        jl_task_t *ct = jl_get_current_task();
+        jl_ptls_t ptls = ct == NULL ? NULL : ct->ptls;
         jl_call_in_state(ptls, (host_thread_state_t*)jl_to_bt_context(context), &jl_sig_throw);
     }
     else {
@@ -291,7 +292,7 @@ kern_return_t catch_exception_raise(mach_port_t            exception_port,
         }
         return KERN_SUCCESS;
     }
-    if (ptls2->safe_restore) {
+    if (jl_get_safe_restore()) {
         jl_throw_in_thread(tid, thread, jl_stackovf_exception);
         return KERN_SUCCESS;
     }
@@ -301,7 +302,7 @@ kern_return_t catch_exception_raise(mach_port_t            exception_port,
     if (msync((void*)(fault_addr & ~(jl_page_size - 1)), 1, MS_ASYNC) == 0) { // check if this was a valid address
 #endif
         jl_value_t *excpt;
-        if (is_addr_on_stack(ptls2, (void*)fault_addr)) {
+        if (is_addr_on_stack(ptls2->current_task, (void*)fault_addr)) {
             excpt = jl_stackovf_exception;
         }
 #ifdef SEGV_EXCEPTION
@@ -523,7 +524,6 @@ static kern_return_t profiler_segv_handler
 void *mach_profile_listener(void *arg)
 {
     (void)arg;
-    int i;
     const int max_size = 512;
     attach_exception_port(mach_thread_self(), 1);
 #ifdef LLVMLIBUNWIND
@@ -540,7 +540,10 @@ void *mach_profile_listener(void *arg)
         jl_lock_profile();
         void *unused = NULL;
         int keymgr_locked = _keymgr_get_and_lock_processwide_ptr_2(KEYMGR_GCC3_DW2_OBJ_LIST, &unused) == 0;
-        for (i = jl_n_threads; i-- > 0; ) {
+        jl_shuffle_int_array_inplace(profile_round_robin_thread_order, jl_n_threads, &profile_cong_rng_seed);
+        for (int idx = jl_n_threads; idx-- > 0; ) {
+            // Stop the threads in the random round-robin order.
+            int i = profile_round_robin_thread_order[idx];
             // if there is no space left, break early
             if (jl_profile_is_buffer_full()) {
                 jl_profile_stop_timer();
@@ -585,8 +588,22 @@ void *mach_profile_listener(void *arg)
 #else
                 bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur, bt_size_max - bt_size_cur - 1, uc, NULL);
 #endif
+                jl_ptls_t ptls = jl_all_tls_states[i];
 
-                // Mark the end of this block with 0
+                // store threadid but add 1 as 0 is preserved to indicate end of block
+                bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1;
+
+                // store task id
+                bt_data_prof[bt_size_cur++].uintptr = ptls->current_task;
+
+                // store cpu cycle clock
+                bt_data_prof[bt_size_cur++].uintptr = cycleclock();
+
+                // store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
+                bt_data_prof[bt_size_cur++].uintptr = ptls->sleep_check_state + 1;
+
+                // Mark the end of this block with two 0's
+                bt_data_prof[bt_size_cur++].uintptr = 0;
                 bt_data_prof[bt_size_cur++].uintptr = 0;
             }
             // We're done! Resume the thread.
diff --git a/src/signals-unix.c b/src/signals-unix.c
index 98aa9264eb3842..460ef4d80518eb 100644
--- a/src/signals-unix.c
+++ b/src/signals-unix.c
@@ -83,8 +83,11 @@ static inline __attribute__((unused)) uintptr_t jl_get_rsp_from_ctx(const void *
 #elif defined(_OS_DARWIN_) && defined(_CPU_AARCH64_)
     const ucontext64_t *ctx = (const ucontext64_t*)_ctx;
     return ctx->uc_mcontext64->__ss.__sp;
+#elif defined(_OS_FREEBSD_) && defined(_CPU_X86_64_)
+    const ucontext_t *ctx = (const ucontext_t*)_ctx;
+    return ctx->uc_mcontext.mc_rsp;
 #else
-    // TODO Add support for FreeBSD and PowerPC(64)?
+    // TODO Add support for PowerPC(64)?
     return 0;
 #endif
 }
@@ -109,7 +112,7 @@ static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int sig, void *_c
     // checks that the syscall is made in the signal handler and that
     // the ucontext address is valid. Hopefully the value of the ucontext
     // will not be part of the validation...
-    if (!ptls->signal_stack) {
+    if (!ptls || !ptls->signal_stack) {
         sigset_t sset;
         sigemptyset(&sset);
         sigaddset(&sset, sig);
@@ -196,26 +199,29 @@ static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int sig, void *_c
 #endif
 }
 
-static void jl_throw_in_ctx(jl_ptls_t ptls, jl_value_t *e, int sig, void *sigctx)
+static void jl_throw_in_ctx(jl_task_t *ct, jl_value_t *e, int sig, void *sigctx)
 {
-    if (!ptls->safe_restore)
-        ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE,
-                                          jl_to_bt_context(sigctx), ptls->pgcstack);
-    ptls->sig_exception = e;
+    jl_ptls_t ptls = ct->ptls; // ct is dereferenced unconditionally, so callers must pass a non-NULL task
+    if (!jl_get_safe_restore()) { // only when no safe-restore buffer is active: record the backtrace and the exception to raise
+        ptls->bt_size =
+            rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, jl_to_bt_context(sigctx),
+                              ct->gcstack);
+        ptls->sig_exception = e; // stored in the thread state alongside the backtrace captured above
+    }
     jl_call_in_ctx(ptls, &jl_sig_throw, sig, sigctx);
 }
 
 static pthread_t signals_thread;
 
-static int is_addr_on_stack(jl_ptls_t ptls, void *addr)
+static int is_addr_on_stack(jl_task_t *ct, void *addr)
 {
-    jl_task_t *t = ptls->current_task;
-    if (t->copy_stack)
+    if (ct->copy_stack) { // copy_stack tasks are tested against the owning thread's stackbase/stacksize range
+        jl_ptls_t ptls = ct->ptls;
         return ((char*)addr > (char*)ptls->stackbase - ptls->stacksize &&
                 (char*)addr < (char*)ptls->stackbase);
-    else
-        return ((char*)addr > (char*)t->stkbuf &&
-                (char*)addr < (char*)t->stkbuf + t->bufsz);
+    }
+    return ((char*)addr > (char*)ct->stkbuf && // otherwise test against the task's own stack buffer; both bounds are exclusive
+            (char*)addr < (char*)ct->stkbuf + ct->bufsz);
 }
 
 static void sigdie_handler(int sig, siginfo_t *info, void *context)
@@ -305,26 +311,34 @@ static int jl_is_on_sigstack(jl_ptls_t ptls, void *ptr, void *context)
 
 static void segv_handler(int sig, siginfo_t *info, void *context)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    if (jl_get_safe_restore()) { // restarting jl_ or profile
+        jl_call_in_ctx(NULL, &jl_sig_throw, sig, context);
+        return;
+    }
+    jl_task_t *ct = jl_get_current_task();
+    if (ct == NULL) {
+        sigdie_handler(sig, info, context);
+        return;
+    }
     assert(sig == SIGSEGV || sig == SIGBUS);
     if (jl_addr_is_safepoint((uintptr_t)info->si_addr)) {
         jl_set_gc_and_wait();
         // Do not raise sigint on worker thread
-        if (ptls->tid != 0)
+        if (ct->tid != 0)
             return;
-        if (ptls->defer_signal) {
+        if (ct->ptls->defer_signal) {
             jl_safepoint_defer_sigint();
         }
         else if (jl_safepoint_consume_sigint()) {
             jl_clear_force_sigint();
-            jl_throw_in_ctx(ptls, jl_interrupt_exception, sig, context);
+            jl_throw_in_ctx(ct, jl_interrupt_exception, sig, context);
         }
         return;
     }
-    if (ptls->safe_restore || is_addr_on_stack(ptls, info->si_addr)) { // stack overflow, or restarting jl_
-        jl_throw_in_ctx(ptls, jl_stackovf_exception, sig, context);
+    if (is_addr_on_stack(ct, info->si_addr)) { // stack overflow
+        jl_throw_in_ctx(ct, jl_stackovf_exception, sig, context);
     }
-    else if (jl_is_on_sigstack(ptls, info->si_addr, context)) {
+    else if (jl_is_on_sigstack(ct->ptls, info->si_addr, context)) {
         // This mainly happens when one of the finalizers during final cleanup
         // on the signal stack has a deep/infinite recursion.
         // There isn't anything more we can do
@@ -334,11 +348,11 @@ static void segv_handler(int sig, siginfo_t *info, void *context)
         _exit(sig + 128);
     }
     else if (sig == SIGSEGV && info->si_code == SEGV_ACCERR && is_write_fault(context)) {  // writing to read-only memory (e.g., mmap)
-        jl_throw_in_ctx(ptls, jl_readonlymemory_exception, sig, context);
+        jl_throw_in_ctx(ct, jl_readonlymemory_exception, sig, context);
     }
     else {
 #ifdef SEGV_EXCEPTION
-        jl_throw_in_ctx(ptls, jl_segv_exception, sig, context);
+        jl_throw_in_ctx(ct, jl_segv_exception, sig, context);
 #else
         sigdie_handler(sig, info, context);
 #endif
@@ -433,7 +447,10 @@ static void jl_exit_thread0(int state, jl_bt_element_t *bt_data, size_t bt_size)
 // 3: exit with `thread0_exit_state`
 void usr2_handler(int sig, siginfo_t *info, void *ctx)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_get_current_task();
+    if (ct == NULL)
+        return;
+    jl_ptls_t ptls = ct->ptls;
     int errno_save = errno;
     sig_atomic_t request = jl_atomic_exchange(&ptls->signal_request, 0);
 #if !defined(JL_DISABLE_LIBUNWIND)
@@ -457,11 +474,11 @@ void usr2_handler(int sig, siginfo_t *info, void *ctx)
                 jl_safe_printf("WARNING: Force throwing a SIGINT\n");
             // Force a throw
             jl_clear_force_sigint();
-            jl_throw_in_ctx(ptls, jl_interrupt_exception, sig, ctx);
+            jl_throw_in_ctx(ct, jl_interrupt_exception, sig, ctx);
         }
     }
     else if (request == 3) {
-        jl_call_in_ctx(ptls, jl_exit_thread0_cb, sig, ctx);
+        jl_call_in_ctx(ct->ptls, jl_exit_thread0_cb, sig, ctx);
     }
     errno = errno_save;
 }
@@ -731,7 +748,10 @@ static void *signal_listener(void *arg)
         // (so that thread zero gets notified last)
         if (critical || profile)
             jl_lock_profile();
-        for (int i = jl_n_threads; i-- > 0; ) {
+        jl_shuffle_int_array_inplace(profile_round_robin_thread_order, jl_n_threads, &profile_cong_rng_seed);
+        for (int idx = jl_n_threads; idx-- > 0; ) {
+            // Stop the threads in the random round-robin order.
+            int i = profile_round_robin_thread_order[idx];
             // notify thread to stop
             jl_thread_suspend_and_get_state(i, &signal_context);
 
@@ -753,11 +773,10 @@ static void *signal_listener(void *arg)
                 else {
                     // unwinding can fail, so keep track of the current state
                     // and restore from the SEGV handler if anything happens.
-                    jl_ptls_t ptls = jl_get_ptls_states();
-                    jl_jmp_buf *old_buf = ptls->safe_restore;
+                    jl_jmp_buf *old_buf = jl_get_safe_restore();
                     jl_jmp_buf buf;
 
-                    ptls->safe_restore = &buf;
+                    jl_set_safe_restore(&buf);
                     if (jl_setjmp(buf, 0)) {
                         jl_safe_printf("WARNING: profiler attempt to access an invalid memory location\n");
                     } else {
@@ -765,9 +784,24 @@ static void *signal_listener(void *arg)
                         bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur,
                                 bt_size_max - bt_size_cur - 1, signal_context, NULL);
                     }
-                    ptls->safe_restore = old_buf;
+                    jl_set_safe_restore(old_buf);
+
+                    jl_ptls_t ptls = jl_all_tls_states[i];
+
+                    // store threadid but add 1 as 0 is preserved to indicate end of block
+                    bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1;
+
+                    // store task id
+                    bt_data_prof[bt_size_cur++].uintptr = ptls->current_task;
+
+                    // store cpu cycle clock
+                    bt_data_prof[bt_size_cur++].uintptr = cycleclock();
+
+                    // store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
+                    bt_data_prof[bt_size_cur++].uintptr = ptls->sleep_check_state + 1;
 
-                    // Mark the end of this block with 0
+                    // Mark the end of this block with two 0's
+                    bt_data_prof[bt_size_cur++].uintptr = 0;
                     bt_data_prof[bt_size_cur++].uintptr = 0;
                 }
             }
@@ -832,8 +866,15 @@ void restore_signals(void)
 static void fpe_handler(int sig, siginfo_t *info, void *context)
 {
     (void)info;
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_throw_in_ctx(ptls, jl_diverror_exception, sig, context);
+    if (jl_get_safe_restore()) { // a safe-restore buffer is active (restarting jl_ or profile)
+        jl_call_in_ctx(NULL, &jl_sig_throw, sig, context); // no thread state is passed on this path
+        return;
+    }
+    jl_task_t *ct = jl_get_current_task();
+    if (ct == NULL) // exception on foreign thread is fatal
+        sigdie_handler(sig, info, context);
+    else
+        jl_throw_in_ctx(ct, jl_diverror_exception, sig, context); // deliver jl_diverror_exception to the current task
 }
 
 static void sigint_handler(int sig)
diff --git a/src/signals-win.c b/src/signals-win.c
index ace5a178d483a0..dd8dca959cabf3 100644
--- a/src/signals-win.c
+++ b/src/signals-win.c
@@ -43,11 +43,11 @@ static char *strsignal(int sig)
 
 static void jl_try_throw_sigint(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     jl_safepoint_enable_sigint();
     jl_wake_libuv();
     int force = jl_check_force_sigint();
-    if (force || (!ptls->defer_signal && ptls->io_wait)) {
+    if (force || (!ct->ptls->defer_signal && ct->ptls->io_wait)) {
         jl_safepoint_consume_sigint();
         if (force)
             jl_safe_printf("WARNING: Force throwing a SIGINT\n");
@@ -59,7 +59,7 @@ static void jl_try_throw_sigint(void)
 
 void __cdecl crt_sig_handler(int sig, int num)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     CONTEXT Context;
     switch (sig) {
     case SIGFPE:
@@ -86,7 +86,7 @@ void __cdecl crt_sig_handler(int sig, int num)
         }
         break;
     default: // SIGSEGV, (SSIGTERM, IGILL)
-        if (ptls->safe_restore)
+        if (jl_get_safe_restore())
             jl_rethrow();
         memset(&Context, 0, sizeof(Context));
         RtlCaptureContext(&Context);
@@ -108,7 +108,9 @@ static int have_backtrace_fiber;
 static void JL_NORETURN start_backtrace_fiber(void)
 {
     // collect the backtrace
-    stkerror_ptls->bt_size = rec_backtrace_ctx(stkerror_ptls->bt_data, JL_MAX_BT_SIZE, stkerror_ctx, stkerror_ptls->pgcstack);
+    stkerror_ptls->bt_size =
+        rec_backtrace_ctx(stkerror_ptls->bt_data, JL_MAX_BT_SIZE, stkerror_ctx,
+                          NULL /*current_task?*/);
     // switch back to the execution fiber
     jl_setcontext(&error_return_fiber);
     abort();
@@ -122,7 +124,8 @@ void restore_signals(void)
 
 void jl_throw_in_ctx(jl_value_t *excpt, PCONTEXT ctxThread)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
 #if defined(_CPU_X86_64_)
     DWORD64 Rsp = (ctxThread->Rsp & (DWORD64)-16) - 8;
 #elif defined(_CPU_X86_)
@@ -130,14 +133,15 @@ void jl_throw_in_ctx(jl_value_t *excpt, PCONTEXT ctxThread)
 #else
 #error WIN16 not supported :P
 #endif
-    if (!ptls->safe_restore) {
+    if (!jl_get_safe_restore()) {
         assert(excpt != NULL);
         ptls->bt_size = 0;
         if (excpt != jl_stackovf_exception) {
-            ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, ctxThread, ptls->pgcstack);
+            ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, ctxThread,
+                                              ct->gcstack);
         }
         else if (have_backtrace_fiber) {
-            JL_LOCK(&backtrace_lock);
+            JL_LOCK_NOGC(&backtrace_lock);
             stkerror_ctx = ctxThread;
             stkerror_ptls = ptls;
             jl_swapcontext(&error_return_fiber, &collect_backtrace_fiber);
@@ -222,7 +226,7 @@ static BOOL WINAPI sigint_handler(DWORD wsig) //This needs winapi types to guara
 
 LONG WINAPI jl_exception_handler(struct _EXCEPTION_POINTERS *ExceptionInfo)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     if (ExceptionInfo->ExceptionRecord->ExceptionFlags == 0) {
         switch (ExceptionInfo->ExceptionRecord->ExceptionCode) {
             case EXCEPTION_INT_DIVIDE_BY_ZERO:
@@ -248,7 +252,7 @@ LONG WINAPI jl_exception_handler(struct _EXCEPTION_POINTERS *ExceptionInfo)
                     }
                     return EXCEPTION_CONTINUE_EXECUTION;
                 }
-                if (ptls->safe_restore) {
+                if (jl_get_safe_restore()) {
                     jl_throw_in_ctx(NULL, ExceptionInfo->ContextRecord);
                     return EXCEPTION_CONTINUE_EXECUTION;
                 }
@@ -356,7 +360,23 @@ static DWORD WINAPI profile_bt( LPVOID lparam )
                     // Get backtrace data
                     bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur,
                             bt_size_max - bt_size_cur - 1, &ctxThread, NULL);
-                    // Mark the end of this block with 0
+
+                    jl_ptls_t ptls = jl_all_tls_states[0]; // given only profiling hMainThread
+
+                    // store threadid but add 1 as 0 is preserved to indicate end of block
+                    bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1;
+
+                    // store task id
+                    bt_data_prof[bt_size_cur++].uintptr = ptls->current_task;
+
+                    // store cpu cycle clock
+                    bt_data_prof[bt_size_cur++].uintptr = cycleclock();
+
+                    // store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
+                    bt_data_prof[bt_size_cur++].uintptr = ptls->sleep_check_state + 1;
+
+                    // Mark the end of this block with two 0's
+                    bt_data_prof[bt_size_cur++].uintptr = 0;
                     bt_data_prof[bt_size_cur++].uintptr = 0;
                 }
                 jl_unlock_profile();
diff --git a/src/simplevector.c b/src/simplevector.c
index 41b1be14da7f4c..2b87eb92c41d13 100644
--- a/src/simplevector.c
+++ b/src/simplevector.c
@@ -34,8 +34,8 @@ jl_svec_t *(jl_perm_symsvec)(size_t n, ...)
 
 JL_DLLEXPORT jl_svec_t *jl_svec1(void *a)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_svec_t *v = (jl_svec_t*)jl_gc_alloc(ptls, sizeof(void*) * 2,
+    jl_task_t *ct = jl_current_task;
+    jl_svec_t *v = (jl_svec_t*)jl_gc_alloc(ct->ptls, sizeof(void*) * 2,
                                            jl_simplevector_type);
     jl_svec_set_len_unsafe(v, 1);
     jl_svecset(v, 0, a);
@@ -44,8 +44,8 @@ JL_DLLEXPORT jl_svec_t *jl_svec1(void *a)
 
 JL_DLLEXPORT jl_svec_t *jl_svec2(void *a, void *b)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_svec_t *v = (jl_svec_t*)jl_gc_alloc(ptls, sizeof(void*) * 3,
+    jl_task_t *ct = jl_current_task;
+    jl_svec_t *v = (jl_svec_t*)jl_gc_alloc(ct->ptls, sizeof(void*) * 3,
                                            jl_simplevector_type);
     jl_svec_set_len_unsafe(v, 2);
     jl_svecset(v, 0, a);
@@ -55,9 +55,9 @@ JL_DLLEXPORT jl_svec_t *jl_svec2(void *a, void *b)
 
 JL_DLLEXPORT jl_svec_t *jl_alloc_svec_uninit(size_t n)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     if (n == 0) return jl_emptysvec;
-    jl_svec_t *jv = (jl_svec_t*)jl_gc_alloc(ptls, (n + 1) * sizeof(void*),
+    jl_svec_t *jv = (jl_svec_t*)jl_gc_alloc(ct->ptls, (n + 1) * sizeof(void*),
                                             jl_simplevector_type);
     jl_svec_set_len_unsafe(jv, n);
     return jv;
diff --git a/src/stackwalk.c b/src/stackwalk.c
index a00980fdf81ec8..2994e653cb462b 100644
--- a/src/stackwalk.c
+++ b/src/stackwalk.c
@@ -83,11 +83,10 @@ static int jl_unw_stepn(bt_cursor_t *cursor, jl_bt_element_t *bt_data, size_t *b
     }
 #endif
 #if !defined(_OS_WINDOWS_)
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_jmp_buf *old_buf = ptls->safe_restore;
+    jl_jmp_buf *old_buf = jl_get_safe_restore();
     jl_jmp_buf buf;
+    jl_set_safe_restore(&buf);
     if (!jl_setjmp(buf, 0)) {
-        ptls->safe_restore = &buf;
 #endif
         int have_more_frames = 1;
         while (have_more_frames) {
@@ -175,7 +174,7 @@ static int jl_unw_stepn(bt_cursor_t *cursor, jl_bt_element_t *bt_data, size_t *b
         // reader happy.
         if (n > 0) n -= 1;
     }
-    ptls->safe_restore = old_buf;
+    jl_set_safe_restore(old_buf);
 #endif
 #if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_)
     JL_UNLOCK_NOGC(&jl_in_stackwalk);
@@ -314,7 +313,7 @@ static void decode_backtrace(jl_bt_element_t *bt_data, size_t bt_size,
 
 JL_DLLEXPORT jl_value_t *jl_get_backtrace(void)
 {
-    jl_excstack_t *s = jl_get_ptls_states()->current_task->excstack;
+    jl_excstack_t *s = jl_current_task->excstack;
     jl_bt_element_t *bt_data = NULL;
     size_t bt_size = 0;
     if (s && s->top) {
@@ -335,9 +334,9 @@ JL_DLLEXPORT jl_value_t *jl_get_backtrace(void)
 // interleaved.
 JL_DLLEXPORT jl_value_t *jl_get_excstack(jl_task_t* task, int include_bt, int max_entries)
 {
-    JL_TYPECHK(catch_stack, task, (jl_value_t*)task);
-    jl_ptls_t ptls = jl_get_ptls_states();
-    if (task != ptls->current_task && task->_state == JL_TASK_STATE_RUNNABLE) {
+    JL_TYPECHK(current_exceptions, task, (jl_value_t*)task);
+    jl_task_t *ct = jl_current_task;
+    if (task != ct && task->_state == JL_TASK_STATE_RUNNABLE) {
         jl_error("Inspecting the exception stack of a task which might "
                  "be running concurrently isn't allowed.");
     }
@@ -574,11 +573,11 @@ static int jl_unw_step(bt_cursor_t *cursor, int from_signal_handler, uintptr_t *
 
 JL_DLLEXPORT jl_value_t *jl_lookup_code_address(void *ip, int skipC)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     jl_frame_t *frames = NULL;
-    int8_t gc_state = jl_gc_safe_enter(ptls);
+    int8_t gc_state = jl_gc_safe_enter(ct->ptls);
     int n = jl_getFunctionInfo(&frames, (uintptr_t)ip, skipC, 0);
-    jl_gc_safe_leave(ptls, gc_state);
+    jl_gc_safe_leave(ct->ptls, gc_state);
     jl_value_t *rs = (jl_value_t*)jl_alloc_svec(n);
     JL_GC_PUSH1(&rs);
     for (int i = 0; i < n; i++) {
@@ -694,16 +693,17 @@ extern bt_context_t *jl_to_bt_context(void *sigctx);
 
 void jl_rec_backtrace(jl_task_t *t)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
     ptls->bt_size = 0;
-    if (t == ptls->current_task) {
+    if (t == ct) {
         ptls->bt_size = rec_backtrace(ptls->bt_data, JL_MAX_BT_SIZE, 0);
         return;
     }
     if (t->copy_stack || !t->started || t->stkbuf == NULL)
         return;
-    int old = jl_atomic_compare_exchange(&t->tid, -1, ptls->tid);
-    if (old != -1 && old != ptls->tid)
+    int16_t old = -1;
+    if (!jl_atomic_cmpswap(&t->tid, &old, ptls->tid) && old != ptls->tid)
         return;
     bt_context_t *context = NULL;
 #if defined(_OS_WINDOWS_)
@@ -751,10 +751,10 @@ JL_DLLEXPORT void jl_gdblookup(void* ip)
 // Print backtrace for current exception in catch block
 JL_DLLEXPORT void jlbacktrace(void) JL_NOTSAFEPOINT
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    if (ptls->current_task == NULL)
+    jl_task_t *ct = jl_current_task;
+    if (ct->ptls == NULL)
         return;
-    jl_excstack_t *s = ptls->current_task->excstack;
+    jl_excstack_t *s = ct->excstack;
     if (!s)
         return;
     size_t i, bt_size = jl_excstack_bt_size(s, s->top);
@@ -765,7 +765,8 @@ JL_DLLEXPORT void jlbacktrace(void) JL_NOTSAFEPOINT
 }
 JL_DLLEXPORT void jlbacktracet(jl_task_t *t)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
     jl_rec_backtrace(t);
     size_t i, bt_size = ptls->bt_size;
     jl_bt_element_t *bt_data = ptls->bt_data;
@@ -774,6 +775,11 @@ JL_DLLEXPORT void jlbacktracet(jl_task_t *t)
     }
 }
 
+JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT
+{
+    jlbacktrace(); // exported alias: forwards directly to jlbacktrace() with no extra work
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/staticdata.c b/src/staticdata.c
index 44b6f33d05e7a9..f5892d4218e714 100644
--- a/src/staticdata.c
+++ b/src/staticdata.c
@@ -30,7 +30,7 @@ extern "C" {
 // TODO: put WeakRefs on the weak_refs list during deserialization
 // TODO: handle finalizers
 
-#define NUM_TAGS    147
+#define NUM_TAGS    151
 
 // An array of references that need to be restored from the sysimg
 // This is a manually constructed dual of the gvars array, which would be produced by codegen for Julia code, for C.
@@ -127,6 +127,7 @@ jl_value_t **const*const get_tags(void) {
         INSERT_TAG(jl_floatingpoint_type);
         INSERT_TAG(jl_number_type);
         INSERT_TAG(jl_signed_type);
+        INSERT_TAG(jl_pair_type);
 
         // special typenames
         INSERT_TAG(jl_tuple_typename);
@@ -153,13 +154,13 @@ jl_value_t **const*const get_tags(void) {
         INSERT_TAG(jl_memory_exception);
         INSERT_TAG(jl_undefref_exception);
         INSERT_TAG(jl_readonlymemory_exception);
+        INSERT_TAG(jl_atomicerror_type);
 
         // other special values
         INSERT_TAG(jl_emptysvec);
         INSERT_TAG(jl_emptytuple);
         INSERT_TAG(jl_false);
         INSERT_TAG(jl_true);
-        INSERT_TAG(jl_nothing);
         INSERT_TAG(jl_an_empty_string);
         INSERT_TAG(jl_an_empty_vec_any);
         INSERT_TAG(jl_module_init_order);
@@ -186,6 +187,9 @@ jl_value_t **const*const get_tags(void) {
         INSERT_TAG(jl_builtin_svec);
         INSERT_TAG(jl_builtin_getfield);
         INSERT_TAG(jl_builtin_setfield);
+        INSERT_TAG(jl_builtin_swapfield);
+        INSERT_TAG(jl_builtin_modifyfield);
+        INSERT_TAG(jl_builtin_replacefield);
         INSERT_TAG(jl_builtin_fieldtype);
         INSERT_TAG(jl_builtin_arrayref);
         INSERT_TAG(jl_builtin_const_arrayref);
@@ -241,7 +245,8 @@ static const jl_fptr_args_t id_to_fptrs[] = {
     &jl_f_typeassert, &jl_f__apply_iterate, &jl_f__apply_pure,
     &jl_f__call_latest, &jl_f__call_in_world, &jl_f_isdefined,
     &jl_f_tuple, &jl_f_svec, &jl_f_intrinsic_call, &jl_f_invoke_kwsorter,
-    &jl_f_getfield, &jl_f_setfield, &jl_f_fieldtype, &jl_f_nfields,
+    &jl_f_getfield, &jl_f_setfield, &jl_f_swapfield, &jl_f_modifyfield,
+    &jl_f_replacefield, &jl_f_fieldtype, &jl_f_nfields,
     &jl_f_arrayref, &jl_f_const_arrayref, &jl_f_arrayset, &jl_f_arraysize, &jl_f_apply_type,
     &jl_f_applicable, &jl_f_invoke, &jl_f_sizeof, &jl_f__expr, &jl_f__typevar,
     &jl_f_ifelse, &jl_f__structtype, &jl_f__abstracttype, &jl_f__primitivetype,
@@ -338,9 +343,13 @@ static void jl_load_sysimg_so(void)
         jl_dlsym(jl_sysimg_handle, "jl_sysimg_gvars_offsets", (void **)&sysimg_gvars_offsets, 1);
         sysimg_gvars_offsets += 1;
         assert(sysimg_fptrs.base);
-        uintptr_t *tls_getter_slot;
-        jl_dlsym(jl_sysimg_handle, "jl_get_ptls_states_slot", (void **)&tls_getter_slot, 1);
-        *tls_getter_slot = (uintptr_t)jl_get_ptls_states_getter();
+
+        void *pgcstack_func_slot;
+        jl_dlsym(jl_sysimg_handle, "jl_pgcstack_func_slot", &pgcstack_func_slot, 1);
+        void *pgcstack_key_slot;
+        jl_dlsym(jl_sysimg_handle, "jl_pgcstack_key_slot", &pgcstack_key_slot, 1);
+        jl_pgcstack_getkey((jl_get_pgcstack_func**)pgcstack_func_slot, (jl_pgcstack_key_t*)pgcstack_key_slot);
+
         size_t *tls_offset_idx;
         jl_dlsym(jl_sysimg_handle, "jl_tls_offset", (void **)&tls_offset_idx, 1);
         *tls_offset_idx = (uintptr_t)(jl_tls_offset == -1 ? 0 : jl_tls_offset);
@@ -411,7 +420,7 @@ static void jl_serialize_module(jl_serializer_state *s, jl_module_t *m)
 static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int recursive)
 {
     // ignore items that are given a special representation
-    if (v == NULL || jl_is_symbol(v)) {
+    if (v == NULL || jl_is_symbol(v) || v == jl_nothing) {
         return;
     }
     else if (jl_typeis(v, jl_task_type)) {
@@ -572,19 +581,22 @@ static uintptr_t _backref_id(jl_serializer_state *s, jl_value_t *v) JL_NOTSAFEPO
     else if (v == (jl_value_t*)s->ptls->root_task) {
         return (uintptr_t)TagRef << RELOC_TAG_OFFSET;
     }
+    else if (v == jl_nothing) {
+        return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + 1;
+    }
     else if (jl_typeis(v, jl_int64_type)) {
         int64_t i64 = *(int64_t*)v + NBOX_C / 2;
         if ((uint64_t)i64 < NBOX_C)
-            return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + i64 + 1;
+            return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + i64 + 2;
     }
     else if (jl_typeis(v, jl_int32_type)) {
         int32_t i32 = *(int32_t*)v + NBOX_C / 2;
         if ((uint32_t)i32 < NBOX_C)
-            return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + i32 + 1 + NBOX_C;
+            return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + i32 + 2 + NBOX_C;
     }
     else if (jl_typeis(v, jl_uint8_type)) {
         uint8_t u8 = *(uint8_t*)v;
-        return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + u8 + 1 + NBOX_C + NBOX_C;
+        return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + u8 + 2 + NBOX_C + NBOX_C;
     }
     if (idx == HT_NOTFOUND) {
         idx = ptrhash_get(&backref_table, v);
@@ -771,14 +783,23 @@ static void jl_write_values(jl_serializer_state *s)
 #define JL_ARRAY_ALIGN(jl_value, nbytes) LLT_ALIGN(jl_value, nbytes)
             jl_array_t *ar = (jl_array_t*)v;
             jl_value_t *et = jl_tparam0(jl_typeof(v));
+            size_t alen = jl_array_len(ar);
+            size_t datasize = alen * ar->elsize;
+            size_t tot = datasize;
+            int isbitsunion = jl_array_isbitsunion(ar);
+            if (isbitsunion)
+                tot += alen;
+            else if (ar->elsize == 1)
+                tot += 1;
             int ndimwords = jl_array_ndimwords(ar->flags.ndims);
-            size_t tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords * sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT);
+            size_t headersize = sizeof(jl_array_t) + ndimwords*sizeof(size_t);
             // copy header
-            ios_write(s->s, (char*)v, tsz);
+            ios_write(s->s, (char*)v, headersize);
+            size_t alignment_amt = JL_SMALL_BYTE_ALIGNMENT;
+            if (tot >= ARRAY_CACHE_ALIGN_THRESHOLD)
+                alignment_amt = JL_CACHE_BYTE_ALIGNMENT;
             // make some header modifications in-place
             jl_array_t *newa = (jl_array_t*)&s->s->buf[reloc_offset];
-            size_t alen = jl_array_len(ar);
-            size_t tot = alen * ar->elsize;
             if (newa->flags.ndims == 1)
                 newa->maxsize = alen;
             newa->offset = 0;
@@ -788,8 +809,7 @@ static void jl_write_values(jl_serializer_state *s)
 
             // write data
             if (!ar->flags.ptrarray && !ar->flags.hasptr) {
-                uintptr_t data = LLT_ALIGN(ios_pos(s->const_data), 16);
-                // realign stream to max(data-align(array), sizeof(void*))
+                uintptr_t data = LLT_ALIGN(ios_pos(s->const_data), alignment_amt);
                 write_padding(s->const_data, data - ios_pos(s->const_data));
                 // write data and relocations
                 newa->data = NULL; // relocation offset
@@ -804,22 +824,27 @@ static void jl_write_values(jl_serializer_state *s)
                         write_pointer(s->const_data);
                 }
                 else {
-                    int isbitsunion = jl_array_isbitsunion(ar);
-                    if (ar->elsize == 1 && !isbitsunion)
-                        tot += 1;
-                    ios_write(s->const_data, (char*)jl_array_data(ar), tot);
-                    if (isbitsunion)
+                    if (isbitsunion) {
+                        ios_write(s->const_data, (char*)jl_array_data(ar), datasize);
                         ios_write(s->const_data, jl_array_typetagdata(ar), alen);
+                    }
+                    else {
+                        ios_write(s->const_data, (char*)jl_array_data(ar), tot);
+                    }
                 }
             }
             else {
-                newa->data = (void*)tsz; // relocation offset
+                size_t data = LLT_ALIGN(ios_pos(s->s), alignment_amt);
+                size_t padding_amt = data - ios_pos(s->s);
+                write_padding(s->s, padding_amt);
+                headersize += padding_amt;
+                newa->data = (void*)headersize; // relocation offset
                 arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_array_t, data))); // relocation location
                 arraylist_push(&s->relocs_list, (void*)(((uintptr_t)DataRef << RELOC_TAG_OFFSET) + item)); // relocation target
                 if (ar->flags.hasptr) {
                     // copy all of the data first
                     const char *data = (const char*)jl_array_data(ar);
-                    ios_write(s->s, data, tot);
+                    ios_write(s->s, data, datasize);
                     // the rewrite all of the embedded pointers to null+relocation
                     uint16_t elsz = ar->elsize;
                     size_t j, np = ((jl_datatype_t*)et)->layout->npointers;
@@ -829,12 +854,12 @@ static void jl_write_values(jl_serializer_state *s)
                             size_t offset = i * elsz + jl_ptr_offset(((jl_datatype_t*)et), j) * sizeof(jl_value_t*);
                             jl_value_t *fld = *(jl_value_t**)&data[offset];
                             if (fld != NULL) {
-                                arraylist_push(&s->relocs_list, (void*)(uintptr_t)(reloc_offset + tsz + offset)); // relocation location
+                                arraylist_push(&s->relocs_list, (void*)(uintptr_t)(reloc_offset + headersize + offset)); // relocation location
                                 arraylist_push(&s->relocs_list, (void*)backref_id(s, fld)); // relocation target
-                                memset(&s->s->buf[reloc_offset + tsz + offset], 0, sizeof(fld)); // relocation offset (none)
+                                memset(&s->s->buf[reloc_offset + headersize + offset], 0, sizeof(fld)); // relocation offset (none)
                             }
                             else {
-                                assert(*(jl_value_t**)&s->s->buf[reloc_offset + tsz + offset] == NULL);
+                                assert(*(jl_value_t**)&s->s->buf[reloc_offset + headersize + offset] == NULL);
                             }
                         }
                     }
@@ -901,7 +926,7 @@ static void jl_write_values(jl_serializer_state *s)
                 write_padding(s->s, offset - tot);
                 tot = offset;
                 size_t fsz = jl_field_size(t, i);
-                if (t->mutabl && jl_is_cpointer_type(jl_field_type(t, i))) {
+                if (t->name->mutabl && jl_is_cpointer_type(jl_field_type(t, i))) {
                     // reset Ptr fields to C_NULL
                     assert(!jl_field_isptr(t, i));
                     write_pointer(s->s);
@@ -1012,6 +1037,20 @@ static void jl_write_values(jl_serializer_state *s)
                     ios_write(s->const_data, flddesc, fldsize);
                 }
             }
+            else if (jl_is_typename(v)) {
+                jl_typename_t *tn = (jl_typename_t*)v;
+                jl_typename_t *newtn = (jl_typename_t*)&s->s->buf[reloc_offset];
+                if (tn->atomicfields != NULL) {
+                    size_t nf = jl_svec_len(tn->names);
+                    uintptr_t layout = LLT_ALIGN(ios_pos(s->const_data), sizeof(void*));
+                    write_padding(s->const_data, layout - ios_pos(s->const_data)); // realign stream
+                    newtn->atomicfields = NULL; // relocation offset
+                    layout /= sizeof(void*);
+                    arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_typename_t, atomicfields))); // relocation location
+                    arraylist_push(&s->relocs_list, (void*)(((uintptr_t)ConstDataRef << RELOC_TAG_OFFSET) + layout)); // relocation target
+                    ios_write(s->const_data, (char*)tn->atomicfields, nf);
+                }
+            }
             else if (((jl_datatype_t*)(jl_typeof(v)))->name == jl_idtable_typename) {
                 // will need to rehash this, later (after types are fully constructed)
                 arraylist_push(&reinit_list, (void*)item);
@@ -1041,7 +1080,7 @@ static void jl_write_gv_syms(jl_serializer_state *s, jl_sym_t *v)
         jl_write_gv_syms(s, v->right);
 }
 
-static void jl_write_gv_int(jl_serializer_state *s, jl_value_t *v)
+static void jl_write_gv_tagref(jl_serializer_state *s, jl_value_t *v)
 {
     int32_t gv = jl_get_llvm_gv(native_functions, (jl_value_t*)v);
     if (gv != 0) {
@@ -1050,7 +1089,7 @@ static void jl_write_gv_int(jl_serializer_state *s, jl_value_t *v)
         record_gvar(s, gv, item);
     }
 }
-static void jl_write_gv_ints(jl_serializer_state *s)
+static void jl_write_gv_tagrefs(jl_serializer_state *s)
 {
     // this also ensures all objects referenced in the code have
     // references in the system image to their global variable
@@ -1058,12 +1097,15 @@ static void jl_write_gv_ints(jl_serializer_state *s)
     // they might not have had a reference anywhere in the code
     // image other than here
     size_t i;
+    jl_write_gv_tagref(s, (jl_value_t*)s->ptls->root_task);
+    jl_write_gv_tagref(s, s->ptls->root_task->tls);
+    jl_write_gv_tagref(s, jl_nothing);
     for (i = 0; i < NBOX_C; i++) {
-        jl_write_gv_int(s, jl_box_int32((int32_t)i - NBOX_C / 2));
-        jl_write_gv_int(s, jl_box_int64((int64_t)i - NBOX_C / 2));
+        jl_write_gv_tagref(s, jl_box_int32((int32_t)i - NBOX_C / 2));
+        jl_write_gv_tagref(s, jl_box_int64((int64_t)i - NBOX_C / 2));
     }
     for (i = 0; i < 256; i++) {
-        jl_write_gv_int(s, jl_box_uint8(i));
+        jl_write_gv_tagref(s, jl_box_uint8(i));
     }
 }
 
@@ -1114,7 +1156,7 @@ static uintptr_t get_reloc_for_item(uintptr_t reloc_item, size_t reloc_offset)
             assert(offset < nsym_tag && "corrupt relocation item id");
             break;
         case TagRef:
-            assert(offset < 2 * NBOX_C + 257 && "corrupt relocation item id");
+            assert(offset < 2 * NBOX_C + 258 && "corrupt relocation item id");
             break;
         case BindingRef:
             assert(offset == 0 && "corrupt relocation offset");
@@ -1154,7 +1196,9 @@ static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t bas
     case TagRef:
         if (offset == 0)
             return (uintptr_t)s->ptls->root_task;
-        offset -= 1;
+        if (offset == 1)
+            return (uintptr_t)jl_nothing;
+        offset -= 2;
         if (offset < NBOX_C)
             return (uintptr_t)jl_box_int64((int64_t)offset - NBOX_C / 2);
         offset -= NBOX_C;
@@ -1510,7 +1554,7 @@ static void jl_save_system_image_to_stream(ios_t *f) JL_GC_DISABLED
     s.relocs = &relocs;
     s.gvar_record = &gvar_record;
     s.fptr_record = &fptr_record;
-    s.ptls = jl_get_ptls_states();
+    s.ptls = jl_current_task->ptls;
     arraylist_new(&s.relocs_list, 0);
     arraylist_new(&s.gctags_list, 0);
     jl_value_t **const*const tags = get_tags();
@@ -1566,7 +1610,7 @@ static void jl_save_system_image_to_stream(ios_t *f) JL_GC_DISABLED
         jl_write_values(&s);
         jl_write_relocations(&s);
         jl_write_gv_syms(&s, jl_get_root_symbol());
-        jl_write_gv_ints(&s);
+        jl_write_gv_tagrefs(&s);
     }
 
     if (sysimg.size > ((uintptr_t)1 << RELOC_TAG_OFFSET) ||
@@ -1695,7 +1739,7 @@ static void jl_restore_system_image_from_stream(ios_t *f) JL_GC_DISABLED
     s.relocs = &relocs;
     s.gvar_record = &gvar_record;
     s.fptr_record = &fptr_record;
-    s.ptls = jl_get_ptls_states();
+    s.ptls = jl_current_task->ptls;
     arraylist_new(&s.relocs_list, 0);
     arraylist_new(&s.gctags_list, 0);
     jl_value_t **const*const tags = get_tags();
@@ -1738,9 +1782,11 @@ static void jl_restore_system_image_from_stream(ios_t *f) JL_GC_DISABLED
         jl_value_t **tag = tags[i];
         *tag = jl_read_value(&s);
     }
-    s.ptls->root_task = (jl_task_t*)jl_gc_alloc(s.ptls, sizeof(jl_task_t), jl_task_type);
-    memset(s.ptls->root_task, 0, sizeof(jl_task_t));
+    // set typeof extra-special values now that we have the type set by tags above
+    jl_astaggedvalue(jl_current_task)->header = (uintptr_t)jl_task_type | jl_astaggedvalue(jl_current_task)->header;
+    jl_astaggedvalue(jl_nothing)->header = (uintptr_t)jl_nothing_type | jl_astaggedvalue(jl_nothing)->header;
     s.ptls->root_task->tls = jl_read_value(&s);
+    jl_gc_wb(s.ptls->root_task, s.ptls->root_task->tls);
     jl_init_int32_int64_cache();
     jl_init_box_caches();
 
diff --git a/src/subtype.c b/src/subtype.c
index 90f5438cfd1b62..c3512eeb17daca 100644
--- a/src/subtype.c
+++ b/src/subtype.c
@@ -42,11 +42,19 @@ extern "C" {
 // TODO: the stack probably needs to be artificially large because of some
 // deeper problem (see #21191) and could be shrunk once that is fixed
 typedef struct {
-    int depth;
-    int more;
+    int16_t depth;
+    int16_t more;
+    int16_t used;   // number of leading bits of `stack` initialized so far; later bits are zeroed lazily on first use
     uint32_t stack[100];  // stack of bits represented as a bit vector
 } jl_unionstate_t;
 
+typedef struct {
+    int16_t depth;  // copy of jl_unionstate_t.depth
+    int16_t more;   // copy of jl_unionstate_t.more
+    int16_t used;   // copy of jl_unionstate_t.used; also determines how many bytes `stack` holds
+    void *stack;    // alloca'd copy of the first (used+7)/8 bytes of the bit vector (filled by push_unionstate)
+} jl_saved_unionstate_t;
+
 // Linked list storing the type variable environment. A new jl_varbinding_t
 // is pushed for each UnionAll type we encounter. `lb` and `ub` are updated
 // during the computation.
@@ -60,22 +68,21 @@ typedef struct jl_varbinding_t {
     int8_t occurs_inv;  // occurs in invariant position
     int8_t occurs_cov;  // # of occurrences in covariant position
     int8_t concrete;    // 1 if another variable has a constraint forcing this one to be concrete
-    // in covariant position, we need to try constraining a variable in different ways:
-    // 0 - unconstrained
-    // 1 - less than
-    // 2 - greater than
-    // 3 - inexpressible - occurs when the var has non-trivial overlap with another type,
-    //                     and we would need to return `intersect(var,other)`. in this case
-    //                     we choose to over-estimate the intersection by returning the var.
+    // constraintkind: in covariant position, we try three different ways to compute var ∩ type:
+    // let ub = var.ub ∩ type
+    // 0 - var.ub <: type ? var : ub
+    // 1 - var.ub = ub; return var
+    // 2 - either (var.ub = ub; return var), or return ub
     int8_t constraintkind;
-    int depth0;         // # of invariant constructors nested around the UnionAll type for this var
+    int8_t intvalued;      // must be integer-valued; i.e. occurs as N in Vararg{_,N}
+    int8_t limited;
+    int16_t depth0;         // # of invariant constructors nested around the UnionAll type for this var
     // when this variable's integer value is compared to that of another,
     // it equals `other + offset`. used by vararg length parameters.
-    int offset;
+    int16_t offset;
     // array of typevars that our bounds depend on, whose UnionAlls need to be
     // moved outside ours.
     jl_array_t *innervars;
-    int intvalued;      // must be integer-valued; i.e. occurs as N in Vararg{_,N}
     struct jl_varbinding_t *prev;
 } jl_varbinding_t;
 
@@ -94,6 +101,7 @@ typedef struct jl_stenv_t {
     int ignore_free;          // treat free vars as black boxes; used during intersection
     int intersection;         // true iff subtype is being called from intersection
     int emptiness_only;       // true iff intersection only needs to test for emptiness
+    int triangular;           // when intersecting Ref{X} with Ref{<:Y}
 } jl_stenv_t;
 
 // state manipulation utilities
@@ -129,6 +137,23 @@ static void statestack_set(jl_unionstate_t *st, int i, int val) JL_NOTSAFEPOINT
         st->stack[i>>5] &= ~(1u<<(i&31));
 }
 
+#define push_unionstate(saved, src) /* save src's state into saved */  \
+    do {                                                                \
+        (saved)->depth = (src)->depth;                                  \
+        (saved)->more = (src)->more;                                    \
+        (saved)->used = (src)->used;                                    \
+        (saved)->stack = alloca(((src)->used+7)/8); /* bits -> bytes */ \
+        memcpy((saved)->stack, &(src)->stack, ((src)->used+7)/8);       \
+    } while (0) /* no ';' here: the call site supplies it */
+
+#define pop_unionstate(dst, saved) /* restore dst from a saved copy */  \
+    do {                                                                \
+        (dst)->depth = (saved)->depth;                                  \
+        (dst)->more = (saved)->more;                                    \
+        (dst)->used = (saved)->used;                                    \
+        memcpy(&(dst)->stack, (saved)->stack, ((saved)->used+7)/8);     \
+    } while (0) /* no ';' here: the call site supplies it */
+
 typedef struct {
     int8_t *buf;
     int rdepth;
@@ -486,6 +511,10 @@ static jl_value_t *pick_union_element(jl_value_t *u JL_PROPAGATES_ROOT, jl_stenv
 {
     jl_unionstate_t *state = R ? &e->Runions : &e->Lunions;
     do {
+        if (state->depth >= state->used) {
+            statestack_set(state, state->used, 0);
+            state->used++;
+        }
         int ui = statestack_get(state, state->depth);
         state->depth++;
         if (ui == 0) {
@@ -514,11 +543,10 @@ static int subtype_ccheck(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
         return 1;
     if (x == (jl_value_t*)jl_any_type && jl_is_datatype(y))
         return 0;
-    jl_unionstate_t oldLunions = e->Lunions;
-    jl_unionstate_t oldRunions = e->Runions;
+    jl_saved_unionstate_t oldLunions; push_unionstate(&oldLunions, &e->Lunions);
+    jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions);
     int sub;
-    memset(e->Lunions.stack, 0, sizeof(e->Lunions.stack));
-    memset(e->Runions.stack, 0, sizeof(e->Runions.stack));
+    e->Lunions.used = e->Runions.used = 0;
     e->Runions.depth = 0;
     e->Runions.more = 0;
     e->Lunions.depth = 0;
@@ -526,8 +554,8 @@ static int subtype_ccheck(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
 
     sub = forall_exists_subtype(x, y, e, 0);
 
-    e->Runions = oldRunions;
-    e->Lunions = oldLunions;
+    pop_unionstate(&e->Runions, &oldRunions);
+    pop_unionstate(&e->Lunions, &oldLunions);
     return sub;
 }
 
@@ -646,7 +674,7 @@ static int is_leaf_bound(jl_value_t *v) JL_NOTSAFEPOINT
     if (v == jl_bottom_type)
         return 1;
     if (jl_is_datatype(v)) {
-        if (((jl_datatype_t*)v)->abstract) {
+        if (((jl_datatype_t*)v)->name->abstract) {
             if (jl_is_type_type(v))
                 return 1;//!jl_has_free_typevars(jl_tparam0(v));
             return 0;
@@ -731,8 +759,8 @@ static jl_unionall_t *unalias_unionall(jl_unionall_t *u, jl_stenv_t *e)
 static int subtype_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8_t R, int param)
 {
     u = unalias_unionall(u, e);
-    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0,
-                           R ? e->Rinvdepth : e->invdepth, 0, NULL, 0, e->vars };
+    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0,
+                           R ? e->Rinvdepth : e->invdepth, 0, NULL, e->vars };
     JL_GC_PUSH4(&u, &vb.lb, &vb.ub, &vb.innervars);
     e->vars = &vb;
     int ans;
@@ -1148,6 +1176,10 @@ static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param)
             // union against the variable before trying to take it apart to see if there are any
             // variables lurking inside.
             jl_unionstate_t *state = &e->Runions;
+            if (state->depth >= state->used) {
+                statestack_set(state, state->used, 0);
+                state->used++;
+            }
             ui = statestack_get(state, state->depth);
             state->depth++;
             if (ui == 0)
@@ -1310,13 +1342,13 @@ static int forall_exists_equal(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
         (is_definite_length_tuple_type(x) && is_indefinite_length_tuple_type(y)))
         return 0;
 
-    jl_unionstate_t oldLunions = e->Lunions;
-    memset(e->Lunions.stack, 0, sizeof(e->Lunions.stack));
+    jl_saved_unionstate_t oldLunions; push_unionstate(&oldLunions, &e->Lunions);
+    e->Lunions.used = 0;
     int sub;
 
     if (!jl_has_free_typevars(x) || !jl_has_free_typevars(y)) {
-        jl_unionstate_t oldRunions = e->Runions;
-        memset(e->Runions.stack, 0, sizeof(e->Runions.stack));
+        jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions);
+        e->Runions.used = 0;
         e->Runions.depth = 0;
         e->Runions.more = 0;
         e->Lunions.depth = 0;
@@ -1324,7 +1356,7 @@ static int forall_exists_equal(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
 
         sub = forall_exists_subtype(x, y, e, 2);
 
-        e->Runions = oldRunions;
+        pop_unionstate(&e->Runions, &oldRunions);
     }
     else {
         int lastset = 0;
@@ -1342,13 +1374,13 @@ static int forall_exists_equal(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
         }
     }
 
-    e->Lunions = oldLunions;
+    pop_unionstate(&e->Lunions, &oldLunions);
     return sub && subtype(y, x, e, 0);
 }
 
 static int exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, jl_value_t *saved, jl_savedenv_t *se, int param)
 {
-    memset(e->Runions.stack, 0, sizeof(e->Runions.stack));
+    e->Runions.used = 0;
     int lastset = 0;
     while (1) {
         e->Runions.depth = 0;
@@ -1379,7 +1411,7 @@ static int forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, in
     JL_GC_PUSH1(&saved);
     save_env(e, &saved, &se);
 
-    memset(e->Lunions.stack, 0, sizeof(e->Lunions.stack));
+    e->Lunions.used = 0;
     int lastset = 0;
     int sub;
     while (1) {
@@ -1413,8 +1445,10 @@ static void init_stenv(jl_stenv_t *e, jl_value_t **env, int envsz)
     e->ignore_free = 0;
     e->intersection = 0;
     e->emptiness_only = 0;
+    e->triangular = 0;
     e->Lunions.depth = 0;      e->Runions.depth = 0;
     e->Lunions.more = 0;       e->Runions.more = 0;
+    e->Lunions.used = 0;       e->Runions.used = 0;
 }
 
 // subtyping entry points
@@ -2084,14 +2118,14 @@ static jl_value_t *intersect_aside(jl_value_t *x, jl_value_t *y, jl_stenv_t *e,
     if (y == (jl_value_t*)jl_any_type && !jl_is_typevar(x))
         return x;
 
-    jl_unionstate_t oldRunions = e->Runions;
+    jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions);
     int savedepth = e->invdepth, Rsavedepth = e->Rinvdepth;
     // TODO: this doesn't quite make sense
     e->invdepth = e->Rinvdepth = d;
 
     jl_value_t *res = intersect_all(x, y, e);
 
-    e->Runions = oldRunions;
+    pop_unionstate(&e->Runions, &oldRunions);
     e->invdepth = savedepth;
     e->Rinvdepth = Rsavedepth;
     return res;
@@ -2102,10 +2136,10 @@ static jl_value_t *intersect_union(jl_value_t *x, jl_uniontype_t *u, jl_stenv_t
     if (param == 2 || (!jl_has_free_typevars(x) && !jl_has_free_typevars((jl_value_t*)u))) {
         jl_value_t *a=NULL, *b=NULL;
         JL_GC_PUSH2(&a, &b);
-        jl_unionstate_t oldRunions = e->Runions;
+        jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions);
         a = R ? intersect_all(x, u->a, e) : intersect_all(u->a, x, e);
         b = R ? intersect_all(x, u->b, e) : intersect_all(u->b, x, e);
-        e->Runions = oldRunions;
+        pop_unionstate(&e->Runions, &oldRunions);
         jl_value_t *i = simple_join(a,b);
         JL_GC_POP();
         return i;
@@ -2171,7 +2205,7 @@ static void set_bound(jl_value_t **bound, jl_value_t *val, jl_tvar_t *v, jl_sten
         return;
     jl_varbinding_t *btemp = e->vars;
     while (btemp != NULL) {
-        if (btemp->lb == (jl_value_t*)v && btemp->ub == (jl_value_t*)v &&
+        if ((btemp->lb == (jl_value_t*)v || btemp->ub == (jl_value_t*)v) &&
             in_union(val, (jl_value_t*)btemp->var))
             return;
         btemp = btemp->prev;
@@ -2210,13 +2244,44 @@ static int subtype_in_env_existential(jl_value_t *x, jl_value_t *y, jl_stenv_t *
     return issub;
 }
 
+// Report whether typevar `y` can be reached from `x` by following variable bounds; used to avoid cycles.
+static int reachable_var(jl_value_t *x, jl_tvar_t *y, jl_stenv_t *e)
+{
+    if (in_union(x, (jl_value_t*)y))
+        return 1;
+    jl_varbinding_t *binding = jl_is_typevar(x) ? lookup(e, (jl_tvar_t*)x) : NULL;
+    if (binding == NULL)
+        return 0;
+    if (reachable_var(binding->ub, y, e))
+        return 1;
+    return reachable_var(binding->lb, y, e);
+}
+
+// Returns 1 when setting v == t would imply v == SomeType{v}, a constraint that cannot be satisfied.
+static int check_unsat_bound(jl_value_t *t, jl_tvar_t *v, jl_stenv_t *e) JL_NOTSAFEPOINT
+{
+    if (var_occurs_inside(t, v, 0, 0))
+        return 1;
+    // a binding whose lb and ub are both `v` has been set equal to `v`, so test its var too
+    jl_value_t *self = (jl_value_t*)v;
+    jl_varbinding_t *vb;
+    for (vb = e->vars; vb != NULL; vb = vb->prev) {
+        if (vb->lb == self && vb->ub == self && var_occurs_inside(t, vb->var, 0, 0))
+            return 1;
+    }
+    return 0;
+}
+
 static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int8_t R, int param)
 {
     jl_varbinding_t *bb = lookup(e, b);
     if (bb == NULL)
         return R ? intersect_aside(a, b->ub, e, 1, 0) : intersect_aside(b->ub, a, e, 0, 0);
-    if (bb->lb == bb->ub && jl_is_typevar(bb->lb) && bb->lb != (jl_value_t*)b)
+    if (reachable_var(bb->lb, b, e) || reachable_var(bb->ub, b, e))
+        return a;
+    if (bb->lb == bb->ub && jl_is_typevar(bb->lb)) {
         return intersect(a, bb->lb, e, param);
+    }
     if (!jl_is_type(a) && !jl_is_typevar(a))
         return set_var_to_const(bb, a, NULL);
     int d = bb->depth0;
@@ -2236,7 +2301,9 @@ static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int
             ub = a;
         }
         else {
+            e->triangular++;
             ub = R ? intersect_aside(a, bb->ub, e, 1, d) : intersect_aside(bb->ub, a, e, 0, d);
+            e->triangular--;
             save_env(e, &root, &se);
             int issub = subtype_in_env_existential(bb->lb, ub, e, 0, d);
             restore_env(e, root, &se);
@@ -2248,20 +2315,10 @@ static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int
         }
         if (ub != (jl_value_t*)b) {
             if (jl_has_free_typevars(ub)) {
-                // constraint X == Ref{X} is unsatisfiable. also check variables set equal to X.
-                if (var_occurs_inside(ub, b, 0, 0)) {
+                if (check_unsat_bound(ub, b, e)) {
                     JL_GC_POP();
                     return jl_bottom_type;
                 }
-                jl_varbinding_t *btemp = e->vars;
-                while (btemp != NULL) {
-                    if (btemp->lb == (jl_value_t*)b && btemp->ub == (jl_value_t*)b &&
-                        var_occurs_inside(ub, btemp->var, 0, 0)) {
-                        JL_GC_POP();
-                        return jl_bottom_type;
-                    }
-                    btemp = btemp->prev;
-                }
             }
             bb->ub = ub;
             bb->lb = ub;
@@ -2269,67 +2326,33 @@ static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int
         JL_GC_POP();
         return ub;
     }
-    else if (bb->constraintkind == 0) {
-        if (!jl_is_typevar(bb->ub) && !jl_is_typevar(a)) {
-            if (try_subtype_in_env(bb->ub, a, e, 0, d))
-                return (jl_value_t*)b;
-        }
-        return R ? intersect_aside(a, bb->ub, e, 1, d) : intersect_aside(bb->ub, a, e, 0, d);
-    }
-    else if (bb->concrete || bb->constraintkind == 1) {
-        jl_value_t *ub = R ? intersect_aside(a, bb->ub, e, 1, d) : intersect_aside(bb->ub, a, e, 0, d);
-        if (ub == jl_bottom_type)
-            return jl_bottom_type;
-        JL_GC_PUSH1(&ub);
-        if (!R && !subtype_bounds_in_env(bb->lb, a, e, 0, d)) {
-            // this fixes issue #30122. TODO: better fix for R flag.
-            JL_GC_POP();
-            return jl_bottom_type;
-        }
-        JL_GC_POP();
-        set_bound(&bb->ub, ub, b, e);
-        return (jl_value_t*)b;
-    }
-    else if (bb->constraintkind == 2) {
-        // TODO: removing this case fixes many test_brokens in test/subtype.jl
-        // but breaks other tests.
-        if (!subtype_bounds_in_env(a, bb->ub, e, 1, d)) {
-            // mark var as unsatisfiable by making it circular
-            bb->lb = (jl_value_t*)b;
-            return jl_bottom_type;
-        }
-        jl_value_t *lb = simple_join(bb->lb, a);
-        set_bound(&bb->lb, lb, b, e);
-        return a;
-    }
-    assert(bb->constraintkind == 3);
     jl_value_t *ub = R ? intersect_aside(a, bb->ub, e, 1, d) : intersect_aside(bb->ub, a, e, 0, d);
     if (ub == jl_bottom_type)
         return jl_bottom_type;
-    if (jl_is_typevar(a))
+    if (bb->constraintkind == 1 || e->triangular) {
+        if (e->triangular && check_unsat_bound(ub, b, e))
+            return jl_bottom_type;
+        set_bound(&bb->ub, ub, b, e);
         return (jl_value_t*)b;
-    if (ub == a) {
-        if (bb->lb == jl_bottom_type) {
-            set_bound(&bb->ub, a, b, e);
+    }
+    else if (bb->constraintkind == 0) {
+        JL_GC_PUSH1(&ub);
+        if (!jl_is_typevar(a) && try_subtype_in_env(bb->ub, a, e, 0, d)) {
+            JL_GC_POP();
             return (jl_value_t*)b;
         }
+        JL_GC_POP();
         return ub;
     }
-    else if (bb->ub == bb->lb) {
-        return ub;
-    }
-    root = NULL;
-    JL_GC_PUSH2(&root, &ub);
-    save_env(e, &root, &se);
-    jl_value_t *ii = R ? intersect_aside(a, bb->lb, e, 1, d) : intersect_aside(bb->lb, a, e, 0, d);
-    if (ii == jl_bottom_type) {
-        restore_env(e, root, &se);
-        ii = (jl_value_t*)b;
+    assert(bb->constraintkind == 2);
+    if (!jl_is_typevar(a)) {
+        if (ub == a && bb->lb != jl_bottom_type)
+            return ub;
+        else if (jl_egal(bb->ub, bb->lb))
+            return ub;
         set_bound(&bb->ub, ub, b, e);
     }
-    free_env(&se);
-    JL_GC_POP();
-    return ii;
+    return (jl_value_t*)b;
 }
 
 // test whether `var` occurs inside constructors. `want_inv` tests only inside
@@ -2373,7 +2396,7 @@ static int var_occurs_inside(jl_value_t *v, jl_tvar_t *var, int inside, int want
 }
 
 // Caller might not have rooted `res`
-static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbinding_t *vb, jl_stenv_t *e)
+static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbinding_t *vb, jl_unionall_t *u, jl_stenv_t *e)
 {
     jl_value_t *varval = NULL;
     jl_tvar_t *newvar = vb->var;
@@ -2386,7 +2409,10 @@ static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbind
         // given x<:T<:x, substitute x for T
         varval = vb->ub;
     }
-    else if (!vb->occurs_inv && is_leaf_bound(vb->ub)) {
+    // TODO: `vb.occurs_cov == 1` here allows substituting Tuple{<:X} => Tuple{X},
+    // which is valid but changes some ambiguity errors so we don't need to do it yet.
+    else if ((/*vb->occurs_cov == 1 || */is_leaf_bound(vb->ub)) &&
+             !var_occurs_invariant(u->body, u->var, 0)) {
         // replace T<:x with x in covariant position when possible
         varval = vb->ub;
     }
@@ -2404,9 +2430,8 @@ static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbind
         }
     }
 
-    // prefer generating a fresh typevar, to avoid repeated renaming if the result
-    // is compared to one of the intersected types later.
-    if (!varval)
+    // TODO: this can prevent us from matching typevar identities later
+    if (!varval && (vb->lb != vb->var->lb || vb->ub != vb->var->ub))
         newvar = jl_new_typevar(vb->var->name, vb->lb, vb->ub);
 
     // remove/replace/rewrap free occurrences of this var in the environment
@@ -2521,7 +2546,13 @@ static jl_value_t *intersect_unionall_(jl_value_t *t, jl_unionall_t *u, jl_stenv
     // if the var for this unionall (based on identity) already appears somewhere
     // in the environment, rename to get a fresh var.
     // TODO: might need to look inside types in btemp->lb and btemp->ub
+    int envsize = 0;
     while (btemp != NULL) {
+        envsize++;
+        if (envsize > 120) {
+            vb->limited = 1;
+            return t;
+        }
         if (btemp->var == u->var || btemp->lb == (jl_value_t*)u->var ||
             btemp->ub == (jl_value_t*)u->var) {
             u = rename_unionall(u);
@@ -2571,46 +2602,37 @@ static jl_value_t *intersect_unionall_(jl_value_t *t, jl_unionall_t *u, jl_stenv
     }
     if (res != jl_bottom_type)
         // res is rooted by callee
-        res = finish_unionall(res, vb, e);
+        res = finish_unionall(res, vb, u, e);
     JL_GC_POP();
     return res;
 }
 
 static jl_value_t *intersect_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8_t R, int param)
 {
-    jl_value_t *res=NULL, *res2=NULL, *save=NULL, *save2=NULL;
-    jl_savedenv_t se, se2;
-    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0,
-                           R ? e->Rinvdepth : e->invdepth, 0, NULL, 0, e->vars };
-    JL_GC_PUSH6(&res, &save2, &vb.lb, &vb.ub, &save, &vb.innervars);
+    jl_value_t *res=NULL, *save=NULL;
+    jl_savedenv_t se;
+    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0,
+                           R ? e->Rinvdepth : e->invdepth, 0, NULL, e->vars };
+    JL_GC_PUSH5(&res, &vb.lb, &vb.ub, &save, &vb.innervars);
     save_env(e, &save, &se);
     res = intersect_unionall_(t, u, e, R, param, &vb);
-    if (res != jl_bottom_type) {
+    if (vb.limited) {
+        // if the environment got too big, avoid tree recursion and propagate the flag
+        if (e->vars)
+            e->vars->limited = 1;
+    }
+    else if (res != jl_bottom_type) {
         if (vb.concrete || vb.occurs_inv>1 || u->var->lb != jl_bottom_type || (vb.occurs_inv && vb.occurs_cov)) {
             restore_env(e, NULL, &se);
             vb.occurs_cov = vb.occurs_inv = 0;
-            vb.constraintkind = 3;
+            vb.constraintkind = vb.concrete ? 1 : 2;
             res = intersect_unionall_(t, u, e, R, param, &vb);
         }
-        else if (vb.occurs_cov) {
-            save_env(e, &save2, &se2);
+        else if (vb.occurs_cov && !var_occurs_invariant(u->body, u->var, 0)) {
             restore_env(e, save, &se);
             vb.occurs_cov = vb.occurs_inv = 0;
-            vb.lb = u->var->lb; vb.ub = u->var->ub;
             vb.constraintkind = 1;
-            res2 = intersect_unionall_(t, u, e, R, param, &vb);
-            if (res2 == jl_bottom_type) {
-                restore_env(e, save, &se);
-                vb.occurs_cov = vb.occurs_inv = 0;
-                vb.lb = u->var->lb; vb.ub = u->var->ub;
-                vb.constraintkind = 2;
-                res2 = intersect_unionall_(t, u, e, R, param, &vb);
-                if (res2 == jl_bottom_type)
-                    restore_env(e, save2, &se2);
-            }
-            if (res2 != jl_bottom_type)
-                res = res2;
-            free_env(&se2);
+            res = intersect_unionall_(t, u, e, R, param, &vb);
         }
     }
     free_env(&se);
@@ -2923,19 +2945,6 @@ static int subtype_by_bounds(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
     return compareto_var(x, (jl_tvar_t*)y, e, -1) || compareto_var(y, (jl_tvar_t*)x, e, 1);
 }
 
-// See if var y is reachable from x via bounds; used to avoid cycles.
-static int reachable_var(jl_value_t *x, jl_tvar_t *y, jl_stenv_t *e)
-{
-    if (x == (jl_value_t*)y)
-        return 1;
-    if (!jl_is_typevar(x))
-        return 0;
-    jl_varbinding_t *xv = lookup(e, (jl_tvar_t*)x);
-    if (xv == NULL)
-        return 0;
-    return reachable_var(xv->ub, y, e) || reachable_var(xv->lb, y, e);
-}
-
 // `param` means we are currently looking at a parameter of a type constructor
 // (as opposed to being outside any type constructor, or comparing variable bounds).
 // this is used to record the positions where type variables occur for the
@@ -3009,14 +3018,13 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa
                 jl_value_t *ub=NULL, *lb=NULL;
                 JL_GC_PUSH2(&lb, &ub);
                 ub = intersect_aside(xub, yub, e, 0, xx ? xx->depth0 : 0);
-                if (xlb == y)
+                if (reachable_var(xlb, (jl_tvar_t*)y, e))
                     lb = ylb;
                 else
                     lb = simple_join(xlb, ylb);
                 if (yy) {
-                    if (!subtype_by_bounds(lb, y, e))
-                        yy->lb = lb;
-                    if (!subtype_by_bounds(y, ub, e))
+                    yy->lb = lb;
+                    if (!reachable_var(ub, (jl_tvar_t*)y, e))
                         yy->ub = ub;
                     assert(yy->ub != y);
                     assert(yy->lb != y);
@@ -3152,7 +3160,7 @@ static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
 {
     e->Runions.depth = 0;
     e->Runions.more = 0;
-    memset(e->Runions.stack, 0, sizeof(e->Runions.stack));
+    e->Runions.used = 0;
     jl_value_t **is;
     JL_GC_PUSHARGS(is, 3);
     jl_value_t **saved = &is[2];
@@ -3169,11 +3177,8 @@ static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
         save_env(e, saved, &se);
     }
     while (e->Runions.more) {
-        if (e->emptiness_only && ii != jl_bottom_type) {
-            free_env(&se);
-            JL_GC_POP();
-            return ii;
-        }
+        if (e->emptiness_only && ii != jl_bottom_type)
+            break;
         e->Runions.depth = 0;
         int set = e->Runions.more - 1;
         e->Runions.more = 0;
@@ -3202,9 +3207,8 @@ static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
         }
         total_iter++;
         if (niter > 3 || total_iter > 400000) {
-            free_env(&se);
-            JL_GC_POP();
-            return y;
+            ii = y;
+            break;
         }
     }
     free_env(&se);
diff --git a/src/support/htable.inc b/src/support/htable.inc
index fa59624a4998f5..7a9be2514e2f0d 100644
--- a/src/support/htable.inc
+++ b/src/support/htable.inc
@@ -13,67 +13,77 @@
 static void **HTNAME##_lookup_bp_r(htable_t *h, void *key, void *ctx)   \
 {                                                                       \
     uint_t hv;                                                          \
-    size_t i, orig, index, iter;                                        \
+    size_t i, orig, index, iter, empty_slot;                            \
     size_t newsz, sz = hash_size(h);                                    \
     size_t maxprobe = max_probe(sz);                                    \
     void **tab = h->table;                                              \
     void **ol;                                                          \
                                                                         \
     hv = HFUNC((uintptr_t)key, ctx);                                    \
- retry_bp:                                                              \
-    iter = 0;                                                           \
-    index = (size_t)(hv & (sz-1)) * 2;                                  \
-    sz *= 2;                                                            \
-    orig = index;                                                       \
-                                                                        \
-    do {                                                                \
-        if (tab[index+1] == HT_NOTFOUND) {                              \
-            tab[index] = key;                                           \
-            return &tab[index+1];                                       \
+    while (1) {                                                         \
+        iter = 0;                                                       \
+        index = (size_t)(hv & (sz-1)) * 2;                              \
+        sz *= 2;                                                        \
+        orig = index;                                                   \
+        empty_slot = -1;                                                \
+                                                                        \
+        do {                                                            \
+            if (tab[index] == HT_NOTFOUND) {                            \
+                if (empty_slot == -1)                                   \
+                    empty_slot = index;                                 \
+                break;                                                  \
+            }                                                           \
+            if (tab[index+1] == HT_NOTFOUND) {                          \
+                if (empty_slot == -1)                                   \
+                    empty_slot = index;                                 \
+            }                                                           \
+                                                                        \
+            if (EQFUNC(key, tab[index], ctx))                           \
+                return &tab[index+1];                                   \
+                                                                        \
+            index = (index+2) & (sz-1);                                 \
+            iter++;                                                     \
+            if (iter > maxprobe)                                        \
+                break;                                                  \
+        } while (index != orig);                                        \
+                                                                        \
+        if (empty_slot != -1) {                                         \
+            tab[empty_slot] = key;                                      \
+            return &tab[empty_slot+1];                                  \
         }                                                               \
                                                                         \
-        if (EQFUNC(key, tab[index], ctx))                               \
-            return &tab[index+1];                                       \
-                                                                        \
-        index = (index+2) & (sz-1);                                     \
-        iter++;                                                         \
-        if (iter > maxprobe)                                            \
-            break;                                                      \
-    } while (index != orig);                                            \
-                                                                        \
-    /* table full */                                                    \
-    /* quadruple size, rehash, retry the insert */                      \
-    /* it's important to grow the table really fast; otherwise we waste */ \
-    /* lots of time rehashing all the keys over and over. */            \
-    sz = h->size;                                                       \
-    ol = h->table;                                                      \
-    if (sz < HT_N_INLINE)                                              \
-        newsz = HT_N_INLINE;                                            \
-    else if (sz >= (1<<19) || (sz <= (1<<8)))                           \
-        newsz = sz<<1;                                                  \
-    else                                                                \
-        newsz = sz<<2;                                                  \
-    /*printf("trying to allocate %d words.\n", newsz); fflush(stdout);*/ \
-    tab = (void**)LLT_ALLOC(newsz*sizeof(void*));                       \
-    if (tab == NULL)                                                    \
-        return NULL;                                                    \
-    for(i=0; i < newsz; i++)                                            \
-        tab[i] = HT_NOTFOUND;                                           \
-    h->table = tab;                                                     \
-    h->size = newsz;                                                    \
-    for(i=0; i < sz; i+=2) {                                            \
-        if (ol[i+1] != HT_NOTFOUND) {                                   \
-            (*HTNAME##_lookup_bp_r(h, ol[i], ctx)) = ol[i+1];           \
+        /* table full */                                                \
+        /* quadruple size, rehash, retry the insert */                  \
+        /* it's important to grow the table really fast; otherwise we waste */ \
+        /* lots of time rehashing all the keys over and over. */        \
+        sz = h->size;                                                   \
+        ol = h->table;                                                  \
+        if (sz < HT_N_INLINE)                                           \
+            newsz = HT_N_INLINE;                                        \
+        else if (sz >= (1<<19) || (sz <= (1<<8)))                       \
+            newsz = sz<<1;                                              \
+        else                                                            \
+            newsz = sz<<2;                                              \
+        /*printf("trying to allocate %d words.\n", newsz); fflush(stdout);*/ \
+        tab = (void**)LLT_ALLOC(newsz*sizeof(void*));                   \
+        if (tab == NULL)                                                \
+            return NULL;                                                \
+        for (i = 0; i < newsz; i++)                                     \
+            tab[i] = HT_NOTFOUND;                                       \
+        h->table = tab;                                                 \
+        h->size = newsz;                                                \
+        for (i = 0; i < sz; i += 2) {                                   \
+            if (ol[i+1] != HT_NOTFOUND) {                               \
+                (*HTNAME##_lookup_bp_r(h, ol[i], ctx)) = ol[i+1];       \
+            }                                                           \
         }                                                               \
-    }                                                                   \
-    if (ol != &h->_space[0])                                            \
-        LLT_FREE(ol);                                                   \
+        if (ol != &h->_space[0])                                        \
+            LLT_FREE(ol);                                               \
                                                                         \
-    sz = hash_size(h);                                                  \
-    maxprobe = max_probe(sz);                                           \
-    tab = h->table;                                                     \
-                                                                        \
-    goto retry_bp;                                                      \
+        sz = hash_size(h);                                              \
+        maxprobe = max_probe(sz);                                       \
+        tab = h->table;                                                 \
+    }                                                                   \
                                                                         \
     return NULL;                                                        \
 }                                                                       \
diff --git a/src/support/libsupportinit.c b/src/support/libsupportinit.c
index c0ccf7836017e6..897aea944237ee 100644
--- a/src/support/libsupportinit.c
+++ b/src/support/libsupportinit.c
@@ -3,6 +3,10 @@
 #include <locale.h>
 #include "libsupport.h"
 
+#if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L
+#include <sys/resource.h>
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -21,7 +25,30 @@ void libsupport_init(void)
     if (!isInitialized) {
         ios_init_stdstreams();
         isInitialized = 1;
-
+#if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L
+        // Raise the open file descriptor limit.
+        {
+            struct rlimit rl;
+            if (getrlimit(RLIMIT_NOFILE, &rl) == 0 && rl.rlim_cur != rl.rlim_max) {
+                // Do a binary search for the limit.
+                rlim_t min = rl.rlim_cur;
+                rlim_t max = 1 << 20;
+                // But if there's a defined upper bound, don't search, just set it.
+                if (rl.rlim_max != RLIM_INFINITY) {
+                    min = rl.rlim_max;
+                    max = rl.rlim_max;
+                }
+                do {
+                    rl.rlim_cur = min + (max - min) / 2;
+                    if (setrlimit(RLIMIT_NOFILE, &rl)) {
+                        max = rl.rlim_cur;
+                    } else {
+                        min = rl.rlim_cur;
+                    }
+                } while (min + 1 < max);
+            }
+        }
+#endif
         // adopt the user's locale for most formatting
         setlocale(LC_ALL, "");
         // but use locale-independent numeric formats (for parsing)
diff --git a/src/support/platform.h b/src/support/platform.h
index 1bb46d3bc648c2..8d29b44a92f8f3 100644
--- a/src/support/platform.h
+++ b/src/support/platform.h
@@ -50,6 +50,25 @@
 #define _COMPILER_GCC_
 #endif
 
+#if defined(__has_feature) // Clang flavor
+#if __has_feature(address_sanitizer)
+#define _COMPILER_ASAN_ENABLED_
+#endif
+#if __has_feature(memory_sanitizer)
+#define _COMPILER_MSAN_ENABLED_
+#endif
+#if __has_feature(thread_sanitizer)
+#if __clang_major__ < 11
+#error Thread sanitizer runtime libraries in clang < 11 leak memory and cannot be used
+#endif
+#define _COMPILER_TSAN_ENABLED_
+#endif
+#else // GCC flavor
+#if defined(__SANITIZE_ADDRESS__)
+#define _COMPILER_ASAN_ENABLED_
+#endif
+#endif // __has_feature
+
 /*******************************************************************************
 *                               OS                                             *
 *******************************************************************************/
diff --git a/src/support/win32_ucontext.h b/src/support/win32_ucontext.h
index 6730cb96ee873e..b856abdc26eef5 100644
--- a/src/support/win32_ucontext.h
+++ b/src/support/win32_ucontext.h
@@ -16,7 +16,7 @@ typedef struct {
         size_t ss_size;
     } uc_stack;
     jmp_buf uc_mcontext;
-#ifdef JL_TSAN_ENABLED
+#ifdef _COMPILER_TSAN_ENABLED_
     void *tsan_state;
 #endif
 } win32_ucontext_t;
diff --git a/src/symbol.c b/src/symbol.c
index f1a4343a39e8e3..fe8e975f8f5255 100644
--- a/src/symbol.c
+++ b/src/symbol.c
@@ -21,7 +21,9 @@ static jl_sym_t *symtab = NULL;
 
 static uintptr_t hash_symbol(const char *str, size_t len) JL_NOTSAFEPOINT
 {
-    return memhash(str, len) ^ ~(uintptr_t)0/3*2;
+    uintptr_t oid = memhash(str, len) ^ ~(uintptr_t)0/3*2;
+    // compute the same hash value as v1.6 and earlier, which used `hash_uint(3h - objectid(sym))`
+    return inthash(-oid);
 }
 
 static size_t symbol_nbytes(size_t len) JL_NOTSAFEPOINT
diff --git a/src/sys.c b/src/sys.c
index 684e000026c6d0..5080c361494dcb 100644
--- a/src/sys.c
+++ b/src/sys.c
@@ -13,6 +13,7 @@
 
 #include "julia.h"
 #include "julia_internal.h"
+#include "llvm-version.h"
 
 #ifdef _OS_WINDOWS_
 #include <psapi.h>
@@ -26,6 +27,7 @@
 #include <sys/ptrace.h>
 #include <sys/mman.h>
 #include <dlfcn.h>
+#include <grp.h>
 #endif
 
 #ifndef _OS_WINDOWS_
@@ -52,7 +54,7 @@
 #include <intrin.h>
 #endif
 
-#ifdef JL_MSAN_ENABLED
+#ifdef _COMPILER_MSAN_ENABLED_
 #include <sanitizer/msan_interface.h>
 #endif
 
@@ -230,6 +232,231 @@ JL_DLLEXPORT double jl_stat_ctime(char *statbuf)
     return (double)s->st_ctim.tv_sec + (double)s->st_ctim.tv_nsec * 1e-9;
 }
 
+JL_DLLEXPORT int jl_os_get_passwd(uv_passwd_t *pwd, size_t uid)
+{
+#ifdef _OS_WINDOWS_
+  return UV_ENOTSUP;
+#else
+  // Look up the passwd entry for `uid` and fill `*pwd`; taken directly from libuv
+  struct passwd pw;
+  struct passwd* result;
+  char* buf;
+  size_t bufsize;
+  size_t name_size;
+  size_t homedir_size;
+  size_t shell_size;
+  size_t gecos_size;
+  long initsize;
+  int r;
+
+  if (pwd == NULL)
+    return UV_EINVAL;
+
+  initsize = sysconf(_SC_GETPW_R_SIZE_MAX);
+
+  if (initsize <= 0) // no usable size hint from sysconf: start at 4096 bytes
+    bufsize = 4096;
+  else
+    bufsize = (size_t) initsize;
+
+  buf = NULL;
+
+  for (;;) { // retry with a doubled buffer for as long as getpwuid_r returns ERANGE
+    free(buf);
+    buf = (char*)malloc(bufsize);
+
+    if (buf == NULL)
+      return UV_ENOMEM;
+
+    r = getpwuid_r(uid, &pw, buf, bufsize, &result);
+
+    if (r != ERANGE)
+      break;
+
+    bufsize *= 2;
+  }
+
+  if (r != 0) { // any other failure: hand back the negated error code
+    free(buf);
+    return -r;
+  }
+
+  if (result == NULL) { // the call returned 0 but produced no entry
+    free(buf);
+    return UV_ENOENT;
+  }
+
+  /* Allocate one block for the username, home directory, shell, and gecos (pwd->username is its base). */
+  name_size = strlen(pw.pw_name) + 1;
+  homedir_size = strlen(pw.pw_dir) + 1;
+  shell_size = strlen(pw.pw_shell) + 1;
+
+#ifdef __MVS__
+  gecos_size = 0; /* pw_gecos does not exist on zOS. */
+#else
+  if (pw.pw_gecos != NULL)
+    gecos_size = strlen(pw.pw_gecos) + 1;
+  else
+    gecos_size = 0;
+#endif
+
+  pwd->username = (char*)malloc(name_size +
+                         homedir_size +
+                         shell_size +
+                         gecos_size);
+
+  if (pwd->username == NULL) {
+    free(buf);
+    return UV_ENOMEM;
+  }
+
+  /* Copy the username to the start of the block */
+  memcpy(pwd->username, pw.pw_name, name_size);
+
+  /* Copy the home directory right after the username */
+  pwd->homedir = pwd->username + name_size;
+  memcpy(pwd->homedir, pw.pw_dir, homedir_size);
+
+  /* Copy the shell right after the home directory */
+  pwd->shell = pwd->homedir + homedir_size;
+  memcpy(pwd->shell, pw.pw_shell, shell_size);
+
+  /* Copy the gecos field (if any) right after the shell */
+#ifdef __MVS__
+  pwd->gecos = NULL;  /* pw_gecos does not exist on zOS. */
+#else
+  if (pw.pw_gecos == NULL) {
+    pwd->gecos = NULL;
+  } else {
+    pwd->gecos = pwd->shell + shell_size;
+    memcpy(pwd->gecos, pw.pw_gecos, gecos_size);
+  }
+#endif
+
+  /* Copy the uid and gid */
+  pwd->uid = pw.pw_uid;
+  pwd->gid = pw.pw_gid;
+
+  free(buf); // the lookup scratch buffer is not needed once the fields are copied
+
+  return 0;
+#endif
+}
+
+typedef struct jl_group_s {
+    char* groupname; // group name; jl_os_get_group points this into the `members` allocation
+    long gid; // numeric group id
+    char** members; // NULL-terminated array of member names; release with jl_os_free_group
+} jl_group_t;
+
+JL_DLLEXPORT int jl_os_get_group(jl_group_t *grp, size_t gid)
+{
+#ifdef _OS_WINDOWS_
+  return UV_ENOTSUP;
+#else
+  // Look up group `gid` and fill `*grp` (release with jl_os_free_group); adapted from libuv's uv_os_get_passwd
+  struct group gp;
+  struct group* result;
+  char* buf;
+  char* gr_mem;
+  size_t bufsize;
+  size_t name_size;
+  long members;
+  size_t mem_size;
+  long initsize;
+  int r;
+
+  if (grp == NULL)
+    return UV_EINVAL;
+
+  initsize = sysconf(_SC_GETGR_R_SIZE_MAX);
+
+  if (initsize <= 0) // no usable size hint from sysconf: start at 4096 bytes
+    bufsize = 4096;
+  else
+    bufsize = (size_t) initsize;
+
+  buf = NULL;
+
+  for (;;) { // retry with a doubled buffer for as long as getgrgid_r returns ERANGE
+    free(buf);
+    buf = (char*)malloc(bufsize);
+
+    if (buf == NULL)
+      return UV_ENOMEM;
+
+    r = getgrgid_r(gid, &gp, buf, bufsize, &result);
+
+    if (r != ERANGE)
+      break;
+
+    bufsize *= 2;
+  }
+
+  if (r != 0) { // any other failure: hand back the negated error code
+    free(buf);
+    return -r;
+  }
+
+  if (result == NULL) { // the call returned 0 but produced no entry
+    free(buf);
+    return UV_ENOENT;
+  }
+
+  /* Size a single allocation: member pointers + NULL terminator, member strings, groupname. */
+  name_size = strlen(gp.gr_name) + 1;
+  members = 0;
+  mem_size = sizeof(char*); // slot for the NULL terminator of the pointer array
+  for (r = 0; gp.gr_mem[r] != NULL; r++) {
+    mem_size += strlen(gp.gr_mem[r]) + 1 + sizeof(char*);
+    members++;
+  }
+
+  gr_mem = (char*)malloc(name_size + mem_size);
+  if (gr_mem == NULL) {
+    free(buf);
+    return UV_ENOMEM;
+  }
+
+  /* Copy the members: the pointer array comes first, the strings follow it */
+  grp->members = (char**) gr_mem;
+  grp->members[members] = NULL;
+  gr_mem = (char*) ((char**) gr_mem + members + 1); // first byte after the pointer array
+  for (r = 0; r < members; r++) {
+    grp->members[r] = gr_mem;
+    gr_mem = stpcpy(gr_mem, gp.gr_mem[r]) + 1;
+  }
+  assert(gr_mem == (char*)grp->members + mem_size); // member section is exactly mem_size bytes
+
+  /* Copy the groupname after the last member string */
+  grp->groupname = gr_mem;
+  memcpy(grp->groupname, gp.gr_name, name_size);
+  gr_mem += name_size;
+
+  /* Copy the gid */
+  grp->gid = gp.gr_gid;
+
+  free(buf); // the lookup scratch buffer is not needed once the fields are copied
+
+  return 0;
+#endif
+}
+
+JL_DLLEXPORT void jl_os_free_group(jl_group_t *grp)
+{
+  if (grp == NULL)
+    return;
+
+  /*
+    The group's memory is allocated in a single malloc() call. The base of
+    that allocation is stored in grp->members, so that is the only field that
+    needs to be freed.
+  */
+  free(grp->members);
+  grp->members = NULL;
+  grp->groupname = NULL;
+}
+
 // --- buffer manipulation ---
 
 JL_DLLEXPORT jl_array_t *jl_take_buffer(ios_t *s)
@@ -360,6 +587,15 @@ typedef DWORD (WINAPI *GAPC)(WORD);
 #endif
 #endif
 
+// Apple's M1 processor is a big.LITTLE style processor, with 4x "performance"
+// cores, and 4x "efficiency" cores.  Because Julia expects to be able to run
+// things like heavy linear algebra workloads on all cores, it's best for us
+// to only spawn as many threads as there are performance cores.  Once macOS
+// 12 is released, we'll be able to query the multiple "perf levels" of the
+// cores of a CPU (the pytorch/cpuinfo project has an example of this) but
+// until it's released, we will just recognize the M1 by its CPU family
+// identifier, then subtract how many efficiency cores we know it has.
+
 JL_DLLEXPORT int jl_cpu_threads(void) JL_NOTSAFEPOINT
 {
 #if defined(HW_AVAILCPU) && defined(HW_NCPU)
@@ -372,6 +608,19 @@ JL_DLLEXPORT int jl_cpu_threads(void) JL_NOTSAFEPOINT
         sysctl(nm, 2, &count, &len, NULL, 0);
         if (count < 1) { count = 1; }
     }
+
+#if defined(__APPLE__) && defined(_CPU_AARCH64_)
+    // Manually subtract efficiency cores for Apple's big.LITTLE cores
+    int32_t family = 0;
+    len = 4;
+    sysctlbyname("hw.cpufamily", &family, &len, NULL, 0);
+    if (family >= 1 && count > 1) {
+        if (family == CPUFAMILY_ARM_FIRESTORM_ICESTORM) {
+            // We know the Apple M1 has 4 efficiency cores, so subtract them out.
+            count -= 4;
+        }
+    }
+#endif
     return count;
 #elif defined(_SC_NPROCESSORS_ONLN)
     long count = sysconf(_SC_NPROCESSORS_ONLN);
@@ -569,7 +818,7 @@ JL_DLLEXPORT const char *jl_pathname_for_handle(void *handle)
 
     struct link_map *map;
     dlinfo(handle, RTLD_DI_LINKMAP, &map);
-#ifdef JL_MSAN_ENABLED
+#ifdef _COMPILER_MSAN_ENABLED_
     __msan_unpoison(&map,sizeof(struct link_map*));
     if (map) {
         __msan_unpoison(map, sizeof(struct link_map));
@@ -668,7 +917,13 @@ JL_DLLEXPORT jl_value_t *jl_get_libllvm(void) JL_NOTSAFEPOINT {
     HMODULE mod;
     // FIXME: GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS on LLVMContextCreate,
     //        but that just points to libjulia.dll
-    if (!GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, "LLVM", &mod))
+#if JL_LLVM_VERSION <= 110000
+    const char* libLLVM = "LLVM";
+#else
+    const char* libLLVM = "libLLVM";
+#endif
+
+    if (!GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, libLLVM, &mod))
         return jl_nothing;
 
     char path[MAX_PATH];
diff --git a/src/task.c b/src/task.c
index 4d38d30f5cc56e..88d4eac0863c9a 100644
--- a/src/task.c
+++ b/src/task.c
@@ -29,18 +29,20 @@
 #include <stdlib.h>
 #include <string.h>
 #include <signal.h>
+#include <unistd.h>
 #include <errno.h>
 #include <inttypes.h>
 #include "julia.h"
 #include "julia_internal.h"
 #include "threading.h"
 #include "julia_assert.h"
+#include "support/hashing.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#if defined(JL_ASAN_ENABLED)
+#if defined(_COMPILER_ASAN_ENABLED_)
 static inline void sanitizer_start_switch_fiber(const void* bottom, size_t size) {
     __sanitizer_start_switch_fiber(NULL, bottom, size);
 }
@@ -52,7 +54,7 @@ static inline void sanitizer_start_switch_fiber(const void* bottom, size_t size)
 static inline void sanitizer_finish_switch_fiber(void) {}
 #endif
 
-#if defined(JL_TSAN_ENABLED)
+#if defined(_COMPILER_TSAN_ENABLED_)
 static inline void tsan_destroy_ctx(jl_ptls_t ptls, void *state) {
     if (state != &ptls->root_task->state) {
         __tsan_destroy_fiber(ctx->state);
@@ -66,8 +68,8 @@ static inline void tsan_switch_to_ctx(void *state)  {
 
 // empirically, jl_finish_task needs about 64k stack space to infer/run
 // and additionally, gc-stack reserves 64k for the guard pages
-#if defined(MINSIGSTKSZ) && MINSIGSTKSZ > 131072
-#define MINSTKSZ MINSIGSTKSZ
+#if defined(MINSIGSTKSZ)
+#define MINSTKSZ (MINSIGSTKSZ > 131072 ? MINSIGSTKSZ : 131072)
 #else
 #define MINSTKSZ 131072
 #endif
@@ -191,7 +193,7 @@ static jl_function_t *task_done_hook_func JL_GLOBALLY_ROOTED = NULL;
 
 void JL_NORETURN jl_finish_task(jl_task_t *t)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     JL_SIGATOMIC_BEGIN();
     if (t->_isexception)
         jl_atomic_store_release(&t->_state, JL_TASK_STATE_FAILED);
@@ -200,9 +202,9 @@ void JL_NORETURN jl_finish_task(jl_task_t *t)
     if (t->copy_stack) // early free of stkbuf
         t->stkbuf = NULL;
     // ensure that state is cleared
-    ptls->in_finalizer = 0;
-    ptls->in_pure_callback = 0;
-    jl_get_ptls_states()->world_age = jl_world_counter;
+    ct->ptls->in_finalizer = 0;
+    ct->ptls->in_pure_callback = 0;
+    ct->world_age = jl_world_counter;
     // let the runtime know this task is dead and find a new task to run
     jl_function_t *done = jl_atomic_load_relaxed(&task_done_hook_func);
     if (done == NULL) {
@@ -223,7 +225,7 @@ void JL_NORETURN jl_finish_task(jl_task_t *t)
     abort();
 }
 
-JL_DLLEXPORT void *jl_task_stack_buffer(jl_task_t *task, size_t *size, int *tid)
+JL_DLLEXPORT void *jl_task_stack_buffer(jl_task_t *task, size_t *size, int *ptid)
 {
     size_t off = 0;
 #ifndef _OS_WINDOWS_
@@ -235,19 +237,16 @@ JL_DLLEXPORT void *jl_task_stack_buffer(jl_task_t *task, size_t *size, int *tid)
         off = ROOT_TASK_STACK_ADJUSTMENT;
     }
 #endif
-    *tid = -1;
-    for (int i = 0; i < jl_n_threads; i++) {
-        jl_ptls_t ptls = jl_all_tls_states[i];
-        if (ptls->current_task == task) {
-            *tid = i;
+    jl_ptls_t ptls2 = task->ptls;
+    *ptid = -1;
+    if (ptls2) {
+        *ptid = task->tid;
 #ifdef COPY_STACKS
-            if (task->copy_stack) {
-                *size = ptls->stacksize;
-                return (char *)ptls->stackbase - *size;
-            }
-#endif
-            break; // continue with normal return
+        if (task->copy_stack) {
+            *size = ptls2->stacksize;
+            return (char *)ptls2->stackbase - *size;
         }
+#endif
     }
     *size = task->bufsz - off;
     return (void *)((char *)task->stkbuf + off);
@@ -263,10 +262,8 @@ JL_DLLEXPORT void jl_active_task_stack(jl_task_t *task,
         return;
     }
 
-    int16_t tid = task->tid;
-    jl_ptls_t ptls2 = (tid != -1) ? jl_all_tls_states[tid] : 0;
-
-    if (task->copy_stack && ptls2 && task == ptls2->current_task) {
+    jl_ptls_t ptls2 = task->ptls;
+    if (task->copy_stack && ptls2) {
         *total_start = *active_start = (char*)ptls2->stackbase - ptls2->stacksize;
         *total_end = *active_end = (char*)ptls2->stackbase;
     }
@@ -312,38 +309,33 @@ NOINLINE static void record_backtrace(jl_ptls_t ptls, int skip) JL_NOTSAFEPOINT
     ptls->bt_size = rec_backtrace(ptls->bt_data, JL_MAX_BT_SIZE, skip + 1);
 }
 
-JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
-{
-    _julia_init(rel);
-}
-
 JL_DLLEXPORT void jl_set_next_task(jl_task_t *task) JL_NOTSAFEPOINT
 {
-    jl_get_ptls_states()->next_task = task;
+    jl_current_task->ptls->next_task = task;
 }
 
 JL_DLLEXPORT jl_task_t *jl_get_next_task(void) JL_NOTSAFEPOINT
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    if (ptls->next_task)
-        return ptls->next_task;
-    return ptls->current_task;
+    jl_task_t *ct = jl_current_task;
+    if (ct->ptls->next_task)
+        return ct->ptls->next_task;
+    return ct;
 }
 
-#ifdef JL_TSAN_ENABLED
+#ifdef _COMPILER_TSAN_ENABLED_
 const char tsan_state_corruption[] = "TSAN state corrupted. Exiting HARD!\n";
 #endif
 
-static void ctx_switch(jl_ptls_t ptls)
+static void ctx_switch(jl_task_t *lastt)
 {
+    jl_ptls_t ptls = lastt->ptls;
     jl_task_t **pt = &ptls->next_task;
     jl_task_t *t = *pt;
-    assert(t != ptls->current_task);
-    jl_task_t *lastt = ptls->current_task;
+    assert(t != lastt);
     // none of these locks should be held across a task switch
     assert(ptls->locks.len == 0);
 
-#ifdef JL_TSAN_ENABLED
+#ifdef _COMPILER_TSAN_ENABLED_
     if (lastt->ctx.tsan_state != __tsan_get_current_fiber()) {
         // Something went really wrong - don't even assume that we can
         // use assert/abort which involve lots of signal handling that
@@ -396,19 +388,20 @@ static void ctx_switch(jl_ptls_t ptls)
         else
 #endif
         *pt = NULL; // can't fail after here: clear the gc-root for the target task now
-        lastt->gcstack = ptls->pgcstack;
+        lastt->ptls = NULL;
     }
 
-    // set up global state for new task
-    ptls->pgcstack = t->gcstack;
-    ptls->world_age = 0;
-    t->gcstack = NULL;
+    // set up global state for new task and clear global state for old task
+    t->ptls = ptls;
+    ptls->current_task = t;
+    JL_GC_PROMISE_ROOTED(t);
+    lastt->ptls = NULL;
 #ifdef MIGRATE_TASKS
     ptls->previous_task = lastt;
 #endif
-    ptls->current_task = t;
+    jl_set_pgcstack(&t->gcstack);
 
-#if defined(JL_TSAN_ENABLED)
+#if defined(_COMPILER_TSAN_ENABLED_)
     tsan_switch_to_ctx(&t->tsan_state);
     if (killed)
         tsan_destroy_ctx(ptls, &lastt->tsan_state);
@@ -475,44 +468,27 @@ static void ctx_switch(jl_ptls_t ptls)
     sanitizer_finish_switch_fiber();
 }
 
-static jl_ptls_t NOINLINE refetch_ptls(void)
-{
-    return jl_get_ptls_states();
-}
-
 JL_DLLEXPORT void jl_switch(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
     jl_task_t *t = ptls->next_task;
-    jl_task_t *ct = ptls->current_task;
     if (t == ct) {
         return;
     }
-    if (t->_state != JL_TASK_STATE_RUNNABLE || (t->started && t->stkbuf == NULL)) {
-        ct->_isexception = t->_isexception;
-        ct->result = t->result;
-        jl_gc_wb(ct, ct->result);
-        return;
-    }
+    if (t->started && t->stkbuf == NULL)
+        jl_error("attempt to switch to exited task");
     if (ptls->in_finalizer)
         jl_error("task switch not allowed from inside gc finalizer");
     if (ptls->in_pure_callback)
         jl_error("task switch not allowed from inside staged nor pure functions");
-    if (t->sticky && jl_atomic_load_acquire(&t->tid) == -1) {
-        // manually yielding to a task
-        if (jl_atomic_compare_exchange(&t->tid, -1, ptls->tid) != -1)
-            jl_error("cannot switch to task running on another thread");
-    }
-    else if (t->tid != ptls->tid) {
+    if (!jl_set_task_tid(t, ptls->tid)) // manually yielding to a task
         jl_error("cannot switch to task running on another thread");
-    }
 
     // Store old values on the stack and reset
     sig_atomic_t defer_signal = ptls->defer_signal;
     int8_t gc_state = jl_gc_unsafe_enter(ptls);
-    size_t world_age = ptls->world_age;
     int finalizers_inhibited = ptls->finalizers_inhibited;
-    ptls->world_age = 0;
     ptls->finalizers_inhibited = 0;
 
 #ifdef ENABLE_TIMINGS
@@ -522,25 +498,24 @@ JL_DLLEXPORT void jl_switch(void)
     ptls->timing_stack = NULL;
 #endif
 
-    ctx_switch(ptls);
+    ctx_switch(ct);
 
 #ifdef MIGRATE_TASKS
-    ptls = refetch_ptls();
+    ptls = ct->ptls;
     t = ptls->previous_task;
+    ptls->previous_task = NULL;
+    assert(t != ct);
     assert(t->tid == ptls->tid);
     if (!t->sticky && !t->copy_stack)
-        t->tid = -1;
-#elif defined(NDEBUG)
-    (void)refetch_ptls();
+        jl_atomic_store_release(&t->tid, -1);
 #else
-    assert(ptls == refetch_ptls());
+    assert(ptls == ct->ptls);
 #endif
 
     // Pop old values back off the stack
-    assert(ct == ptls->current_task &&
-           0 == ptls->world_age &&
+    assert(ct == jl_current_task &&
+           0 != ct->ptls &&
            0 == ptls->finalizers_inhibited);
-    ptls->world_age = world_age;
     ptls->finalizers_inhibited = finalizers_inhibited;
 
 #ifdef ENABLE_TIMINGS
@@ -580,31 +555,29 @@ JL_DLLEXPORT JL_NORETURN void jl_no_exc_handler(jl_value_t *e)
 }
 
 // yield to exception handler
-static void JL_NORETURN throw_internal(jl_value_t *exception JL_MAYBE_UNROOTED)
+static void JL_NORETURN throw_internal(jl_task_t *ct, jl_value_t *exception JL_MAYBE_UNROOTED)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    assert(!jl_get_safe_restore());
+    jl_ptls_t ptls = ct->ptls;
     ptls->io_wait = 0;
     // @time needs its compile timer disabled on error,
     // and cannot use a try-finally as it would break scope for assignments
-    jl_measure_compile_time[ptls->tid] = 0;
-    if (ptls->safe_restore)
-        jl_longjmp(*ptls->safe_restore, 1);
-    // During startup
-    if (!ptls->current_task)
-        jl_no_exc_handler(exception);
+    // We blindly disable compilation time tracking here, for all running Tasks, even though
+    // it may cause some incorrect measurements. This is a known bug, and is being tracked
+    // here: https://github.com/JuliaLang/julia/pull/39138
+    jl_atomic_store_relaxed(&jl_measure_compile_time_enabled, 0);
     JL_GC_PUSH1(&exception);
     jl_gc_unsafe_enter(ptls);
     if (exception) {
         // The temporary ptls->bt_data is rooted by special purpose code in the
         // GC. This exists only for the purpose of preserving bt_data until we
         // set ptls->bt_size=0 below.
-        assert(ptls->current_task);
-        jl_push_excstack(&ptls->current_task->excstack, exception,
+        jl_push_excstack(&ct->excstack, exception,
                           ptls->bt_data, ptls->bt_size);
         ptls->bt_size = 0;
     }
-    assert(ptls->current_task->excstack && ptls->current_task->excstack->top);
-    jl_handler_t *eh = ptls->current_task->eh;
+    assert(ct->excstack && ct->excstack->top);
+    jl_handler_t *eh = ct->eh;
     if (eh != NULL) {
 #ifdef ENABLE_TIMINGS
         jl_timing_block_t *cur_block = ptls->timing_stack;
@@ -624,21 +597,26 @@ static void JL_NORETURN throw_internal(jl_value_t *exception JL_MAYBE_UNROOTED)
 // record backtrace and raise an error
 JL_DLLEXPORT void jl_throw(jl_value_t *e JL_MAYBE_UNROOTED)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
     assert(e != NULL);
-    if (ptls->safe_restore)
-        throw_internal(NULL);
-    record_backtrace(ptls, 1);
-    throw_internal(e);
+    jl_jmp_buf *safe_restore = jl_get_safe_restore();
+    if (safe_restore)
+        jl_longjmp(*safe_restore, 1);
+    jl_task_t *ct = jl_get_current_task();
+    if (ct == NULL) // During startup
+        jl_no_exc_handler(e);
+    JL_GC_PROMISE_ROOTED(ct);
+    record_backtrace(ct->ptls, 1);
+    throw_internal(ct, e);
 }
 
 // rethrow with current excstack state
 JL_DLLEXPORT void jl_rethrow(void)
 {
-    jl_excstack_t *excstack = jl_get_ptls_states()->current_task->excstack;
+    jl_task_t *ct = jl_current_task;
+    jl_excstack_t *excstack = ct->excstack;
     if (!excstack || excstack->top == 0)
         jl_error("rethrow() not allowed outside a catch block");
-    throw_internal(NULL);
+    throw_internal(ct, NULL);
 }
 
 // Special case throw for errors detected inside signal handlers.  This is not
@@ -647,29 +625,93 @@ JL_DLLEXPORT void jl_rethrow(void)
 JL_DLLEXPORT void JL_NORETURN jl_sig_throw(void)
 {
 CFI_NORETURN
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_jmp_buf *safe_restore = jl_get_safe_restore();
+    if (safe_restore)
+        jl_longjmp(*safe_restore, 1);
+    jl_task_t *ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
     jl_value_t *e = ptls->sig_exception;
     ptls->sig_exception = NULL;
-    throw_internal(e);
+    throw_internal(ct, e);
 }
 
 JL_DLLEXPORT void jl_rethrow_other(jl_value_t *e JL_MAYBE_UNROOTED)
 {
     // TODO: Should uses of `rethrow(exc)` be replaced with a normal throw, now
     // that exception stacks allow root cause analysis?
-    jl_excstack_t *excstack = jl_get_ptls_states()->current_task->excstack;
+    jl_task_t *ct = jl_current_task;
+    jl_excstack_t *excstack = ct->excstack;
     if (!excstack || excstack->top == 0)
         jl_error("rethrow(exc) not allowed outside a catch block");
     // overwrite exception on top of stack. see jl_excstack_exception
     jl_excstack_raw(excstack)[excstack->top-1].jlvalue = e;
     JL_GC_PROMISE_ROOTED(e);
-    throw_internal(NULL);
+    throw_internal(ct, NULL);
+}
+
+/* This is xoshiro256++ 1.0, used for tasklocal random number generation in Julia.
+   This implementation is intended for embedders and internal use by the runtime, and is
+   based on the reference implementation at https://prng.di.unimi.it
+
+   Credits go to David Blackman and Sebastiano Vigna for coming up with this PRNG.
+   They described xoshiro256++ in "Scrambled Linear Pseudorandom Number Generators",
+   ACM Trans. Math. Softw., 2021.
+
+   There is a pure Julia implementation in stdlib that tends to be faster when used from
+   within Julia, due to inlining and more aggressive architecture-specific optimizations.
+   Advances task->rngState0..3 by one step in place and returns the next 64-bit output. */
+JL_DLLEXPORT uint64_t jl_tasklocal_genrandom(jl_task_t *task) JL_NOTSAFEPOINT
+{
+    uint64_t s0 = task->rngState0;
+    uint64_t s1 = task->rngState1;
+    uint64_t s2 = task->rngState2;
+    uint64_t s3 = task->rngState3;
+    // NB: the shifted word is s[1], as in the reference; the output is rotl(s0 + s3, 23) + s0
+    uint64_t t = s1 << 17;
+    uint64_t tmp = s0 + s3;
+    uint64_t res = ((tmp << 23) | (tmp >> 41)) + s0;
+    s2 ^= s0;
+    s3 ^= s1;
+    s1 ^= s2;
+    s0 ^= s3;
+    s2 ^= t;
+    s3 = (s3 << 45) | (s3 >> 19);
+    // store the advanced state back into the task
+    task->rngState0 = s0;
+    task->rngState1 = s1;
+    task->rngState2 = s2;
+    task->rngState3 = s3;
+    return res;
+}
+
+void rng_split(jl_task_t *from, jl_task_t *to) JL_NOTSAFEPOINT
+{
+    /* TODO: consider a less ad-hoc construction
+       Ideally we could just use the output of the random stream to seed the initial
+       state of the child. Out of an overabundance of caution we multiply with
+       effectively random coefficients, to break possible self-interactions.
+
+       It is not the goal to mix bits -- we work under the assumption that the
+       source is well-seeded, and its output looks effectively random.
+       However, xoshiro has never been studied in the mode where we seed the
+       initial state with the output of another xoshiro instance.
+
+       Constants have nothing up their sleeve:
+       0x02011ce34bce797f == hash(UInt(1))|0x01
+       0x5a94851fb48a6e05 == hash(UInt(2))|0x01
+       0x3688cf5d48899fa7 == hash(UInt(3))|0x01
+       0x867b4bb4c42e5661 == hash(UInt(4))|0x01
+    */
+    to->rngState0 = 0x02011ce34bce797f * jl_tasklocal_genrandom(from);
+    to->rngState1 = 0x5a94851fb48a6e05 * jl_tasklocal_genrandom(from);
+    to->rngState2 = 0x3688cf5d48899fa7 * jl_tasklocal_genrandom(from);
+    to->rngState3 = 0x867b4bb4c42e5661 * jl_tasklocal_genrandom(from);
 }
 
 JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion_future, size_t ssize)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_task_t *t = (jl_task_t*)jl_gc_alloc(ptls, sizeof(jl_task_t), jl_task_type);
+    jl_task_t *ct = jl_current_task;
+    jl_task_t *t = (jl_task_t*)jl_gc_alloc(ct->ptls, sizeof(jl_task_t), jl_task_type);
     t->copy_stack = 0;
     if (ssize == 0) {
         // stack size unspecified; use default
@@ -700,7 +742,9 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion
     t->donenotify = completion_future;
     t->_isexception = 0;
     // Inherit logger state from parent task
-    t->logstate = ptls->current_task->logstate;
+    t->logstate = ct->logstate;
+    // Fork task-local random state from parent
+    rng_split(ct, t);
     // there is no active exception handler available on this stack yet
     t->eh = NULL;
     t->sticky = 1;
@@ -708,40 +752,34 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion
     t->excstack = NULL;
     t->started = 0;
     t->prio = -1;
-    t->tid = -1;
+    t->tid = t->copy_stack ? ct->tid : -1; // copy_stacks are always pinned since they can't be moved
+    t->ptls = NULL;
+    t->world_age = 0;
 
+#ifdef COPY_STACKS
+    if (!t->copy_stack) {
 #if defined(JL_DEBUG_BUILD)
-    if (!t->copy_stack)
         memset(&t->ctx, 0, sizeof(t->ctx));
 #endif
-#ifdef COPY_STACKS
-    if (always_copy_stacks)
-        memcpy(&t->copy_stack_ctx, &ptls->copy_stack_ctx, sizeof(t->copy_stack_ctx));
-    else if (t->copy_stack)
-        memcpy(&t->ctx, &ptls->base_ctx, sizeof(t->ctx));
+    }
+    else {
+        if (always_copy_stacks)
+            memcpy(&t->copy_stack_ctx, &ct->ptls->copy_stack_ctx, sizeof(t->copy_stack_ctx));
+        else
+            memcpy(&t->ctx, &ct->ptls->base_ctx, sizeof(t->ctx));
+    }
 #endif
-#ifdef JL_TSAN_ENABLED
+#ifdef _COMPILER_TSAN_ENABLED_
     t->tsan_state = __tsan_create_fiber(0);
 #endif
     return t;
 }
 
-JL_DLLEXPORT jl_value_t *jl_get_current_task(void)
-{
-    jl_ptls_t ptls = jl_get_ptls_states();
-    return (jl_value_t*)ptls->current_task;
-}
-
-JL_DLLEXPORT jl_jmp_buf *jl_get_safe_restore(void)
-{
-    jl_ptls_t ptls = jl_get_ptls_states();
-    return ptls->safe_restore;
-}
-
-JL_DLLEXPORT void jl_set_safe_restore(jl_jmp_buf *sr)
+// a version of jl_current_task safe for unmanaged threads
+JL_DLLEXPORT jl_task_t *jl_get_current_task(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    ptls->safe_restore = sr;
+    jl_gcframe_t **pgcstack = jl_get_pgcstack();
+    return pgcstack == NULL ? NULL : container_of(pgcstack, jl_task_t, gcstack);
 }
 
 #ifdef JL_HAVE_ASYNCIFY
@@ -752,8 +790,8 @@ JL_DLLEXPORT jl_ucontext_t *task_ctx_ptr(jl_task_t *t)
 
 JL_DLLEXPORT jl_value_t *jl_get_root_task(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    return (jl_value_t*)ptls->root_task;
+    jl_task_t *ct = jl_current_task;
+    return (jl_value_t*)ct->ptls->root_task;
 }
 
 JL_DLLEXPORT void jl_task_wait()
@@ -762,10 +800,11 @@ JL_DLLEXPORT void jl_task_wait()
     if (!wait_func) {
         wait_func = (jl_function_t*)jl_get_global(jl_base_module, jl_symbol("wait"));
     }
-    size_t last_age = jl_get_ptls_states()->world_age;
-    jl_get_ptls_states()->world_age = jl_get_world_counter();
+    jl_task_t *ct = jl_current_task;
+    size_t last_age = ct->world_age;
+    ct->world_age = jl_get_world_counter();
     jl_apply(&wait_func, 1);
-    jl_get_ptls_states()->world_age = last_age;
+    ct->world_age = last_age;
 }
 
 JL_DLLEXPORT void jl_schedule_task(jl_task_t *task)
@@ -774,11 +813,12 @@ JL_DLLEXPORT void jl_schedule_task(jl_task_t *task)
     if (!sched_func) {
         sched_func = (jl_function_t*)jl_get_global(jl_base_module, jl_symbol("schedule"));
     }
-    size_t last_age = jl_get_ptls_states()->world_age;
-    jl_get_ptls_states()->world_age = jl_get_world_counter();
+    jl_task_t *ct = jl_current_task;
+    size_t last_age = ct->world_age;
+    ct->world_age = jl_get_world_counter();
     jl_value_t *args[] = {(jl_value_t*)sched_func, (jl_value_t*)task};
     jl_apply(args, 2);
-    jl_get_ptls_states()->world_age = last_age;
+    ct->world_age = last_age;
 }
 #endif
 
@@ -809,23 +849,29 @@ STATIC_OR_JS void NOINLINE JL_NORETURN start_task(void)
 CFI_NORETURN
     // this runs the first time we switch to a task
     sanitizer_finish_switch_fiber();
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_task_t *t = ptls->current_task;
+#ifdef __clang_analyzer__
+    jl_task_t *ct = jl_get_current_task();
+    JL_GC_PROMISE_ROOTED(ct);
+#else
+    jl_task_t *ct = jl_current_task;
+#endif
+    jl_ptls_t ptls = ct->ptls;
     jl_value_t *res;
     assert(ptls->finalizers_inhibited == 0);
 
 #ifdef MIGRATE_TASKS
     jl_task_t *pt = ptls->previous_task;
+    ptls->previous_task = NULL;
     if (!pt->sticky && !pt->copy_stack)
-        pt->tid = -1;
+        jl_atomic_store_release(&pt->tid, -1);
 #endif
 
-    t->started = 1;
-    if (t->_isexception) {
+    ct->started = 1;
+    if (ct->_isexception) {
         record_backtrace(ptls, 0);
-        jl_push_excstack(&t->excstack, t->result,
+        jl_push_excstack(&ct->excstack, ct->result,
                          ptls->bt_data, ptls->bt_size);
-        res = t->result;
+        res = ct->result;
     }
     else {
         JL_TRY {
@@ -834,19 +880,19 @@ CFI_NORETURN
                 jl_sigint_safepoint(ptls);
             }
             JL_TIMING(ROOT);
-            ptls->world_age = jl_world_counter;
-            res = jl_apply(&t->start, 1);
+            ct->world_age = jl_world_counter;
+            res = jl_apply(&ct->start, 1);
         }
         JL_CATCH {
             res = jl_current_exception();
-            t->_isexception = 1;
+            ct->_isexception = 1;
             goto skip_pop_exception;
         }
 skip_pop_exception:;
     }
-    t->result = res;
-    jl_gc_wb(t, t->result);
-    jl_finish_task(t);
+    ct->result = res;
+    jl_gc_wb(ct, ct->result);
+    jl_finish_task(ct);
     gc_debug_critical_error();
     abort();
 }
@@ -1105,13 +1151,13 @@ static void jl_start_fiber_set(jl_ucontext_t *t)
 #endif
 
 #if defined(JL_HAVE_SIGALTSTACK)
-#if defined(JL_TSAN_ENABLED)
+#if defined(_COMPILER_TSAN_ENABLED_)
 #error TSAN support not currently implemented for this tasking model
 #endif
 
 static void start_basefiber(int sig)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     if (jl_setjmp(ptls->base_ctx.uc_mcontext, 0))
         start_task(); // sanitizer_finish_switch_fiber is part of start_task
 }
@@ -1124,7 +1170,7 @@ static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner)
     if (stk == NULL)
         return NULL;
     // setup
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     jl_ucontext_t base_ctx;
     memcpy(&base_ctx, &ptls->base_ctx, sizeof(ptls->base_ctx));
     sigfillset(&set);
@@ -1195,7 +1241,7 @@ static void jl_set_fiber(jl_ucontext_t *t)
 #endif
 
 #if defined(JL_HAVE_ASYNCIFY)
-#if defined(JL_TSAN_ENABLED)
+#if defined(_COMPILER_TSAN_ENABLED_)
 #error TSAN support not currently implemented for this tasking model
 #endif
 
@@ -1212,15 +1258,24 @@ static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) J
 #endif
 
 // Initialize a root task using the given stack.
-void jl_init_root_task(void *stack_lo, void *stack_hi)
+void jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    if (ptls->root_task == NULL) {
-        ptls->root_task = (jl_task_t*)jl_gc_alloc(ptls, sizeof(jl_task_t), jl_task_type);
-        memset(ptls->root_task, 0, sizeof(jl_task_t));
-        ptls->root_task->tls = jl_nothing;
-    }
-    ptls->current_task = ptls->root_task;
+    assert(ptls->root_task == NULL);
+    // We need `gcstack` in `Task` to allocate Julia objects, *including* the `Task` type.
+    // However, to allocate a `Task` via `jl_gc_alloc` as done in `jl_init_root_task`,
+    // we need the `Task` type itself. We use a stack-allocated "raw" `jl_task_t` struct to
+    // work around this chicken-and-egg problem. Note that this relies on GC being turned
+    // off, as GC would fail because we don't/can't allocate the type tag.
+    struct {
+        jl_value_t *type;
+        jl_task_t value;
+    } bootstrap_task = {0};
+    jl_set_pgcstack(&bootstrap_task.value.gcstack);
+    bootstrap_task.value.ptls = ptls;
+    if (jl_nothing == NULL) // make a placeholder
+        jl_nothing = jl_gc_permobj(0, jl_nothing_type);
+    jl_task_t *ct = (jl_task_t*)jl_gc_alloc(ptls, sizeof(jl_task_t), jl_task_type);
+    memset(ct, 0, sizeof(jl_task_t));
     void *stack = stack_lo;
     size_t ssize = (char*)stack_hi - (char*)stack_lo;
 #ifndef _OS_WINDOWS_
@@ -1230,32 +1285,40 @@ void jl_init_root_task(void *stack_lo, void *stack_hi)
     }
 #endif
     if (always_copy_stacks) {
-        ptls->current_task->copy_stack = 1;
-        ptls->current_task->stkbuf = NULL;
-        ptls->current_task->bufsz = 0;
+        ct->copy_stack = 1;
+        ct->stkbuf = NULL;
+        ct->bufsz = 0;
     }
     else {
-        ptls->current_task->copy_stack = 0;
-        ptls->current_task->stkbuf = stack;
-        ptls->current_task->bufsz = ssize;
+        ct->copy_stack = 0;
+        ct->stkbuf = stack;
+        ct->bufsz = ssize;
     }
-    ptls->current_task->started = 1;
-    ptls->current_task->next = jl_nothing;
-    ptls->current_task->queue = jl_nothing;
-    ptls->current_task->_state = JL_TASK_STATE_RUNNABLE;
-    ptls->current_task->start = NULL;
-    ptls->current_task->result = jl_nothing;
-    ptls->current_task->donenotify = jl_nothing;
-    ptls->current_task->_isexception = 0;
-    ptls->current_task->logstate = jl_nothing;
-    ptls->current_task->eh = NULL;
-    ptls->current_task->gcstack = NULL;
-    ptls->current_task->excstack = NULL;
-    ptls->current_task->tid = ptls->tid;
-    ptls->current_task->sticky = 1;
-
-#ifdef JL_TSAN_ENABLED
-    ptls->current_task->tsan_state = __tsan_get_current_fiber();
+    ct->started = 1;
+    ct->next = jl_nothing;
+    ct->queue = jl_nothing;
+    ct->tls = jl_nothing;
+    ct->_state = JL_TASK_STATE_RUNNABLE;
+    ct->start = NULL;
+    ct->result = jl_nothing;
+    ct->donenotify = jl_nothing;
+    ct->_isexception = 0;
+    ct->logstate = jl_nothing;
+    ct->eh = NULL;
+    ct->gcstack = NULL;
+    ct->excstack = NULL;
+    ct->tid = ptls->tid;
+    ct->sticky = 1;
+    ct->ptls = ptls;
+    ct->world_age = 1; // OK to run Julia code on this task
+    ptls->root_task = ct;
+    ptls->current_task = ct;
+    JL_GC_PROMISE_ROOTED(ct);
+    jl_set_pgcstack(&ct->gcstack);
+    assert(jl_current_task == ct);
+
+#ifdef _COMPILER_TSAN_ENABLED_
+    ct->tsan_state = __tsan_get_current_fiber();
 #endif
 
 #ifdef COPY_STACKS
diff --git a/src/threading.c b/src/threading.c
index bffc1b02d909a8..ffe53c07b45ee3 100644
--- a/src/threading.c
+++ b/src/threading.c
@@ -36,6 +36,32 @@ extern "C" {
 
 #include "threading.h"
 
+JL_DLLEXPORT void *jl_get_ptls_states(void)
+{
+    // mostly deprecated: use current_task instead
+    return jl_current_task->ptls;
+}
+
+#if !defined(_OS_WINDOWS_)
+static pthread_key_t jl_safe_restore_key;
+
+__attribute__((constructor)) void _jl_init_safe_restore(void)
+{
+    pthread_key_create(&jl_safe_restore_key, NULL);
+}
+
+JL_DLLEXPORT jl_jmp_buf *jl_get_safe_restore(void)
+{
+    return (jl_jmp_buf*)pthread_getspecific(jl_safe_restore_key);
+}
+
+JL_DLLEXPORT void jl_set_safe_restore(jl_jmp_buf *sr)
+{
+    pthread_setspecific(jl_safe_restore_key, (void*)sr);
+}
+#endif
+
+
 // The tls_states buffer:
 //
 // On platforms that do not use ELF (i.e. where `__thread` is emulated with
@@ -53,39 +79,42 @@ extern "C" {
 // Mac doesn't seem to have static TLS model so the runtime TLS getter
 // registration will only add overhead to TLS access. The `__thread` variables
 // are emulated with `pthread_key_t` so it is actually faster to use it directly.
-static pthread_key_t jl_tls_key;
+static pthread_key_t jl_pgcstack_key;
 
-__attribute__((constructor)) void jl_mac_init_tls(void)
+__attribute__((constructor)) void jl_init_tls(void)
 {
-    pthread_key_create(&jl_tls_key, NULL);
+    pthread_key_create(&jl_pgcstack_key, NULL);
 }
 
-JL_DLLEXPORT JL_CONST_FUNC jl_ptls_t (jl_get_ptls_states)(void) JL_GLOBALLY_ROOTED
+JL_CONST_FUNC jl_gcframe_t **jl_get_pgcstack(void) JL_NOTSAFEPOINT
 {
-    void *ptls = pthread_getspecific(jl_tls_key);
-    if (__unlikely(!ptls)) {
-        ptls = calloc(1, sizeof(jl_tls_states_t));
-        pthread_setspecific(jl_tls_key, ptls);
-    }
-    return (jl_ptls_t)ptls;
+    return pthread_getspecific(jl_pgcstack_key);
 }
 
-// This is only used after the tls is already initialized on the thread
-static JL_CONST_FUNC jl_ptls_t jl_get_ptls_states_fast(void) JL_NOTSAFEPOINT
+void jl_set_pgcstack(jl_gcframe_t **pgcstack) JL_NOTSAFEPOINT
 {
-    return (jl_ptls_t)pthread_getspecific(jl_tls_key);
+    pthread_setspecific(jl_pgcstack_key, (void*)pgcstack);
 }
 
-jl_get_ptls_states_func jl_get_ptls_states_getter(void)
+void jl_pgcstack_getkey(jl_get_pgcstack_func **f, pthread_key_t *k)
 {
     // for codegen
-    return &jl_get_ptls_states_fast;
+    *f = pthread_getspecific;
+    *k = jl_pgcstack_key;
 }
+
+
+JL_DLLEXPORT void jl_pgcstack_setkey(jl_get_pgcstack_func *f, pthread_key_t k)
+{
+    jl_safe_printf("ERROR: Attempt to change TLS address.\n");
+}
+
 #elif defined(_OS_WINDOWS_)
 // Apparently windows doesn't have a static TLS model (or one that can be
 // reliably used from a shared library) either..... Use `TLSAlloc` instead.
 
-static DWORD jl_tls_key;
+static DWORD jl_pgcstack_key;
+static DWORD jl_safe_restore_key;
 
 // Put this here for now. We can move this out later if we find more use for it.
 BOOLEAN WINAPI DllMain(IN HINSTANCE hDllHandle, IN DWORD nReason,
@@ -93,51 +122,84 @@ BOOLEAN WINAPI DllMain(IN HINSTANCE hDllHandle, IN DWORD nReason,
 {
     switch (nReason) {
     case DLL_PROCESS_ATTACH:
-        jl_tls_key = TlsAlloc();
-        assert(jl_tls_key != TLS_OUT_OF_INDEXES);
+        jl_pgcstack_key = TlsAlloc();
+        assert(jl_pgcstack_key != TLS_OUT_OF_INDEXES);
+        jl_safe_restore_key = TlsAlloc();
+        assert(jl_safe_restore_key != TLS_OUT_OF_INDEXES);
         // Fall through
     case DLL_THREAD_ATTACH:
-        TlsSetValue(jl_tls_key, calloc(1, sizeof(jl_tls_states_t)));
         break;
     case DLL_THREAD_DETACH:
-        free(TlsGetValue(jl_tls_key));
-        TlsSetValue(jl_tls_key, NULL);
         break;
     case DLL_PROCESS_DETACH:
-        free(TlsGetValue(jl_tls_key));
-        TlsFree(jl_tls_key);
+        TlsFree(jl_pgcstack_key);
+        TlsFree(jl_safe_restore_key);
         break;
     }
     return 1; // success
 }
 
-JL_DLLEXPORT JL_CONST_FUNC jl_ptls_t (jl_get_ptls_states)(void) JL_GLOBALLY_ROOTED
-{
-#if defined(_CPU_X86_64_)
-    DWORD *plast_error = (DWORD*)(__readgsqword(0x30) + 0x68);
-    DWORD last_error = *plast_error;
-#elif defined(_CPU_X86_)
-    DWORD *plast_error = (DWORD*)(__readfsdword(0x18) + 0x34);
-    DWORD last_error = *plast_error;
-#else
-    DWORD last_error = GetLastError();
-#endif
-    jl_ptls_t state = (jl_ptls_t)TlsGetValue(jl_tls_key);
 #if defined(_CPU_X86_64_)
-    *plast_error = last_error;
+#define SAVE_ERRNO \
+    DWORD *plast_error = (DWORD*)(__readgsqword(0x30) + 0x68); \
+    DWORD last_error = *plast_error
+#define LOAD_ERRNO \
+    *plast_error = last_error
 #elif defined(_CPU_X86_)
-    *plast_error = last_error;
+#define SAVE_ERRNO \
+    DWORD *plast_error = (DWORD*)(__readfsdword(0x18) + 0x34); \
+    DWORD last_error = *plast_error
+#define LOAD_ERRNO \
+    *plast_error = last_error
 #else
-    SetLastError(last_error);
+#define SAVE_ERRNO \
+    DWORD last_error = GetLastError()
+#define LOAD_ERRNO \
+    SetLastError(last_error)
 #endif
-    return state;
+
+JL_DLLEXPORT jl_jmp_buf *jl_get_safe_restore(void)
+{
+    SAVE_ERRNO;
+    jl_jmp_buf *sr = (jl_jmp_buf*)TlsGetValue(jl_safe_restore_key);
+    LOAD_ERRNO;
+    return sr;
+}
+
+JL_DLLEXPORT void jl_set_safe_restore(jl_jmp_buf *sr)
+{
+    SAVE_ERRNO;
+    TlsSetValue(jl_safe_restore_key, (void*)sr);
+    LOAD_ERRNO;
+}
+
+JL_CONST_FUNC jl_gcframe_t **jl_get_pgcstack(void) JL_NOTSAFEPOINT
+{
+    SAVE_ERRNO; // remember the thread's last-error value so the TLS read below cannot disturb it
+    jl_gcframe_t **pgcstack = (jl_gcframe_t**)TlsGetValue(jl_pgcstack_key); // NULL if never set
+    LOAD_ERRNO; // put the saved last-error value back before returning
+    return pgcstack;
+}
 
-jl_get_ptls_states_func jl_get_ptls_states_getter(void)
+void jl_set_pgcstack(jl_gcframe_t **pgcstack) JL_NOTSAFEPOINT
+{
+    // n.b.: this smashes GetLastError
+    TlsSetValue(jl_pgcstack_key, (void*)pgcstack);
+}
+
+void jl_pgcstack_getkey(jl_get_pgcstack_func **f, DWORD *k)
 {
     // for codegen
-    return &jl_get_ptls_states;
+    *f = jl_get_pgcstack;
+    *k = jl_pgcstack_key;
 }
+
+JL_DLLEXPORT void jl_pgcstack_setkey(jl_get_pgcstack_func *f, DWORD k)
+{
+    jl_safe_printf("ERROR: Attempt to change TLS address.\n");
+}
+
+
 #else
 // We use the faster static version in the main executable to replace
 // the slower version in the shared object. The code in different libraries
@@ -145,89 +207,100 @@ jl_get_ptls_states_func jl_get_ptls_states_getter(void)
 // The general solution is to add one more indirection in the C entry point.
 //
 // When `ifunc` is available, we can use it to trick the linker to use the
-// real address (`jl_get_ptls_states_static`) directly as the symbol address.
+// real address (`jl_get_pgcstack_static`) directly as the symbol address.
 //
 // However, since the detection of the static version in `ifunc`
 // is not guaranteed to be reliable, we still need to fallback to the wrapper
 // version as the symbol address if we didn't find the static version in `ifunc`.
 
 // fallback provided for embedding
-static JL_CONST_FUNC jl_ptls_t jl_get_ptls_states_fallback(void)
+static jl_pgcstack_key_t jl_pgcstack_key;
+static __thread jl_gcframe_t **pgcstack_;
+static jl_gcframe_t **jl_get_pgcstack_fallback(void) JL_NOTSAFEPOINT
+{
+    return pgcstack_;
+}
+static jl_gcframe_t ***jl_pgcstack_addr_fallback(void) JL_NOTSAFEPOINT
 {
-    static __thread jl_tls_states_t tls_states;
-    return &tls_states;
+    return &pgcstack_;
+}
+void jl_set_pgcstack(jl_gcframe_t **pgcstack) JL_NOTSAFEPOINT
+{
+    *jl_pgcstack_key() = pgcstack;
 }
 #  if JL_USE_IFUNC
-JL_DLLEXPORT JL_CONST_FUNC __attribute__((weak))
-jl_ptls_t jl_get_ptls_states_static(void);
+JL_DLLEXPORT __attribute__((weak))
+void jl_register_pgcstack_getter(void);
 #  endif
-static jl_ptls_t jl_get_ptls_states_init(void);
-static jl_get_ptls_states_func jl_tls_states_cb = jl_get_ptls_states_init;
-static jl_ptls_t jl_get_ptls_states_init(void)
+static jl_gcframe_t **jl_get_pgcstack_init(void);
+static jl_get_pgcstack_func *jl_get_pgcstack_cb = jl_get_pgcstack_init;
+static jl_gcframe_t **jl_get_pgcstack_init(void)
 {
     // This 2-step initialization is used to detect calling
-    // `jl_set_ptls_states_getter` after the address of the TLS variables
+    // `jl_pgcstack_setkey` after the address of the TLS variables
     // are used. Since the address of TLS variables should be constant,
     // changing the getter address can result in weird crashes.
 
     // This is clearly not thread safe but should be fine since we
     // make sure the tls states callback is finalized before adding
     // multiple threads
-    jl_get_ptls_states_func cb = jl_get_ptls_states_fallback;
 #  if JL_USE_IFUNC
-    if (jl_get_ptls_states_static)
-        cb = jl_get_ptls_states_static;
+    if (jl_register_pgcstack_getter)
+        jl_register_pgcstack_getter();
+    else
 #  endif
-    jl_tls_states_cb = cb;
-    return cb();
+    {
+        jl_get_pgcstack_cb = jl_get_pgcstack_fallback;
+        jl_pgcstack_key = &jl_pgcstack_addr_fallback;
+    }
+    return jl_get_pgcstack_cb();
 }
 
-JL_DLLEXPORT void jl_set_ptls_states_getter(jl_get_ptls_states_func f)
+JL_DLLEXPORT void jl_pgcstack_setkey(jl_get_pgcstack_func *f, jl_pgcstack_key_t k)
 {
-    if (f == jl_tls_states_cb || !f)
+    if (f == jl_get_pgcstack_cb || !f)
         return;
     // only allow setting this once
-    if (jl_tls_states_cb == jl_get_ptls_states_init) {
-        jl_tls_states_cb = f;
-    }
-    else {
+    if (jl_get_pgcstack_cb != jl_get_pgcstack_init) {
         jl_safe_printf("ERROR: Attempt to change TLS address.\n");
         exit(1);
     }
+    jl_get_pgcstack_cb = f;
+    jl_pgcstack_key = k;
 }
 
-JL_DLLEXPORT JL_CONST_FUNC jl_ptls_t (jl_get_ptls_states)(void) JL_GLOBALLY_ROOTED
+JL_DLLEXPORT jl_gcframe_t **jl_get_pgcstack(void) JL_GLOBALLY_ROOTED
 {
 #ifndef __clang_analyzer__
-    return (*jl_tls_states_cb)();
+    return jl_get_pgcstack_cb();
 #endif
 }
 
-jl_get_ptls_states_func jl_get_ptls_states_getter(void)
+void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k)
 {
-    if (jl_tls_states_cb == jl_get_ptls_states_init)
-        jl_get_ptls_states_init();
+    if (jl_get_pgcstack_cb == jl_get_pgcstack_init)
+        jl_get_pgcstack_init();
     // for codegen
-    return jl_tls_states_cb;
+    *f = jl_get_pgcstack_cb;
+    *k = jl_pgcstack_key;
 }
 #endif
 
 jl_ptls_t *jl_all_tls_states JL_GLOBALLY_ROOTED;
-uint8_t *jl_measure_compile_time = NULL;
-uint64_t *jl_cumulative_compile_time = NULL;
+uint8_t jl_measure_compile_time_enabled = 0;
+uint64_t jl_cumulative_compile_time = 0;
 
 // return calling thread's ID
 // Also update the suspended_threads list in signals-mach when changing the
 // type of the thread id.
 JL_DLLEXPORT int16_t jl_threadid(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    return ptls->tid;
+    return jl_current_task->tid;
 }
 
-void jl_init_threadtls(int16_t tid)
+jl_ptls_t jl_init_threadtls(int16_t tid)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = (jl_ptls_t)calloc(1, sizeof(jl_tls_states_t));
     ptls->system_id = jl_thread_self();
     seed_cong(&ptls->rngseed);
 #ifdef _OS_WINDOWS_
@@ -240,10 +313,7 @@ void jl_init_threadtls(int16_t tid)
         }
     }
 #endif
-    assert(ptls->world_age == 0);
-    ptls->world_age = 1; // OK to run Julia code on this thread
     ptls->tid = tid;
-    ptls->pgcstack = NULL;
     ptls->gc_state = 0; // GC unsafe
     // Conditionally initialize the safepoint address. See comment in
     // `safepoint.c`
@@ -254,22 +324,16 @@ void jl_init_threadtls(int16_t tid)
         ptls->safepoint = (size_t*)(jl_safepoint_pages + jl_page_size * 2 +
                                     sizeof(size_t));
     }
-    ptls->defer_signal = 0;
     jl_bt_element_t *bt_data = (jl_bt_element_t*)
         malloc_s(sizeof(jl_bt_element_t) * (JL_MAX_BT_SIZE + 1));
     memset(bt_data, 0, sizeof(jl_bt_element_t) * (JL_MAX_BT_SIZE + 1));
     ptls->bt_data = bt_data;
-    ptls->sig_exception = NULL;
-    ptls->previous_exception = NULL;
-    ptls->next_task = NULL;
-#ifdef _OS_WINDOWS_
-    ptls->needs_resetstkoflw = 0;
-#endif
     small_arraylist_new(&ptls->locks, 0);
     jl_init_thread_heap(ptls);
-    jl_install_thread_signal_handler(ptls);
 
     jl_all_tls_states[tid] = ptls;
+
+    return ptls;
 }
 
 // lock for code generation
@@ -294,11 +358,11 @@ static inline size_t jl_add_tls_size(size_t orig_size, size_t size, size_t align
 {
     return LLT_ALIGN(orig_size, align) + size;
 }
-static inline ssize_t jl_check_tls_bound(void *tp, void *ptls, size_t tls_size)
+static inline ssize_t jl_check_tls_bound(void *tp, jl_gcframe_t ***k0, size_t tls_size)
 {
-    ssize_t offset = (char*)ptls - (char*)tp;
+    ssize_t offset = (char*)k0 - (char*)tp;
     if (offset < JL_ELF_TLS_INIT_SIZE ||
-        (size_t)offset + sizeof(jl_tls_states_t) > tls_size)
+        (size_t)offset + sizeof(*k0) > tls_size)
         return -1;
     return offset;
 }
@@ -309,10 +373,10 @@ static inline size_t jl_add_tls_size(size_t orig_size, size_t size, size_t align
 {
     return LLT_ALIGN(orig_size + size, align);
 }
-static inline ssize_t jl_check_tls_bound(void *tp, void *ptls, size_t tls_size)
+static inline ssize_t jl_check_tls_bound(void *tp, jl_gcframe_t ***k0, size_t tls_size)
 {
-    ssize_t offset = (char*)tp - (char*)ptls;
-    if (offset < sizeof(jl_tls_states_t) || offset > tls_size)
+    ssize_t offset = (char*)tp - (char*)k0;
+    if (offset < sizeof(*k0) || offset > tls_size)
         return -1;
     return -offset;
 }
@@ -347,7 +411,12 @@ static int check_tls_cb(struct dl_phdr_info *info, size_t size, void *_data)
 
 static void jl_check_tls(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_get_pgcstack_func *f;
+    jl_gcframe_t ***(*k)(void);
+    jl_pgcstack_getkey(&f, &k);
+    jl_gcframe_t ***k0 = k();
+    if (k0 == NULL)
+        return;
     check_tls_cb_t data = {0};
     dl_iterate_phdr(check_tls_cb, &data);
     if (data.total_size == 0)
@@ -364,7 +433,7 @@ static void jl_check_tls(void)
 #else
 #  error "Cannot emit thread pointer for this architecture."
 #endif
-    ssize_t offset = jl_check_tls_bound(tp, ptls, data.total_size);
+    ssize_t offset = jl_check_tls_bound(tp, k0, data.total_size);
     if (offset == -1)
         return;
     jl_tls_offset = offset;
@@ -398,16 +467,9 @@ void jl_init_threading(void)
     }
     if (jl_n_threads <= 0)
         jl_n_threads = 1;
-    jl_measure_compile_time = (uint8_t*)calloc(jl_n_threads, sizeof(*jl_measure_compile_time));
-    jl_cumulative_compile_time = (uint64_t*)calloc(jl_n_threads, sizeof(*jl_cumulative_compile_time));
 #ifndef __clang_analyzer__
     jl_all_tls_states = (jl_ptls_t*)calloc(jl_n_threads, sizeof(void*));
 #endif
-    // initialize this thread (set tid, create heap, etc.)
-    jl_init_threadtls(0);
-
-    // initialize threading infrastructure
-    jl_init_threadinginfra();
 }
 
 static uv_barrier_t thread_init_done;
diff --git a/src/threading.h b/src/threading.h
index 43516ccceebb1e..4c6f1e19881f5c 100644
--- a/src/threading.h
+++ b/src/threading.h
@@ -21,7 +21,7 @@ typedef struct _jl_threadarg_t {
 } jl_threadarg_t;
 
 // each thread must initialize its TLS
-void jl_init_threadtls(int16_t tid);
+jl_ptls_t jl_init_threadtls(int16_t tid);
 
 // provided by a threading infrastructure
 void jl_init_threadinginfra(void);
diff --git a/src/timing.c b/src/timing.c
index 12f47bbfa00865..12093d2e142c09 100644
--- a/src/timing.c
+++ b/src/timing.c
@@ -48,7 +48,7 @@ void jl_init_timing(void)
 
 void jl_destroy_timing(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     jl_timing_block_t *stack = ptls->timing_stack;
     while (stack) {
         _jl_timing_block_destroy(stack);
diff --git a/src/timing.h b/src/timing.h
index 9a3307709a38fd..fd84707ad5d2c2 100644
--- a/src/timing.h
+++ b/src/timing.h
@@ -116,8 +116,8 @@ STATIC_INLINE uint64_t _jl_timing_block_init(jl_timing_block_t *block, int owner
 
 STATIC_INLINE void _jl_timing_block_ctor(jl_timing_block_t *block, int owner) {
     uint64_t t = _jl_timing_block_init(block, owner);
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_timing_block_t **prevp = &ptls->timing_stack;
+    jl_task_t *ct = jl_current_task;
+    jl_timing_block_t **prevp = &ct->ptls->timing_stack;
     block->prev = *prevp;
     if (block->prev)
         _jl_timing_block_stop(block->prev, t);
@@ -126,10 +126,10 @@ STATIC_INLINE void _jl_timing_block_ctor(jl_timing_block_t *block, int owner) {
 
 STATIC_INLINE void _jl_timing_block_destroy(jl_timing_block_t *block) {
     uint64_t t = cycleclock();
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     _jl_timing_block_stop(block, t);
     jl_timing_data[block->owner] += block->total;
-    jl_timing_block_t **pcur = &ptls->timing_stack;
+    jl_timing_block_t **pcur = &ct->ptls->timing_stack;
     assert(*pcur == block);
     *pcur = block->prev;
     if (block->prev)
diff --git a/src/tls.h b/src/tls.h
deleted file mode 100644
index 7f14d4acd7dc55..00000000000000
--- a/src/tls.h
+++ /dev/null
@@ -1,30 +0,0 @@
-// This file is a part of Julia. License is MIT: https://julialang.org/license
-
-#ifndef JL_TLS_H
-#define JL_TLS_H
-
-// Thread-local storage access
-
-typedef struct _jl_tls_states_t jl_tls_states_t;
-
-typedef jl_tls_states_t *jl_ptls_t;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-JL_DLLEXPORT int16_t jl_threadid(void);
-JL_DLLEXPORT void jl_threading_profile(void);
-
-JL_DLLEXPORT JL_CONST_FUNC jl_ptls_t (jl_get_ptls_states)(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT;
-
-typedef jl_ptls_t (*jl_get_ptls_states_func)(void);
-#if !defined(_OS_DARWIN_) && !defined(_OS_WINDOWS_)
-JL_DLLEXPORT void jl_set_ptls_states_getter(jl_get_ptls_states_func f);
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/src/toplevel.c b/src/toplevel.c
index 88ace36d193a88..c11dea57c84895 100644
--- a/src/toplevel.c
+++ b/src/toplevel.c
@@ -66,11 +66,12 @@ void jl_module_run_initializer(jl_module_t *m)
     jl_function_t *f = jl_module_get_initializer(m);
     if (f == NULL)
         return;
-    size_t last_age = jl_get_ptls_states()->world_age;
+    jl_task_t *ct = jl_current_task;
+    size_t last_age = ct->world_age;
     JL_TRY {
-        jl_get_ptls_states()->world_age = jl_world_counter;
+        ct->world_age = jl_world_counter;
         jl_apply(&f, 1);
-        jl_get_ptls_states()->world_age = last_age;
+        ct->world_age = last_age;
     }
     JL_CATCH {
         if (jl_initerror_type == NULL) {
@@ -115,7 +116,7 @@ static int jl_is__toplevel__mod(jl_module_t *mod)
 // TODO: add locks around global state mutation operations
 static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     assert(ex->head == module_sym);
     if (jl_array_len(ex->args) != 3 || !jl_is_expr(jl_exprarg(ex, 2))) {
         jl_error("syntax: malformed module expression");
@@ -148,8 +149,8 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex
         newm->parent = parent_module;
         jl_binding_t *b = jl_get_binding_wr(parent_module, name, 1);
         jl_declare_constant(b);
-        jl_value_t *old = jl_atomic_compare_exchange(&b->value, NULL, (jl_value_t*)newm);
-        if (old != NULL) {
+        jl_value_t *old = NULL;
+        if (!jl_atomic_cmpswap(&b->value, &old, (jl_value_t*)newm)) {
             if (!jl_is_module(old)) {
                 jl_errorf("invalid redefinition of constant %s", jl_symbol_name(name));
             }
@@ -173,7 +174,7 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex
         jl_base_module = newm;
     }
 
-    size_t last_age = ptls->world_age;
+    size_t last_age = ct->world_age;
 
     // add standard imports unless baremodule
     if (std_imports) {
@@ -189,13 +190,13 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex
     jl_array_t *exprs = ((jl_expr_t*)jl_exprarg(ex, 2))->args;
     for (int i = 0; i < jl_array_len(exprs); i++) {
         // process toplevel form
-        ptls->world_age = jl_world_counter;
+        ct->world_age = jl_world_counter;
         form = jl_expand_stmt_with_loc(jl_array_ptr_ref(exprs, i), newm, jl_filename, jl_lineno);
-        ptls->world_age = jl_world_counter;
+        ct->world_age = jl_world_counter;
         (void)jl_toplevel_eval_flex(newm, form, 1, 1);
     }
     newm->primary_world = jl_world_counter;
-    ptls->world_age = last_age;
+    ct->world_age = last_age;
 
 #if 0
     // some optional post-processing steps
@@ -267,7 +268,7 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex
 
 static jl_value_t *jl_eval_dot_expr(jl_module_t *m, jl_value_t *x, jl_value_t *f, int fast)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     jl_value_t **args;
     JL_GC_PUSHARGS(args, 3);
     args[1] = jl_toplevel_eval_flex(m, x, fast, 0);
@@ -278,10 +279,10 @@ static jl_value_t *jl_eval_dot_expr(jl_module_t *m, jl_value_t *x, jl_value_t *f
     }
     else {
         args[0] = jl_eval_global_var(jl_base_relative_to(m), jl_symbol("getproperty"));
-        size_t last_age = ptls->world_age;
-        ptls->world_age = jl_world_counter;
+        size_t last_age = ct->world_age;
+        ct->world_age = jl_world_counter;
         args[0] = jl_apply(args, 3);
-        ptls->world_age = last_age;
+        ct->world_age = last_age;
     }
     JL_GC_POP();
     return args[0];
@@ -411,19 +412,19 @@ static jl_module_t *call_require(jl_module_t *mod, jl_sym_t *var) JL_GLOBALLY_RO
     static jl_value_t *require_func = NULL;
     int build_mode = jl_generating_output();
     jl_module_t *m = NULL;
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     if (require_func == NULL && jl_base_module != NULL) {
         require_func = jl_get_global(jl_base_module, jl_symbol("require"));
     }
     if (require_func != NULL) {
-        size_t last_age = ptls->world_age;
-        ptls->world_age = (build_mode ? jl_base_module->primary_world : jl_world_counter);
+        size_t last_age = ct->world_age;
+        ct->world_age = (build_mode ? jl_base_module->primary_world : jl_world_counter);
         jl_value_t *reqargs[3];
         reqargs[0] = require_func;
         reqargs[1] = (jl_value_t*)mod;
         reqargs[2] = (jl_value_t*)var;
         m = (jl_module_t*)jl_apply(reqargs, 3);
-        ptls->world_age = last_age;
+        ct->world_age = last_age;
     }
     if (m == NULL || !jl_is_module(m)) {
         jl_errorf("failed to load module %s", jl_symbol_name(var));
@@ -619,7 +620,7 @@ static void jl_eval_errorf(jl_module_t *m, const char* fmt, ...)
 
 jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int fast, int expanded)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     if (!jl_is_expr(e)) {
         if (jl_is_linenode(e)) {
             jl_lineno = jl_linenode_line(e);
@@ -652,7 +653,7 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
         }
     }
 
-    if (ptls->in_pure_callback) {
+    if (ct->ptls->in_pure_callback) {
         jl_error("eval cannot be used in a generated function");
     }
 
@@ -660,11 +661,11 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
     jl_code_info_t *thk = NULL;
     JL_GC_PUSH3(&mfunc, &thk, &ex);
 
-    size_t last_age = ptls->world_age;
+    size_t last_age = ct->world_age;
     if (!expanded && jl_needs_lowering(e)) {
-        ptls->world_age = jl_world_counter;
+        ct->world_age = jl_world_counter;
         ex = (jl_expr_t*)jl_expand_with_loc_warn(e, m, jl_filename, jl_lineno);
-        ptls->world_age = last_age;
+        ct->world_age = last_age;
     }
     jl_sym_t *head = jl_is_expr(ex) ? ex->head : NULL;
 
@@ -868,12 +869,12 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
         // TODO: This is still not correct since an `eval` can happen elsewhere, but it
         // helps in common cases.
         size_t world = jl_world_counter;
-        ptls->world_age = world;
+        ct->world_age = world;
         if (!has_defs && jl_get_module_infer(m) != 0) {
             (void)jl_type_infer(mfunc, world, 0);
         }
         result = jl_invoke(/*func*/NULL, /*args*/NULL, /*nargs*/0, mfunc);
-        ptls->world_age = last_age;
+        ct->world_age = last_age;
     }
     else {
         // use interpreter
@@ -921,10 +922,17 @@ static void jl_check_open_for(jl_module_t *m, const char* funcname)
     }
 }
 
+JL_DLLEXPORT void jl_check_top_level_effect(jl_module_t *m, char *fname)
+{
+    if (jl_current_task->ptls->in_pure_callback)
+        jl_errorf("%s cannot be used in a generated function", fname);
+    jl_check_open_for(m, fname);
+}
+
 JL_DLLEXPORT jl_value_t *jl_toplevel_eval_in(jl_module_t *m, jl_value_t *ex)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    if (ptls->in_pure_callback)
+    jl_task_t *ct = jl_current_task;
+    if (ct->ptls->in_pure_callback)
         jl_error("eval cannot be used in a generated function");
     jl_check_open_for(m, "eval");
     jl_value_t *v = NULL;
@@ -951,7 +959,8 @@ JL_DLLEXPORT jl_value_t *jl_infer_thunk(jl_code_info_t *thk, jl_module_t *m)
     jl_method_instance_t *li = method_instance_for_thunk(thk, m);
     JL_GC_PUSH1(&li);
     jl_resolve_globals_in_ir((jl_array_t*)thk->code, m, NULL, 0);
-    jl_code_info_t *src = jl_type_infer(li, jl_get_ptls_states()->world_age, 0);
+    jl_task_t *ct = jl_current_task;
+    jl_code_info_t *src = jl_type_infer(li, ct->world_age, 0);
     JL_GC_POP();
     if (src)
         return src->rettype;
@@ -971,8 +980,8 @@ static jl_value_t *jl_parse_eval_all(jl_module_t *module, jl_value_t *text,
     if (!jl_is_string(text) || !jl_is_string(filename)) {
         jl_errorf("Expected `String`s for `text` and `filename`");
     }
-    jl_ptls_t ptls = jl_get_ptls_states();
-    if (ptls->in_pure_callback)
+    jl_task_t *ct = jl_current_task;
+    if (ct->ptls->in_pure_callback)
         jl_error("cannot use include inside a generated function");
     jl_check_open_for(module, "include");
 
@@ -989,7 +998,7 @@ static jl_value_t *jl_parse_eval_all(jl_module_t *module, jl_value_t *text,
 
     int last_lineno = jl_lineno;
     const char *last_filename = jl_filename;
-    size_t last_age = jl_get_ptls_states()->world_age;
+    size_t last_age = ct->world_age;
     int lineno = 0;
     jl_lineno = 0;
     jl_filename = jl_string_data(filename);
@@ -1006,7 +1015,7 @@ static jl_value_t *jl_parse_eval_all(jl_module_t *module, jl_value_t *text,
             }
             expression = jl_expand_with_loc_warn(expression, module,
                                                  jl_string_data(filename), lineno);
-            jl_get_ptls_states()->world_age = jl_world_counter;
+            ct->world_age = jl_world_counter;
             result = jl_toplevel_eval_flex(module, expression, 1, 1);
         }
     }
@@ -1016,7 +1025,7 @@ static jl_value_t *jl_parse_eval_all(jl_module_t *module, jl_value_t *text,
         goto finally; // skip jl_restore_excstack
     }
 finally:
-    jl_get_ptls_states()->world_age = last_age;
+    ct->world_age = last_age;
     jl_lineno = last_lineno;
     jl_filename = last_filename;
     if (err) {
diff --git a/src/typemap.c b/src/typemap.c
index 50fb9635ff210c..58dd2b8b13069f 100644
--- a/src/typemap.c
+++ b/src/typemap.c
@@ -65,7 +65,7 @@ static int jl_type_extract_name_precise(jl_value_t *t1, int invariant)
     }
     else if (jl_is_datatype(t1)) {
         jl_datatype_t *dt = (jl_datatype_t*)t1;
-        if ((invariant || !dt->abstract) && !jl_is_kind(t1))
+        if ((invariant || !dt->name->abstract) && !jl_is_kind(t1))
             return 1;
         return 0;
     }
@@ -1087,16 +1087,13 @@ static unsigned jl_typemap_list_count_locked(jl_typemap_entry_t *ml) JL_NOTSAFEP
     return count;
 }
 
-static void jl_typemap_level_insert_(jl_typemap_t *map, jl_typemap_level_t *cache, jl_typemap_entry_t *newrec, int8_t offs, const struct jl_typemap_info *tparams);
-static void jl_typemap_list_insert_sorted(
-        jl_typemap_t *map, jl_typemap_entry_t **pml, jl_value_t *parent,
-        jl_typemap_entry_t *newrec, const struct jl_typemap_info *tparams);
+static void jl_typemap_level_insert_(jl_typemap_t *map, jl_typemap_level_t *cache, jl_typemap_entry_t *newrec, int8_t offs);
 
 static jl_typemap_level_t *jl_new_typemap_level(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     jl_typemap_level_t *cache =
-        (jl_typemap_level_t*)jl_gc_alloc(ptls, sizeof(jl_typemap_level_t),
+        (jl_typemap_level_t*)jl_gc_alloc(ct->ptls, sizeof(jl_typemap_level_t),
                                          jl_typemap_level_type);
     cache->arg1 = (jl_array_t*)jl_an_empty_vec_any;
     cache->targ = (jl_array_t*)jl_an_empty_vec_any;
@@ -1108,8 +1105,7 @@ static jl_typemap_level_t *jl_new_typemap_level(void)
 }
 
 static jl_typemap_level_t *jl_method_convert_list_to_cache(
-        jl_typemap_t *map, jl_typemap_entry_t *ml, int8_t offs,
-        const struct jl_typemap_info *tparams)
+        jl_typemap_t *map, jl_typemap_entry_t *ml, int8_t offs)
 {
     jl_typemap_level_t *cache = jl_new_typemap_level();
     jl_typemap_entry_t *next = NULL;
@@ -1118,7 +1114,7 @@ static jl_typemap_level_t *jl_method_convert_list_to_cache(
         next = ml->next;
         ml->next = (jl_typemap_entry_t*)jl_nothing;
         // TODO: is it safe to be doing this concurrently with lookups?
-        jl_typemap_level_insert_(map, cache, ml, offs, tparams);
+        jl_typemap_level_insert_(map, cache, ml, offs);
         ml = next;
     }
     JL_GC_POP();
@@ -1127,26 +1123,29 @@ static jl_typemap_level_t *jl_method_convert_list_to_cache(
 
 static void jl_typemap_list_insert_(
         jl_typemap_t *map, jl_typemap_entry_t **pml, jl_value_t *parent,
-        jl_typemap_entry_t *newrec, const struct jl_typemap_info *tparams)
+        jl_typemap_entry_t *newrec)
 {
-    if (*pml == (void*)jl_nothing || newrec->isleafsig || (tparams && tparams->unsorted)) {
-        newrec->next = *pml;
-        jl_gc_wb(newrec, newrec->next);
-        jl_atomic_store_release(pml, newrec);
-        jl_gc_wb(parent, newrec);
-    }
-    else {
-        jl_typemap_list_insert_sorted(map, pml, parent, newrec, tparams);
+    jl_typemap_entry_t *l = *pml;
+    while ((jl_value_t*)l != jl_nothing) {
+        if (newrec->isleafsig || !l->isleafsig)
+            if (newrec->issimplesig || !l->issimplesig)
+                break;
+        pml = &l->next;
+        parent = (jl_value_t*)l;
+        l = l->next;
     }
+    newrec->next = l;
+    jl_gc_wb(newrec, newrec->next);
+    jl_atomic_store_release(pml, newrec);
+    jl_gc_wb(parent, newrec);
 }
 
 static void jl_typemap_insert_generic(
         jl_typemap_t *map, jl_typemap_t **pml, jl_value_t *parent,
-        jl_typemap_entry_t *newrec, int8_t offs,
-        const struct jl_typemap_info *tparams)
+        jl_typemap_entry_t *newrec, int8_t offs)
 {
     if (jl_typeof(*pml) == (jl_value_t*)jl_typemap_level_type) {
-        jl_typemap_level_insert_(map, (jl_typemap_level_t*)*pml, newrec, offs, tparams);
+        jl_typemap_level_insert_(map, (jl_typemap_level_t*)*pml, newrec, offs);
         return;
     }
 
@@ -1154,31 +1153,29 @@ static void jl_typemap_insert_generic(
     if (count > MAX_METHLIST_COUNT) {
         *pml = (jl_typemap_t*)jl_method_convert_list_to_cache(
             map, (jl_typemap_entry_t *)*pml,
-            offs, tparams);
+            offs);
         jl_gc_wb(parent, *pml);
-        jl_typemap_level_insert_(map, (jl_typemap_level_t*)*pml, newrec, offs, tparams);
+        jl_typemap_level_insert_(map, (jl_typemap_level_t*)*pml, newrec, offs);
         return;
     }
 
     jl_typemap_list_insert_(map, (jl_typemap_entry_t **)pml,
-        parent, newrec, tparams);
+        parent, newrec);
 }
 
 static void jl_typemap_array_insert_(
         jl_typemap_t *map, jl_array_t **cache, jl_value_t *key, jl_typemap_entry_t *newrec,
-        jl_value_t *parent, int8_t offs,
-        const struct jl_typemap_info *tparams)
+        jl_value_t *parent, int8_t offs)
 {
     jl_typemap_t **pml = mtcache_hash_lookup_bp(*cache, key);
     if (pml != NULL)
-        jl_typemap_insert_generic(map, pml, (jl_value_t*)*cache, newrec, offs+1, tparams);
+        jl_typemap_insert_generic(map, pml, (jl_value_t*)*cache, newrec, offs+1);
     else
         mtcache_hash_insert(cache, parent, key, (jl_typemap_t*)newrec);
 }
 
 static void jl_typemap_level_insert_(
-        jl_typemap_t *map, jl_typemap_level_t *cache, jl_typemap_entry_t *newrec, int8_t offs,
-        const struct jl_typemap_info *tparams)
+        jl_typemap_t *map, jl_typemap_level_t *cache, jl_typemap_entry_t *newrec, int8_t offs)
 {
     jl_value_t *ttypes = jl_unwrap_unionall((jl_value_t*)newrec->sig);
     size_t l = jl_nparams(ttypes);
@@ -1205,7 +1202,7 @@ static void jl_typemap_level_insert_(
         t1 = (jl_value_t*)jl_assume(jl_typeofbottom_type)->super;
     // If the type at `offs` is Any, put it in the Any list
     if (t1 && jl_is_any(t1)) {
-        jl_typemap_insert_generic(map, &cache->any, (jl_value_t*)cache, newrec, offs+1, tparams);
+        jl_typemap_insert_generic(map, &cache->any, (jl_value_t*)cache, newrec, offs+1);
         return;
     }
     // Don't put Varargs in the optimized caches (too hard to handle in lookup and bp)
@@ -1216,12 +1213,12 @@ static void jl_typemap_level_insert_(
             // and we use the table indexed for that purpose.
             jl_value_t *a0 = jl_tparam0(t1);
             if (is_cache_leaf(a0, 1)) {
-                jl_typemap_array_insert_(map, &cache->targ, a0, newrec, (jl_value_t*)cache, offs, tparams);
+                jl_typemap_array_insert_(map, &cache->targ, a0, newrec, (jl_value_t*)cache, offs);
                 return;
             }
         }
         if (is_cache_leaf(t1, 0)) {
-            jl_typemap_array_insert_(map, &cache->arg1, t1, newrec, (jl_value_t*)cache, offs, tparams);
+            jl_typemap_array_insert_(map, &cache->arg1, t1, newrec, (jl_value_t*)cache, offs);
             return;
         }
 
@@ -1231,23 +1228,23 @@ static void jl_typemap_level_insert_(
         if (jl_is_type_type(t1)) {
             a0 = jl_type_extract_name(jl_tparam0(t1));
             jl_datatype_t *super = a0 ? (jl_datatype_t*)jl_unwrap_unionall(((jl_typename_t*)a0)->wrapper) : jl_any_type;
-            jl_typemap_array_insert_(map, &cache->tname, (jl_value_t*)super->name, newrec, (jl_value_t*)cache, offs, tparams);
+            jl_typemap_array_insert_(map, &cache->tname, (jl_value_t*)super->name, newrec, (jl_value_t*)cache, offs);
             return;
         }
         a0 = jl_type_extract_name(t1);
         if (a0 && a0 != (jl_value_t*)jl_any_type->name) {
-            jl_typemap_array_insert_(map, &cache->name1, a0, newrec, (jl_value_t*)cache, offs, tparams);
+            jl_typemap_array_insert_(map, &cache->name1, a0, newrec, (jl_value_t*)cache, offs);
             return;
         }
     }
-    jl_typemap_list_insert_(map, &cache->linear, (jl_value_t*)cache, newrec, tparams);
+    jl_typemap_list_insert_(map, &cache->linear, (jl_value_t*)cache, newrec);
 }
 
 jl_typemap_entry_t *jl_typemap_alloc(
         jl_tupletype_t *type, jl_tupletype_t *simpletype, jl_svec_t *guardsigs,
         jl_value_t *newvalue, size_t min_world, size_t max_world)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     assert(min_world > 0 && max_world > 0);
     if (!simpletype)
         simpletype = (jl_tupletype_t*)jl_nothing;
@@ -1273,7 +1270,7 @@ jl_typemap_entry_t *jl_typemap_alloc(
     }
 
     jl_typemap_entry_t *newrec =
-        (jl_typemap_entry_t*)jl_gc_alloc(ptls, sizeof(jl_typemap_entry_t),
+        (jl_typemap_entry_t*)jl_gc_alloc(ct->ptls, sizeof(jl_typemap_entry_t),
                                          jl_typemap_entry_type);
     newrec->sig = type;
     newrec->simplesig = simpletype;
@@ -1289,43 +1286,9 @@ jl_typemap_entry_t *jl_typemap_alloc(
 }
 
 void jl_typemap_insert(jl_typemap_t **cache, jl_value_t *parent,
-        jl_typemap_entry_t *newrec, int8_t offs,
-        const struct jl_typemap_info *tparams)
-{
-    jl_typemap_insert_generic(*cache, cache, parent, newrec, offs, tparams);
-}
-
-static void jl_typemap_list_insert_sorted(
-        jl_typemap_t *map, jl_typemap_entry_t **pml, jl_value_t *parent,
-        jl_typemap_entry_t *newrec, const struct jl_typemap_info *tparams)
+        jl_typemap_entry_t *newrec, int8_t offs)
 {
-    jl_typemap_entry_t *l, **pl;
-    pl = pml;
-    l = *pml;
-    jl_value_t *pa = parent;
-    while ((jl_value_t*)l != jl_nothing) {
-        if (!l->isleafsig) { // quickly ignore all of the leafsig entries (these were handled by caller)
-            if (jl_type_morespecific((jl_value_t*)newrec->sig, (jl_value_t*)l->sig)) {
-                if (l->simplesig == (void*)jl_nothing ||
-                    newrec->simplesig != (void*)jl_nothing ||
-                    !jl_types_equal((jl_value_t*)l->sig, (jl_value_t*)newrec->sig)) {
-                    // might need to insert multiple entries for a lookup differing only by their simplesig
-                    // when simplesig contains a kind
-                    // TODO: make this test more correct or figure out a better way to compute this
-                    break;
-                }
-            }
-        }
-        pl = &l->next;
-        pa = (jl_value_t*)l;
-        l = l->next;
-    }
-
-    // insert newrec at the first point it is more specific than the following method
-    newrec->next = l;
-    jl_gc_wb(newrec, l);
-    jl_atomic_store_release(pl, newrec);
-    jl_gc_wb(pa, newrec);
+    jl_typemap_insert_generic(*cache, cache, parent, newrec, offs);
 }
 
 #ifdef __cplusplus
diff --git a/src/uprobes.d b/src/uprobes.d
new file mode 100644
index 00000000000000..ef4c59344bc7d5
--- /dev/null
+++ b/src/uprobes.d
@@ -0,0 +1,18 @@
+/* Julia DTrace provider: static probes for garbage-collector events */
+
+provider julia {
+    probe gc__begin(int collection)
+    probe gc__stop_the_world()
+    probe gc__mark__begin()
+    probe gc__mark__end(int64 scanned_bytes, int64 perm_scanned_bytes)
+    probe gc__sweep__begin(int full)
+    probe gc__sweep__end()
+    probe gc__end()
+    probe gc__finalizer()
+}
+
+#pragma D attributes Evolving/Evolving/Common provider julia provider
+#pragma D attributes Evolving/Evolving/Common provider julia module
+#pragma D attributes Evolving/Evolving/Common provider julia function
+#pragma D attributes Evolving/Evolving/Common provider julia name
+#pragma D attributes Evolving/Evolving/Common provider julia args
diff --git a/stdlib/.gitignore b/stdlib/.gitignore
index 59354288f912c8..891eda58c689dc 100644
--- a/stdlib/.gitignore
+++ b/stdlib/.gitignore
@@ -13,5 +13,7 @@
 /Tar
 /NetworkOptions-*
 /NetworkOptions
+/SuiteSparse-*
+/SuiteSparse
 /*_jll/StdlibArtifacts.toml
 /*/Manifest.toml
diff --git a/stdlib/Artifacts/src/Artifacts.jl b/stdlib/Artifacts/src/Artifacts.jl
index fd65494782d92d..645e77944208bd 100644
--- a/stdlib/Artifacts/src/Artifacts.jl
+++ b/stdlib/Artifacts/src/Artifacts.jl
@@ -73,7 +73,7 @@ a `~/.julia/artifacts/Override.toml` file with the following contents:
 This file defines four overrides; two which override specific artifacts identified
 through their content hashes, two which override artifacts based on their bound names
 within a particular package's UUID.  In both cases, there are two different targets of
-the override: overriding to an on-disk location through an absolutet path, and
+the override: overriding to an on-disk location through an absolute path, and
 overriding to another artifact by its content-hash.
 """
 const ARTIFACT_OVERRIDES = Ref{Union{Dict{Symbol,Any},Nothing}}(nothing)
@@ -418,7 +418,7 @@ collapsed artifact.  Returns `nothing` if no mapping can be found.
 """
 function artifact_hash(name::String, artifacts_toml::String;
                        platform::AbstractPlatform = HostPlatform(),
-                       pkg_uuid::Union{Base.UUID,Nothing}=nothing)
+                       pkg_uuid::Union{Base.UUID,Nothing}=nothing)::Union{Nothing, SHA1}
     meta = artifact_meta(name, artifacts_toml; platform=platform)
     if meta === nothing
         return nothing
diff --git a/stdlib/Artifacts/test/runtests.jl b/stdlib/Artifacts/test/runtests.jl
index 36b5a0d3281fff..7527b548061ce4 100644
--- a/stdlib/Artifacts/test/runtests.jl
+++ b/stdlib/Artifacts/test/runtests.jl
@@ -157,3 +157,10 @@ end
         end
     end
 end
+
+@testset "`Artifacts.artifact_names` and friends" begin
+    n = length(Artifacts.artifact_names)
+    @test length(Base.project_names) == n
+    @test length(Base.manifest_names) == n
+    @test length(Base.preferences_names) == n
+end
diff --git a/stdlib/CompilerSupportLibraries_jll/Project.toml b/stdlib/CompilerSupportLibraries_jll/Project.toml
index 87c7bd506a4f57..15ca525723c072 100644
--- a/stdlib/CompilerSupportLibraries_jll/Project.toml
+++ b/stdlib/CompilerSupportLibraries_jll/Project.toml
@@ -1,6 +1,10 @@
 name = "CompilerSupportLibraries_jll"
 uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
-version = "0.4.0+0"
+
+# NOTE: When updating this, also make sure to update the value
+# `CSL_NEXT_GLIBCXX_VERSION` in `deps/csl.mk`, to properly disable
+# automatic usage of BB-built CSLs on extremely up-to-date systems!
+version = "0.5.0+0"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
@@ -8,3 +12,9 @@ Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 
 [compat]
 julia = "1.6"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/Dates/docs/src/index.md b/stdlib/Dates/docs/src/index.md
index c34be7b9ebef4a..4975f175bbf16a 100644
--- a/stdlib/Dates/docs/src/index.md
+++ b/stdlib/Dates/docs/src/index.md
@@ -83,7 +83,8 @@ julia> Date(Dates.Month(7),Dates.Year(2013))
 [`Date`](@ref) or [`DateTime`](@ref) parsing is accomplished by the use of format strings. Format
 strings work by the notion of defining *delimited* or *fixed-width* "slots" that contain a period
 to parse and passing the text to parse and format string to a [`Date`](@ref) or [`DateTime`](@ref)
-constructor, of the form `Date("2015-01-01","y-m-d")` or `DateTime("20150101","yyyymmdd")`.
+constructor, of the form `Date("2015-01-01",dateformat"y-m-d")` or
+`DateTime("20150101",dateformat"yyyymmdd")`.
 
 Delimited slots are marked by specifying the delimiter the parser should expect between two subsequent
 periods; so `"y-m-d"` lets the parser know that between the first and second slots in a date string
@@ -92,14 +93,14 @@ parser know which periods to parse in each slot.
 
 As in the case of constructors above such as `Date(2013)`, delimited `DateFormat`s allow for
 missing parts of dates and times so long as the preceding parts are given. The other parts are given the usual
-default values.  For example, `Date("1981-03", "y-m-d")` returns `1981-03-01`, whilst
-`Date("31/12", "d/m/y")` gives `0001-12-31`.  (Note that the default year is
+default values.  For example, `Date("1981-03", dateformat"y-m-d")` returns `1981-03-01`, whilst
+`Date("31/12", dateformat"d/m/y")` gives `0001-12-31`.  (Note that the default year is
 1 AD/CE.)
 Consequently, an empty string will always return `0001-01-01` for `Date`s,
 and `0001-01-01T00:00:00.000` for `DateTime`s.
 
 Fixed-width slots are specified by repeating the period character the number of times corresponding
-to the width with no delimiter between characters. So `"yyyymmdd"` would correspond to a date
+to the width with no delimiter between characters. So `dateformat"yyyymmdd"` would correspond to a date
 string like `"20140716"`. The parser distinguishes a fixed-width slot by the absence of a delimiter,
 noting the transition `"yyyymm"` from one period character to the next.
 
@@ -110,10 +111,16 @@ supported, so `u` corresponds to "Jan", "Feb", "Mar", etc. And `U` corresponds t
 custom locales can be loaded by passing in the `locale=>Dict{String,Int}` mapping to the `MONTHTOVALUEABBR`
 and `MONTHTOVALUE` dicts for abbreviated and full-name month names, respectively.
 
-One note on parsing performance: using the `Date(date_string,format_string)` function is fine
-if only called a few times. If there are many similarly formatted date strings to parse however,
-it is much more efficient to first create a [`Dates.DateFormat`](@ref), and pass it instead of
-a raw format string.
+The above examples used the `dateformat""` string macro. This macro creates a `DateFormat` object once when
+the macro is expanded and uses the same `DateFormat` object even if a code snippet is run multiple times.
+
+```jldoctest
+julia> for i = 1:10^5
+           Date("2015-01-01", dateformat"y-m-d")
+       end
+```
+
+Or you can create the DateFormat object explicitly:
 
 ```jldoctest
 julia> df = DateFormat("y-m-d");
@@ -125,14 +132,21 @@ julia> dt2 = Date("2015-01-02",df)
 2015-01-02
 ```
 
-You can also use the `dateformat""` string macro. This macro creates the `DateFormat` object once when the macro is expanded and uses the same `DateFormat` object even if a code snippet is run multiple times.
+Alternatively, use broadcasting:
 
 ```jldoctest
-julia> for i = 1:10^5
-           Date("2015-01-01", dateformat"y-m-d")
-       end
+julia> years = ["2015", "2016"];
+
+julia> Date.(years, DateFormat("yyyy"))
+2-element Vector{Date}:
+ 2015-01-01
+ 2016-01-01
 ```
 
+For convenience, you may pass the format string directly (e.g., `Date("2015-01-01","y-m-d")`),
+although this form incurs performance costs if you are parsing the same format repeatedly, as
+it internally creates a new `DateFormat` object each time.
+
 As well as via the constructors, a `Date` or `DateTime` can be constructed from
 strings using the [`parse`](@ref) and [`tryparse`](@ref) functions, but with
 an optional third argument of type `DateFormat` specifying the format; for example,
@@ -565,6 +579,26 @@ julia> Dates.value(Dates.Millisecond(10))
 10
 ```
 
+Representing periods or durations that are not integer multiples of the basic types can be achieved
+with the [`Dates.CompoundPeriod`](@ref) type. Compound periods may be constructed manually from simple
+[`Period`](@ref) types. Additionally, the [`canonicalize`](@ref) function can be used to break down a
+period into a [`Dates.CompoundPeriod`](@ref). This is particularly useful to convert a duration, e.g.,
+the difference between two `DateTime`s, into a more convenient representation.
+
+```jldoctest
+julia> cp = Dates.CompoundPeriod(Day(1),Minute(1))
+1 day, 1 minute
+
+julia> t1 = DateTime(2018,8,8,16,58,00)
+2018-08-08T16:58:00
+
+julia> t2 = DateTime(2021,6,23,10,00,00)
+2021-06-23T10:00:00
+
+julia> canonicalize(t2-t1) # creates a CompoundPeriod
+149 weeks, 6 days, 17 hours, 2 minutes
+```
+
 ## Rounding
 
 [`Date`](@ref) and [`DateTime`](@ref) values can be rounded to a specified resolution (e.g., 1
@@ -770,6 +804,7 @@ Dates.toprev(::Function, ::Dates.TimeType)
 ```@docs
 Dates.Period(::Any)
 Dates.CompoundPeriod(::Vector{<:Dates.Period})
+Dates.canonicalize
 Dates.value
 Dates.default
 Dates.periods
@@ -847,6 +882,15 @@ Months of the Year:
 | `November`  | `Nov` | 11          |
 | `December`  | `Dec` | 12          |
 
+#### Common Date Formatters
+
+```@docs
+ISODateTimeFormat
+ISODateFormat
+ISOTimeFormat
+RFC1123Format
+```
+
 ```@meta
 DocTestSetup = nothing
 ```
diff --git a/stdlib/Dates/src/io.jl b/stdlib/Dates/src/io.jl
index 721e36deef7a89..1901661fff0e24 100644
--- a/stdlib/Dates/src/io.jl
+++ b/stdlib/Dates/src/io.jl
@@ -367,8 +367,9 @@ When parsing a time with a `p` specifier, any hour (either `H` or `I`) is interp
 as a 12-hour clock, so the `I` code is mainly useful for output.
 
 Creating a DateFormat object is expensive. Whenever possible, create it once and use it many times
-or try the `dateformat""` string macro. Using this macro creates the DateFormat object once at
-macro expansion time and reuses it later. see [`@dateformat_str`](@ref).
+or try the [`dateformat""`](@ref @dateformat_str) string macro. Using this macro creates the DateFormat
+object once at macro expansion time and reuses it later. There are also several
+[pre-defined formatters](@ref Common-Date-Formatters), listed later.
 
 See [`DateTime`](@ref) and [`format`](@ref) for how to use a DateFormat object to parse and write Date strings
 respectively.
@@ -443,14 +444,63 @@ macro dateformat_str(str)
 end
 
 # Standard formats
+
+"""
+    Dates.ISODateTimeFormat
+
+Describes the ISO8601 formatting for a date and time. This is the default value for `Dates.format`
+of a `DateTime`.
+
+# Example
+```jldoctest
+julia> Dates.format(DateTime(2018, 8, 8, 12, 0, 43, 1), ISODateTimeFormat)
+"2018-08-08T12:00:43.001"
+```
+"""
 const ISODateTimeFormat = DateFormat("yyyy-mm-dd\\THH:MM:SS.s")
+default_format(::Type{DateTime}) = ISODateTimeFormat
+
+"""
+    Dates.ISODateFormat
+
+Describes the ISO8601 formatting for a date. This is the default value for `Dates.format` of a `Date`.
+
+# Example
+```jldoctest
+julia> Dates.format(Date(2018, 8, 8), ISODateFormat)
+"2018-08-08"
+```
+"""
 const ISODateFormat = DateFormat("yyyy-mm-dd")
+default_format(::Type{Date}) = ISODateFormat
+
+"""
+    Dates.ISOTimeFormat
+
+Describes the ISO8601 formatting for a time. This is the default value for `Dates.format` of a `Time`.
+
+# Example
+```jldoctest
+julia> Dates.format(Time(12, 0, 43, 1), ISOTimeFormat)
+"12:00:43.001"
+```
+"""
 const ISOTimeFormat = DateFormat("HH:MM:SS.s")
+default_format(::Type{Time}) = ISOTimeFormat
+
+"""
+    Dates.RFC1123Format
+
+Describes the RFC1123 formatting for a date and time.
+
+# Example
+```jldoctest
+julia> Dates.format(DateTime(2018, 8, 8, 12, 0, 43, 1), RFC1123Format)
+"Wed, 08 Aug 2018 12:00:43"
+```
+"""
 const RFC1123Format = DateFormat("e, dd u yyyy HH:MM:SS")
 
-default_format(::Type{DateTime}) = ISODateTimeFormat
-default_format(::Type{Date}) = ISODateFormat
-default_format(::Type{Time}) = ISOTimeFormat
 
 ### API
 
@@ -462,9 +512,23 @@ const Locale = Union{DateLocale, String}
 Construct a `DateTime` by parsing the `dt` date time string following the
 pattern given in the `format` string (see [`DateFormat`](@ref)  for syntax).
 
-This method creates a `DateFormat` object each time it is called. If you are
-parsing many date time strings of the same format, consider creating a
-[`DateFormat`](@ref) object once and using that as the second argument instead.
+!!! note
+    This method creates a `DateFormat` object each time it is called. It is recommended
+    that you create a [`DateFormat`](@ref) object instead and use that as the second
+    argument to avoid performance loss when using the same format repeatedly.
+
+# Example
+```jldoctest
+julia> DateTime("2020-01-01", "yyyy-mm-dd")
+2020-01-01T00:00:00
+
+julia> a = ("2020-01-01", "2020-01-02");
+
+julia> [DateTime(d, dateformat"yyyy-mm-dd") for d ∈ a] # preferred
+2-element Vector{DateTime}:
+ 2020-01-01T00:00:00
+ 2020-01-02T00:00:00
+```
 """
 function DateTime(dt::AbstractString, format::AbstractString; locale::Locale=ENGLISH)
     return parse(DateTime, dt, DateFormat(format, locale))
@@ -488,9 +552,23 @@ DateTime(dt::AbstractString, df::DateFormat=ISODateTimeFormat) = parse(DateTime,
 Construct a `Date` by parsing the `d` date string following the pattern given
 in the `format` string (see [`DateFormat`](@ref) for syntax).
 
-This method creates a `DateFormat` object each time it is called. If you are
-parsing many date strings of the same format, consider creating a
-[`DateFormat`](@ref) object once and using that as the second argument instead.
+!!! note
+    This method creates a `DateFormat` object each time it is called. It is recommended
+    that you create a [`DateFormat`](@ref) object instead and use that as the second
+    argument to avoid performance loss when using the same format repeatedly.
+
+# Example
+```jldoctest
+julia> Date("2020-01-01", "yyyy-mm-dd")
+2020-01-01
+
+julia> a = ("2020-01-01", "2020-01-02");
+
+julia> [Date(d, dateformat"yyyy-mm-dd") for d ∈ a] # preferred
+2-element Vector{Date}:
+ 2020-01-01
+ 2020-01-02
+```
 """
 function Date(d::AbstractString, format::AbstractString; locale::Locale=ENGLISH)
     parse(Date, d, DateFormat(format, locale))
@@ -514,9 +592,23 @@ Date(d::AbstractString, df::DateFormat=ISODateFormat) = parse(Date, d, df)
 Construct a `Time` by parsing the `t` time string following the pattern given
 in the `format` string (see [`DateFormat`](@ref) for syntax).
 
-This method creates a `DateFormat` object each time it is called. If you are
-parsing many time strings of the same format, consider creating a
-[`DateFormat`](@ref) object once and using that as the second argument instead.
+!!! note
+    This method creates a `DateFormat` object each time it is called. It is recommended
+    that you create a [`DateFormat`](@ref) object instead and use that as the second
+    argument to avoid performance loss when using the same format repeatedly.
+
+# Example
+```jldoctest
+julia> Time("12:34pm", "HH:MMp")
+12:34:00
+
+julia> a = ("12:34pm", "2:34am");
+
+julia> [Time(d, dateformat"HH:MMp") for d ∈ a] # preferred
+2-element Vector{Time}:
+ 12:34:00
+ 02:34:00
+```
 """
 function Time(t::AbstractString, format::AbstractString; locale::Locale=ENGLISH)
     parse(Time, t, DateFormat(format, locale))
diff --git a/stdlib/Dates/src/parse.jl b/stdlib/Dates/src/parse.jl
index 07a65a73b70c33..a5bbc686c955d5 100644
--- a/stdlib/Dates/src/parse.jl
+++ b/stdlib/Dates/src/parse.jl
@@ -282,7 +282,7 @@ function Base.parse(::Type{T}, str::AbstractString, df::DateFormat=default_forma
     val = tryparsenext_internal(T, str, pos, len, df, true)
     @assert val !== nothing
     values, endpos = val
-    return T(values...)
+    return T(values...)::T
 end
 
 function Base.tryparse(::Type{T}, str::AbstractString, df::DateFormat=default_format(T)) where T<:TimeType
@@ -292,7 +292,7 @@ function Base.tryparse(::Type{T}, str::AbstractString, df::DateFormat=default_fo
     values, endpos = res
     if validargs(T, values...) === nothing
         # TODO: validargs gets called twice, since it's called again in the T constructor
-        return T(values...)
+        return T(values...)::T
     end
     return nothing
 end
diff --git a/stdlib/Dates/src/periods.jl b/stdlib/Dates/src/periods.jl
index 22c792cb2f333f..61df01302521b7 100644
--- a/stdlib/Dates/src/periods.jl
+++ b/stdlib/Dates/src/periods.jl
@@ -357,6 +357,12 @@ function Base.string(x::CompoundPeriod)
 end
 Base.show(io::IO,x::CompoundPeriod) = print(io, string(x))
 
+# Convert a `CompoundPeriod` to the concrete `Period` type `T` by converting each component
+# to `T` and summing. `init = zero(T)` makes an empty `CompoundPeriod` yield `T(0)` rather
+# than raising the empty-reduction error. Abstract `T` is rejected with a `MethodError`.
+Base.convert(::Type{T}, x::CompoundPeriod) where T<:Period =
+    isconcretetype(T) ? sum(T, x.periods; init = zero(T)) : throw(MethodError(convert,(T,x)))
+
 # E.g. Year(1) + Day(1)
 (+)(x::Period,y::Period) = CompoundPeriod(Period[x, y])
 (+)(x::CompoundPeriod, y::Period) = CompoundPeriod(vcat(x.periods, y))
diff --git a/stdlib/Dates/test/arithmetic.jl b/stdlib/Dates/test/arithmetic.jl
index a1a6884d4b0c36..485fea56240660 100644
--- a/stdlib/Dates/test/arithmetic.jl
+++ b/stdlib/Dates/test/arithmetic.jl
@@ -508,4 +508,13 @@ end
     end
 end
 
+@testset "Diff of dates" begin
+    for t ∈ [Day, Week, Hour, Minute]
+        a = DateTime(2021,1,1):t(1):DateTime(2021,2,1)
+        d = diff(a)
+        @test d == diff(collect(a))
+        @test eltype(d) === typeof(a[1] - a[2])
+    end
+end
+
 end
diff --git a/stdlib/Dates/test/io.jl b/stdlib/Dates/test/io.jl
index 11a02af9e12559..822d0101c28ba8 100644
--- a/stdlib/Dates/test/io.jl
+++ b/stdlib/Dates/test/io.jl
@@ -572,4 +572,14 @@ end
     end
 end
 
+@testset "inference with dynamic dateformat string" begin
+    datetime = DateTime(2020, 4, 7)
+    f1() = DateTime("2020-04-07", "yyyy-mm-dd")
+    f2() = DateTime("2020-04-07", DateFormat("yyyy-mm-dd"))
+    f3() = parse(DateTime, "2020-04-07", DateFormat("yyyy-mm-dd"))
+    @test (@inferred f1()) == (@inferred f2()) == (@inferred f3()) == datetime
+    g() = tryparse(DateTime, "2020-04-07", DateFormat("yyyy-mm-dd"))
+    @test (@inferred Nothing g()) == datetime
+end
+
 end
diff --git a/stdlib/Dates/test/periods.jl b/stdlib/Dates/test/periods.jl
index 81aacd1a9e54b6..3bb310be4ee84a 100644
--- a/stdlib/Dates/test/periods.jl
+++ b/stdlib/Dates/test/periods.jl
@@ -519,5 +519,18 @@ end
     #Test combined Fixed and Other Periods
     @test (1m + 1d < 1m + 1s) == false
 end
+
+@testset "Convert CompoundPeriod to Period" begin
+    @test convert(Month, Year(1) + Month(1)) === Month(13)
+    @test convert(Second, Minute(1) + Second(30)) === Second(90)
+    @test convert(Minute, Minute(1) + Second(60)) === Minute(2)
+    @test convert(Millisecond, Minute(1) + Second(30)) === Millisecond(90_000)
+    @test_throws InexactError convert(Minute, Minute(1) + Second(30))
+    @test_throws MethodError convert(Month, Minute(1) + Second(30))
+    @test_throws MethodError convert(Second, Month(1) + Second(30))
+    @test_throws MethodError convert(Period, Minute(1) + Second(30))
+    @test_throws MethodError convert(Dates.FixedPeriod, Minute(1) + Second(30))
+end
+
 end
 
diff --git a/stdlib/Dates/test/ranges.jl b/stdlib/Dates/test/ranges.jl
index 6eb63713768677..52416fc95ec0ca 100644
--- a/stdlib/Dates/test/ranges.jl
+++ b/stdlib/Dates/test/ranges.jl
@@ -515,7 +515,7 @@ end
 @test length(Dates.Year(1):Dates.Year(1):Dates.Year(10)) == 10
 @test length(Dates.Year(10):Dates.Year(-1):Dates.Year(1)) == 10
 @test length(Dates.Year(10):Dates.Year(-2):Dates.Year(1)) == 5
-@test_throws OverflowError length(typemin(Dates.Year):Dates.Year(1):typemax(Dates.Year))
+@test length(typemin(Dates.Year):Dates.Year(1):typemax(Dates.Year)) == 0 # overflow
 @test_throws MethodError Dates.Date(0):Dates.DateTime(2000)
 @test_throws MethodError Dates.Date(0):Dates.Year(10)
 @test length(range(Dates.Date(2000), step=Dates.Day(1), length=366)) == 366
diff --git a/stdlib/DelimitedFiles/src/DelimitedFiles.jl b/stdlib/DelimitedFiles/src/DelimitedFiles.jl
index bf88c12ea9cf2c..3091fed79fee89 100644
--- a/stdlib/DelimitedFiles/src/DelimitedFiles.jl
+++ b/stdlib/DelimitedFiles/src/DelimitedFiles.jl
@@ -190,8 +190,9 @@ Specifying `skipstart` will ignore the corresponding number of initial lines fro
 If `skipblanks` is `true`, blank lines in the input will be ignored.
 
 If `use_mmap` is `true`, the file specified by `source` is memory mapped for potential
-speedups. Default is `true` except on Windows. On Windows, you may want to specify `true` if
-the file is large, and is only read once and not written to.
+speedups if the file is large. Default is `false`. On a Windows filesystem, `use_mmap` should not be set
+to `true` unless the file is only read once and is also not written to.
+Some edge cases exist where an OS is Unix-like but the filesystem is Windows-like.
 
 If `quotes` is `true`, columns enclosed within double-quote (\") characters are allowed to
 contain new lines and column delimiters. Double-quote characters within a quoted field must
@@ -232,7 +233,7 @@ readdlm_auto(input::IO, dlm::AbstractChar, T::Type, eol::AbstractChar, auto::Boo
 function readdlm_auto(input::AbstractString, dlm::AbstractChar, T::Type, eol::AbstractChar, auto::Bool; opts...)
     isfile(input) || throw(ArgumentError("Cannot open \'$input\': not a file"))
     optsd = val_opts(opts)
-    use_mmap = get(optsd, :use_mmap, Sys.iswindows() ? false : true)
+    use_mmap = get(optsd, :use_mmap, false)
     fsz = filesize(input)
     if use_mmap && fsz > 0 && fsz < typemax(Int)
         a = open(input, "r") do f
diff --git a/stdlib/Distributed/docs/src/index.md b/stdlib/Distributed/docs/src/index.md
index 1b1675eccc1a29..dc8cef5e22d927 100644
--- a/stdlib/Distributed/docs/src/index.md
+++ b/stdlib/Distributed/docs/src/index.md
@@ -1,4 +1,4 @@
-# Distributed Computing
+# [Distributed Computing](@id man-distributed)
 
 ```@docs
 Distributed.addprocs
diff --git a/stdlib/Distributed/src/Distributed.jl b/stdlib/Distributed/src/Distributed.jl
index 50108f05eed26a..dd9101fa1b4ce9 100644
--- a/stdlib/Distributed/src/Distributed.jl
+++ b/stdlib/Distributed/src/Distributed.jl
@@ -13,7 +13,8 @@ import Base: getindex, wait, put!, take!, fetch, isready, push!, length,
 using Base: Process, Semaphore, JLOptions, buffer_writes, @sync_add,
             VERSION_STRING, binding_module, atexit, julia_exename,
             julia_cmd, AsyncGenerator, acquire, release, invokelatest,
-            shell_escape_posixly, shell_escape_wincmd, escape_microsoft_c_args,
+            shell_escape_posixly, shell_escape_csh,
+            shell_escape_wincmd, escape_microsoft_c_args,
             uv_error, something, notnothing, isbuffered, mapany
 using Base.Threads: Event
 
diff --git a/stdlib/Distributed/src/cluster.jl b/stdlib/Distributed/src/cluster.jl
index 7329e1b91d37b6..ebe4cac0f3bbe4 100644
--- a/stdlib/Distributed/src/cluster.jl
+++ b/stdlib/Distributed/src/cluster.jl
@@ -160,17 +160,18 @@ function check_worker_state(w::Worker)
         else
             w.ct_time = time()
             if myid() > w.id
-                @async exec_conn_func(w)
+                t = @async exec_conn_func(w)
             else
                 # route request via node 1
-                @async remotecall_fetch((p,to_id) -> remotecall_fetch(exec_conn_func, p, to_id), 1, w.id, myid())
+                t = @async remotecall_fetch((p,to_id) -> remotecall_fetch(exec_conn_func, p, to_id), 1, w.id, myid())
             end
+            errormonitor(t)
             wait_for_conn(w)
         end
     end
 end
 
-exec_conn_func(id::Int) = exec_conn_func(worker_from_id(id))
+exec_conn_func(id::Int) = exec_conn_func(worker_from_id(id)::Worker)
 function exec_conn_func(w::Worker)
     try
         f = notnothing(w.conn_func)
@@ -242,10 +243,10 @@ function start_worker(out::IO, cookie::AbstractString=readline(stdin); close_std
     else
         sock = listen(interface, LPROC.bind_port)
     end
-    @async while isopen(sock)
+    errormonitor(@async while isopen(sock)
         client = accept(sock)
         process_messages(client, client, true)
-    end
+    end)
     print(out, "julia_worker:")  # print header
     print(out, "$(string(LPROC.bind_port))#") # print port
     print(out, LPROC.bind_addr)
@@ -274,7 +275,7 @@ end
 
 
 function redirect_worker_output(ident, stream)
-    @async while !eof(stream)
+    t = @async while !eof(stream)
         line = readline(stream)
         if startswith(line, "      From worker ")
             # stdout's of "additional" workers started from an initial worker on a host are not available
@@ -284,6 +285,7 @@ function redirect_worker_output(ident, stream)
             println("      From worker $(ident):\t$line")
         end
     end
+    errormonitor(t)
 end
 
 struct LaunchWorkerError <: Exception
@@ -349,7 +351,7 @@ end
 function parse_connection_info(str)
     m = match(r"^julia_worker:(\d+)#(.*)", str)
     if m !== nothing
-        (m.captures[2], parse(UInt16, m.captures[1]))
+        (String(m.captures[2]), parse(UInt16, m.captures[1]))
     else
         ("", UInt16(0))
     end
@@ -849,7 +851,7 @@ julia> nprocs()
 3
 
 julia> workers()
-5-element Array{Int64,1}:
+2-element Array{Int64,1}:
  2
  3
 ```
diff --git a/stdlib/Distributed/src/clusterserialize.jl b/stdlib/Distributed/src/clusterserialize.jl
index 3fde2bb4c2bd68..e37987c5bf8751 100644
--- a/stdlib/Distributed/src/clusterserialize.jl
+++ b/stdlib/Distributed/src/clusterserialize.jl
@@ -102,19 +102,6 @@ function serialize(s::ClusterSerializer, t::Core.TypeName)
     nothing
 end
 
-function serialize(s::ClusterSerializer, t::Task)
-    serialize_cycle(s, t) && return
-    if istaskstarted(t) && !istaskdone(t)
-        error("cannot serialize a running Task")
-    end
-    writetag(s.io, TASK_TAG)
-    serialize(s, t.code)
-    serialize(s, t.storage)
-    serialize(s, t._state)
-    serialize(s, t.result)
-    serialize(s, t._isexception)
-end
-
 function serialize(s::ClusterSerializer, g::GlobalRef)
     # Record if required and then invoke the default GlobalRef serializer.
     sym = g.name
@@ -244,17 +231,6 @@ function deserialize(s::ClusterSerializer, t::Type{<:CapturedException})
     return CapturedException(capex, bt)
 end
 
-function deserialize(s::ClusterSerializer, ::Type{Task})
-    t = Task(nothing)
-    deserialize_cycle(s, t)
-    t.code = deserialize(s)
-    t.storage = deserialize(s)
-    t._state = deserialize(s)::UInt8
-    t.result = deserialize(s)
-    t._isexception = deserialize(s)
-    t
-end
-
 """
     clear!(syms, pids=workers(); mod=Main)
 
diff --git a/stdlib/Distributed/src/macros.jl b/stdlib/Distributed/src/macros.jl
index b53890017d4dea..f96338b69e9fb4 100644
--- a/stdlib/Distributed/src/macros.jl
+++ b/stdlib/Distributed/src/macros.jl
@@ -279,9 +279,10 @@ function preduce(reducer, f, R)
 end
 
 function pfor(f, R)
-    @async @sync for c in splitrange(Int(firstindex(R)), Int(lastindex(R)), nworkers())
+    t = @async @sync for c in splitrange(Int(firstindex(R)), Int(lastindex(R)), nworkers())
         @spawnat :any f(R, first(c), last(c))
     end
+    errormonitor(t)
 end
 
 function make_preduce_body(var, body)
@@ -346,6 +347,9 @@ macro distributed(args...)
     var = loop.args[1].args[1]
     r = loop.args[1].args[2]
     body = loop.args[2]
+    if Meta.isexpr(body, :block) && body.args[end] isa LineNumberNode
+        resize!(body.args, length(body.args) - 1)
+    end
     if na==1
         syncvar = esc(Base.sync_varname)
         return quote
diff --git a/stdlib/Distributed/src/managers.jl b/stdlib/Distributed/src/managers.jl
index 3519259190fbc3..91a27aa95cb980 100644
--- a/stdlib/Distributed/src/managers.jl
+++ b/stdlib/Distributed/src/managers.jl
@@ -82,7 +82,10 @@ Keyword arguments:
 
 * `shell`: specifies the type of shell to which ssh connects on the workers.
 
-    + `shell=:posix`: a POSIX-compatible Unix/Linux shell (bash, sh, etc.). The default.
+    + `shell=:posix`: a POSIX-compatible Unix/Linux shell
+      (sh, ksh, bash, dash, zsh, etc.). The default.
+
+    + `shell=:csh`: a Unix C shell (csh, tcsh).
 
     + `shell=:wincmd`: Microsoft Windows `cmd.exe`.
 
@@ -158,22 +161,15 @@ default_addprocs_params(::SSHManager) =
 function launch(manager::SSHManager, params::Dict, launched::Array, launch_ntfy::Condition)
     # Launch one worker on each unique host in parallel. Additional workers are launched later.
     # Wait for all launches to complete.
-    launch_tasks = Vector{Any}(undef, length(manager.machines))
-
-    for (i, (machine, cnt)) in enumerate(manager.machines)
+    @sync for (i, (machine, cnt)) in enumerate(manager.machines)
         let machine=machine, cnt=cnt
-            launch_tasks[i] = @async try
-                    launch_on_machine(manager, machine, cnt, params, launched, launch_ntfy)
-                catch e
-                    print(stderr, "exception launching on machine $(machine) : $(e)\n")
-                end
+             @async try
+                launch_on_machine(manager, $machine, $cnt, params, launched, launch_ntfy)
+            catch e
+                print(stderr, "exception launching on machine $(machine) : $(e)\n")
+            end
         end
     end
-
-    for t in launch_tasks
-        wait(t::Task)
-    end
-
     notify(launch_ntfy)
 end
 
@@ -187,7 +183,7 @@ function parse_machine(machine::AbstractString)
 
     if machine[begin] == '['  # ipv6 bracket notation (RFC 2732)
         ipv6_end = findlast(']', machine)
-        if ipv6_end == nothing
+        if ipv6_end === nothing
             throw(ArgumentError("invalid machine definition format string: invalid port format \"$machine_def\""))
         end
         hoststr = machine[begin+1 : prevind(machine,ipv6_end)]
@@ -205,7 +201,7 @@ function parse_machine(machine::AbstractString)
         portstr = machine_def[2]
 
         portnum = tryparse(Int, portstr)
-        if portnum == nothing
+        if portnum === nothing
             msg = "invalid machine definition format string: invalid port format \"$machine_def\""
             throw(ArgumentError(msg))
         end
@@ -288,6 +284,22 @@ function launch_on_machine(manager::SSHManager, machine::AbstractString, cnt, pa
         # shell login (-l) with string command (-c) to launch julia process
         remotecmd = shell_escape_posixly(`sh -l -c $cmds`)
 
+    elseif shell == :csh
+        # ssh connects to (t)csh
+
+        remotecmd = "$(shell_escape_csh(exename)) $(shell_escape_csh(exeflags))"
+
+        # set environment variables
+        for (var, val) in env
+            occursin(r"^[a-zA-Z_][a-zA-Z_0-9]*\z", var) ||
+                throw(ArgumentError("invalid env key $var"))
+            remotecmd = "setenv $(var) $(shell_escape_csh(val))\n$remotecmd"
+        end
+        # change working directory
+        if dir !== nothing && dir != ""
+            remotecmd = "cd $(shell_escape_csh(dir))\n$remotecmd"
+        end
+
     elseif shell == :wincmd
         # ssh connects to Windows cmd.exe
 
diff --git a/stdlib/Distributed/src/messages.jl b/stdlib/Distributed/src/messages.jl
index bfff8a4d5d1a73..47f70e044a2c0e 100644
--- a/stdlib/Distributed/src/messages.jl
+++ b/stdlib/Distributed/src/messages.jl
@@ -80,18 +80,18 @@ for (idx, tname) in enumerate(msgtypes)
     end
 end
 
-let msg_cases = :(@assert false)
+let msg_cases = :(@assert false "Message type index ($idx) expected to be between 1:$($(length(msgtypes)))")
     for i = length(msgtypes):-1:1
         mti = msgtypes[i]
         msg_cases = :(if idx == $i
-                          return $(Expr(:call, QuoteNode(mti), fill(:(deserialize(s)), fieldcount(mti))...))
+                          $(Expr(:call, QuoteNode(mti), fill(:(deserialize(s)), fieldcount(mti))...))
                       else
                           $msg_cases
                       end)
     end
     @eval function deserialize_msg(s::AbstractSerializer)
         idx = read(s.io, UInt8)
-        $msg_cases
+        return $msg_cases
     end
 end
 
diff --git a/stdlib/Distributed/src/process_messages.jl b/stdlib/Distributed/src/process_messages.jl
index 3216a4e1c73c6e..732b972858dc97 100644
--- a/stdlib/Distributed/src/process_messages.jl
+++ b/stdlib/Distributed/src/process_messages.jl
@@ -78,7 +78,7 @@ function schedule_call(rid, thunk)
         rv = RemoteValue(def_rv_channel())
         (PGRP::ProcessGroup).refs[rid] = rv
         push!(rv.clientset, rid.whence)
-        @async run_work_thunk(rv, thunk)
+        errormonitor(@async run_work_thunk(rv, thunk))
         return rv
     end
 end
@@ -111,7 +111,7 @@ end
 
 ## message event handlers ##
 function process_messages(r_stream::TCPSocket, w_stream::TCPSocket, incoming::Bool=true)
-    @async process_tcp_streams(r_stream, w_stream, incoming)
+    errormonitor(@async process_tcp_streams(r_stream, w_stream, incoming))
 end
 
 function process_tcp_streams(r_stream::TCPSocket, w_stream::TCPSocket, incoming::Bool)
@@ -141,7 +141,7 @@ Julia version number to perform the authentication handshake.
 See also [`cluster_cookie`](@ref).
 """
 function process_messages(r_stream::IO, w_stream::IO, incoming::Bool=true)
-    @async message_handler_loop(r_stream, w_stream, incoming)
+    errormonitor(@async message_handler_loop(r_stream, w_stream, incoming))
 end
 
 function message_handler_loop(r_stream::IO, w_stream::IO, incoming::Bool)
@@ -230,8 +230,8 @@ function message_handler_loop(r_stream::IO, w_stream::IO, incoming::Bool)
             deregister_worker(wpid)
         end
 
-        isopen(r_stream) && close(r_stream)
-        isopen(w_stream) && close(w_stream)
+        close(r_stream)
+        close(w_stream)
 
         if (myid() == 1) && (wpid > 1)
             if oldstate != W_TERMINATING
@@ -274,7 +274,7 @@ function handle_msg(msg::CallMsg{:call}, header, r_stream, w_stream, version)
     schedule_call(header.response_oid, ()->msg.f(msg.args...; msg.kwargs...))
 end
 function handle_msg(msg::CallMsg{:call_fetch}, header, r_stream, w_stream, version)
-    @async begin
+    errormonitor(@async begin
         v = run_work_thunk(()->msg.f(msg.args...; msg.kwargs...), false)
         if isa(v, SyncTake)
             try
@@ -285,18 +285,20 @@ function handle_msg(msg::CallMsg{:call_fetch}, header, r_stream, w_stream, versi
         else
             deliver_result(w_stream, :call_fetch, header.notify_oid, v)
         end
-    end
+        nothing
+    end)
 end
 
 function handle_msg(msg::CallWaitMsg, header, r_stream, w_stream, version)
-    @async begin
+    errormonitor(@async begin
         rv = schedule_call(header.response_oid, ()->msg.f(msg.args...; msg.kwargs...))
         deliver_result(w_stream, :call_wait, header.notify_oid, fetch(rv.c))
-    end
+        nothing
+    end)
 end
 
 function handle_msg(msg::RemoteDoMsg, header, r_stream, w_stream, version)
-    @async run_work_thunk(()->msg.f(msg.args...; msg.kwargs...), true)
+    errormonitor(@async run_work_thunk(()->msg.f(msg.args...; msg.kwargs...), true))
 end
 
 function handle_msg(msg::ResultMsg, header, r_stream, w_stream, version)
@@ -330,8 +332,7 @@ function handle_msg(msg::JoinPGRPMsg, header, r_stream, w_stream, version)
     lazy = msg.lazy
     PGRP.lazy = lazy
 
-    wait_tasks = Task[]
-    for (connect_at, rpid) in msg.other_workers
+    @sync for (connect_at, rpid) in msg.other_workers
         wconfig = WorkerConfig()
         wconfig.connect_at = connect_at
 
@@ -340,14 +341,11 @@ function handle_msg(msg::JoinPGRPMsg, header, r_stream, w_stream, version)
                 # The constructor registers the object with a global registry.
                 Worker(rpid, ()->connect_to_peer(cluster_manager, rpid, wconfig))
             else
-                t = @async connect_to_peer(cluster_manager, rpid, wconfig)
-                push!(wait_tasks, t)
+                @async connect_to_peer(cluster_manager, rpid, wconfig)
             end
         end
     end
 
-    for wt in wait_tasks; Base.wait(wt); end
-
     send_connection_hdr(controller, false)
     send_msg_now(controller, MsgHeader(RRID(0,0), header.notify_oid), JoinCompleteMsg(Sys.CPU_THREADS, getpid()))
 end
diff --git a/stdlib/Distributed/src/remotecall.jl b/stdlib/Distributed/src/remotecall.jl
index f4845221a611a4..fabcf106860688 100644
--- a/stdlib/Distributed/src/remotecall.jl
+++ b/stdlib/Distributed/src/remotecall.jl
@@ -84,20 +84,24 @@ end
 
 function finalize_ref(r::AbstractRemoteRef)
     if r.where > 0 # Handle the case of the finalizer having been called manually
-        if islocked(client_refs)
-            # delay finalizer for later, when it's not already locked
+        if trylock(client_refs.lock) # trylock doesn't call wait which causes yields
+            try
+                delete!(client_refs.ht, r) # direct removal avoiding locks
+                if isa(r, RemoteChannel)
+                    send_del_client_no_lock(r)
+                else
+                    # send_del_client only if the reference has not been set
+                    r.v === nothing && send_del_client_no_lock(r)
+                    r.v = nothing
+                end
+                r.where = 0
+            finally
+                unlock(client_refs.lock)
+            end
+        else
             finalizer(finalize_ref, r)
             return nothing
         end
-        delete!(client_refs, r)
-        if isa(r, RemoteChannel)
-            send_del_client(r)
-        else
-            # send_del_client only if the reference has not been set
-            r.v === nothing && send_del_client(r)
-            r.v = nothing
-        end
-        r.where = 0
     end
     nothing
 end
@@ -192,7 +196,7 @@ or to use a local [`Channel`](@ref) as a proxy:
 ```julia
 p = 1
 f = Future(p)
-@async put!(f, remotecall_fetch(long_computation, p))
+errormonitor(@async put!(f, remotecall_fetch(long_computation, p)))
 isready(f)  # will not block
 ```
 """
@@ -229,13 +233,18 @@ del_client(rr::AbstractRemoteRef) = del_client(remoteref_id(rr), myid())
 del_client(id, client) = del_client(PGRP, id, client)
 function del_client(pg, id, client)
     lock(client_refs) do
-        rv = get(pg.refs, id, false)
-        if rv !== false
-            delete!(rv.clientset, client)
-            if isempty(rv.clientset)
-                delete!(pg.refs, id)
-                #print("$(myid()) collected $id\n")
-            end
+        _del_client(pg, id, client)
+    end
+    nothing
+end
+
+function _del_client(pg, id, client)
+    rv = get(pg.refs, id, false)
+    if rv !== false
+        delete!(rv.clientset, client)
+        if isempty(rv.clientset)
+            delete!(pg.refs, id)
+            #print("$(myid()) collected $id\n")
         end
     end
     nothing
@@ -249,23 +258,36 @@ end
 
 const any_gc_flag = Condition()
 function start_gc_msgs_task()
-    @async while true
+    errormonitor(@async while true
         wait(any_gc_flag)
         flush_gc_msgs()
-    end
+    end)
 end
 
 function send_del_client(rr)
     if rr.where == myid()
         del_client(rr)
     elseif id_in_procs(rr.where) # process only if a valid worker
-        w = worker_from_id(rr.where)
-        push!(w.del_msgs, (remoteref_id(rr), myid()))
-        w.gcflag = true
-        notify(any_gc_flag)
+        process_worker(rr)
     end
 end
 
+function send_del_client_no_lock(rr)
+    # for gc context to avoid yields
+    if rr.where == myid()
+        _del_client(PGRP, remoteref_id(rr), myid())
+    elseif id_in_procs(rr.where) # process only if a valid worker
+        process_worker(rr)
+    end
+end
+
+function process_worker(rr)
+    w = worker_from_id(rr.where)::Worker
+    push!(w.del_msgs, (remoteref_id(rr), myid()))
+    w.gcflag = true
+    notify(any_gc_flag)
+end
+
 function add_client(id, client)
     lock(client_refs) do
         rv = lookup_ref(id)
@@ -544,7 +566,7 @@ fetch_ref(rid, args...) = fetch(lookup_ref(rid).c, args...)
 Wait for and get a value from a [`RemoteChannel`](@ref). Exceptions raised are the
 same as for a [`Future`](@ref). Does not remove the item fetched.
 """
-fetch(r::RemoteChannel, args...) = call_on_owner(fetch_ref, r, args...)
+fetch(r::RemoteChannel, args...) = call_on_owner(fetch_ref, r, args...)::eltype(r)
 
 isready(rv::RemoteValue, args...) = isready(rv.c, args...)
 
@@ -607,7 +629,15 @@ function take_ref(rid, caller, args...)
         lock(rv.synctake)
     end
 
-    v=take!(rv, args...)
+    v = try
+        take!(rv, args...)
+    catch e
+        # avoid unmatched unlock when exception occurs
+        # github issue #33972
+        synctake && unlock(rv.synctake)
+        rethrow(e)
+    end
+
     isa(v, RemoteException) && (myid() == caller) && throw(v)
 
     if synctake
@@ -623,7 +653,7 @@ end
 Fetch value(s) from a [`RemoteChannel`](@ref) `rr`,
 removing the value(s) in the process.
 """
-take!(rr::RemoteChannel, args...) = call_on_owner(take_ref, rr, myid(), args...)
+take!(rr::RemoteChannel, args...) = call_on_owner(take_ref, rr, myid(), args...)::eltype(rr)
 
 # close and isopen are not supported on Future
 
diff --git a/stdlib/Distributed/test/distributed_exec.jl b/stdlib/Distributed/test/distributed_exec.jl
index c36a79600773c3..749c18f6b61f05 100644
--- a/stdlib/Distributed/test/distributed_exec.jl
+++ b/stdlib/Distributed/test/distributed_exec.jl
@@ -853,6 +853,13 @@ end
         return :OK
     end, id_other, rc_unbuffered) == :OK
 
+# github issue 33972
+rc_unbuffered_other = RemoteChannel(()->Channel{Int}(0), id_other)
+close(rc_unbuffered_other)
+try; take!(rc_unbuffered_other); catch; end
+@test !remotecall_fetch(rc -> islocked(Distributed.lookup_ref(remoteref_id(rc)).synctake),
+                        id_other, rc_unbuffered_other)
+
 # github PR #14456
 n = DoFullTest ? 6 : 5
 for i = 1:10^n
@@ -1675,7 +1682,7 @@ let e = @test_throws RemoteException pmap(1) do _
     es = sprint(showerror, e.value)
     @test contains(es, ":\nTaskFailedException\nStacktrace:\n")
     @test contains(es, "\n\n    nested task error:")
-    @test_broken contains(es, "\n\n    nested task error: 42\n")
+    @test contains(es, "\n\n    nested task error: 42\n")
 end
 
 # issue #27429, propagate relative `include` path to workers
diff --git a/stdlib/Downloads.version b/stdlib/Downloads.version
index 9dc2783a1389bd..51f7f8d24d2f36 100644
--- a/stdlib/Downloads.version
+++ b/stdlib/Downloads.version
@@ -1,2 +1,2 @@
 DOWNLOADS_BRANCH = master
-DOWNLOADS_SHA1 = 6bb83068bd796c4890baaeb39628ff79a4979374
+DOWNLOADS_SHA1 = 848d374fc563fa9dc6b4d5e6e5be5ad2022652a7
diff --git a/stdlib/GMP_jll/Project.toml b/stdlib/GMP_jll/Project.toml
index 2f66e4eb5aaba0..a4d989a4101995 100644
--- a/stdlib/GMP_jll/Project.toml
+++ b/stdlib/GMP_jll/Project.toml
@@ -8,3 +8,9 @@ Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 
 [compat]
 julia = "1.6"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/InteractiveUtils/docs/src/index.md b/stdlib/InteractiveUtils/docs/src/index.md
index 6b996fb333fc5d..9ad4b5a7cea80b 100644
--- a/stdlib/InteractiveUtils/docs/src/index.md
+++ b/stdlib/InteractiveUtils/docs/src/index.md
@@ -1,4 +1,4 @@
-# Interactive Utilities
+# [Interactive Utilities](@id man-interactive-utils)
 
 This module is intended for interactive work. It is loaded automaticaly in [interactive mode](@ref command-line-options).
 
@@ -26,5 +26,6 @@ InteractiveUtils.code_llvm
 InteractiveUtils.@code_llvm
 InteractiveUtils.code_native
 InteractiveUtils.@code_native
+InteractiveUtils.@time_imports
 InteractiveUtils.clipboard
 ```
diff --git a/stdlib/InteractiveUtils/src/InteractiveUtils.jl b/stdlib/InteractiveUtils/src/InteractiveUtils.jl
index 9d989148878fbd..51a5dcd69cd569 100644
--- a/stdlib/InteractiveUtils/src/InteractiveUtils.jl
+++ b/stdlib/InteractiveUtils/src/InteractiveUtils.jl
@@ -6,7 +6,7 @@ Base.Experimental.@optlevel 1
 
 export apropos, edit, less, code_warntype, code_llvm, code_native, methodswith, varinfo,
     versioninfo, subtypes, supertypes, @which, @edit, @less, @functionloc, @code_warntype,
-    @code_typed, @code_lowered, @code_llvm, @code_native, clipboard
+    @code_typed, @code_lowered, @code_llvm, @code_native, @time_imports, clipboard
 
 import Base.Docs.apropos
 
@@ -21,7 +21,7 @@ include("macros.jl")
 include("clipboard.jl")
 
 """
-    varinfo(m::Module=Main, pattern::Regex=r""; all::Bool = false, imported::Bool = false, sortby::Symbol = :name)
+    varinfo(m::Module=Main, pattern::Regex=r""; all::Bool = false, imported::Bool = false, sortby::Symbol = :name, recursive::Bool = false, minsize::Int = 0)
 
 Return a markdown table giving information about exported global variables in a module, optionally restricted
 to those matching `pattern`.
@@ -32,42 +32,45 @@ The memory consumption estimate is an approximate lower bound on the size of the
 - `imported` : also list objects explicitly imported from other modules.
 - `recursive` : recursively include objects in sub-modules, observing the same settings in each.
 - `sortby` : the column to sort results by. Options are `:name` (default), `:size`, and `:summary`.
+- `minsize` : only include objects whose size is at least `minsize` bytes. Defaults to `0`.
 """
-function varinfo(m::Module=Main, pattern::Regex=r""; all::Bool = false, imported::Bool = false, sortby::Symbol = :name, recursive::Bool = false)
-    @assert sortby in [:name, :size, :summary] "Unrecognized `sortby` value `:$sortby`. Possible options are `:name`, `:size`, and `:summary`"
-    function _populate_rows(m2::Module, allrows, include_self::Bool, prep::String)
-        newrows = Any[
-            let
-                value = getfield(m2, v)
-                ssize_str, ssize = if value===Base || value===Main || value===Core
+function varinfo(m::Module=Main, pattern::Regex=r""; all::Bool = false, imported::Bool = false, sortby::Symbol = :name, recursive::Bool = false, minsize::Int=0)
+    sortby in (:name, :size, :summary) || throw(ArgumentError("Unrecognized `sortby` value `:$sortby`. Possible options are `:name`, `:size`, and `:summary`"))
+    rows = Vector{Any}[]
+    workqueue = [(m, ""),]
+    while !isempty(workqueue)
+        m2, prep = popfirst!(workqueue)
+        for v in names(m2; all, imported)
+            if !isdefined(m2, v) || !occursin(pattern, string(v))
+                continue
+            end
+            value = getfield(m2, v)
+            isbuiltin = value === Base || value === Main || value === Core
+            if recursive && !isbuiltin && isa(value, Module) && value !== m2 && nameof(value) === v && parentmodule(value) === m2
+                push!(workqueue, (value, "$prep$v."))
+            end
+            ssize_str, ssize = if isbuiltin
                     ("", typemax(Int))
                 else
                     ss = summarysize(value)
                     (format_bytes(ss), ss)
                 end
-                Any[string(prep, v), ssize_str, summary(value), ssize]
-            end
-            for v in names(m2; all, imported)
-            if (string(v) != split(string(m2), ".")[end] || include_self) && isdefined(m2, v) && occursin(pattern, string(v)) ]
-        append!(allrows, newrows)
-        if recursive
-            for row in newrows
-                if row[3] == "Module" && !in(split(row[1], ".")[end], [split(string(m2), ".")[end], "Base", "Main", "Core"])
-                    _populate_rows(getfield(m2, Symbol(split(row[1], ".")[end])), allrows, false, prep * "$(row[1]).")
-                end
+            if ssize >= minsize
+                push!(rows, Any[string(prep, v), ssize_str, summary(value), ssize])
             end
         end
-        return allrows
     end
-    rows = _populate_rows(m, Vector{Any}[], true, "")
-    if sortby == :name
-        col, reverse = 1, false
-    elseif sortby == :size
-        col, reverse = 4, true
-    elseif sortby == :summary
-        col, reverse = 3, false
+    let (col, rev) = if sortby == :name
+            1, false
+        elseif sortby == :size
+            4, true
+        elseif sortby == :summary
+            3, false
+        else
+            @assert "unreachable"
+        end
+        sort!(rows; by=r->r[col], rev)
     end
-    rows = sort!(rows, by=r->r[col], rev=reverse)
     pushfirst!(rows, Any["name", "size", "summary"])
 
     return Markdown.MD(Any[Markdown.Table(map(r->r[1:3], rows), Symbol[:l, :r, :l])])
@@ -81,6 +84,8 @@ Print information about the version of Julia in use. The output is
 controlled with boolean keyword arguments:
 
 - `verbose`: print all additional information
+
+See also: [`VERSION`](@ref).
 """
 function versioninfo(io::IO=stdout; verbose::Bool=false)
     println(io, "Julia Version $VERSION")
@@ -165,7 +170,7 @@ The optional second argument restricts the search to a particular module or func
 If keyword `supertypes` is `true`, also return arguments with a parent type of `typ`,
 excluding type `Any`.
 """
-function methodswith(t::Type, f::Function, meths = Method[]; supertypes::Bool=false)
+function methodswith(t::Type, f::Base.Callable, meths = Method[]; supertypes::Bool=false)
     for d in methods(f)
         if any(function (x)
                    let x = rewrap_unionall(x, d.sig)
@@ -187,7 +192,7 @@ function _methodswith(t::Type, m::Module, supertypes::Bool)
     for nm in names(m)
         if isdefined(m, nm)
             f = getfield(m, nm)
-            if isa(f, Function)
+            if isa(f, Base.Callable)
                 methodswith(t, f, meths; supertypes = supertypes)
             end
         end
@@ -206,54 +211,35 @@ function methodswith(t::Type; supertypes::Bool=false)
 end
 
 # subtypes
-function _subtypes(m::Module, x::Type, sts=Base.IdSet{Any}(), visited=Base.IdSet{Module}())
-    push!(visited, m)
+function _subtypes_in!(mods::Array, x::Type)
     xt = unwrap_unionall(x)
-    if !isa(xt, DataType)
-        return sts
+    if !isabstracttype(x) || !isa(xt, DataType)
+        # Fast path
+        return Type[]
     end
-    xt = xt::DataType
-    for s in names(m, all = true)
-        if isdefined(m, s) && !isdeprecated(m, s)
-            t = getfield(m, s)
-            if isa(t, DataType)
-                t = t::DataType
-                if t.name.name === s && supertype(t).name == xt.name
-                    ti = typeintersect(t, x)
-                    ti != Bottom && push!(sts, ti)
-                end
-            elseif isa(t, UnionAll)
-                t = t::UnionAll
-                tt = unwrap_unionall(t)
-                isa(tt, DataType) || continue
-                tt = tt::DataType
-                if tt.name.name === s && supertype(tt).name == xt.name
-                    ti = typeintersect(t, x)
-                    ti != Bottom && push!(sts, ti)
+    sts = Vector{Any}()
+    while !isempty(mods)
+        m = pop!(mods)
+        xt = xt::DataType
+        for s in names(m, all = true)
+            if isdefined(m, s) && !isdeprecated(m, s)
+                t = getfield(m, s)
+                dt = isa(t, UnionAll) ? unwrap_unionall(t) : t
+                if isa(dt, DataType)
+                    if dt.name.name === s && dt.name.module == m && supertype(dt).name == xt.name
+                        ti = typeintersect(t, x)
+                        ti != Bottom && push!(sts, ti)
+                    end
+                elseif isa(t, Module) && nameof(t) === s && parentmodule(t) === m && t !== m
+                    t === Base || push!(mods, t) # exclude Base, since it is also parented by Main
                 end
-            elseif isa(t, Module)
-                t = t::Module
-                in(t, visited) || _subtypes(t, x, sts, visited)
             end
         end
     end
-    return sts
-end
-
-function _subtypes_in(mods::Array, x::Type)
-    if !isabstracttype(x)
-        # Fast path
-        return Type[]
-    end
-    sts = Base.IdSet{Any}()
-    visited = Base.IdSet{Module}()
-    for m in mods
-        _subtypes(m, x, sts, visited)
-    end
-    return sort!(collect(sts), by=string)
+    return permute!(sts, sortperm(map(string, sts)))
 end
 
-subtypes(m::Module, x::Type) = _subtypes_in([m], x)
+subtypes(m::Module, x::Type) = _subtypes_in!([m], x)
 
 """
     subtypes(T::DataType)
@@ -261,6 +247,8 @@ subtypes(m::Module, x::Type) = _subtypes_in([m], x)
 Return a list of immediate subtypes of DataType `T`. Note that all currently loaded subtypes
 are included, including those not visible in the current module.
 
+See also [`supertype`](@ref), [`supertypes`](@ref), [`methodswith`](@ref).
+
 # Examples
 ```jldoctest
 julia> subtypes(Integer)
@@ -270,7 +258,7 @@ julia> subtypes(Integer)
  Unsigned
 ```
 """
-subtypes(x::Type) = _subtypes_in(Base.loaded_modules_array(), x)
+subtypes(x::Type) = _subtypes_in!(Base.loaded_modules_array(), x)
 
 """
     supertypes(T::Type)
@@ -279,6 +267,8 @@ Return a tuple `(T, ..., Any)` of `T` and all its supertypes, as determined by
 successive calls to the [`supertype`](@ref) function, listed in order of `<:`
 and terminated by `Any`.
 
+See also [`subtypes`](@ref).
+
 # Examples
 ```jldoctest
 julia> supertypes(Int)
@@ -292,74 +282,6 @@ function supertypes(T::Type)
     return S === T ? (T,) : (T, supertypes(S)...)
 end
 
-# dumptype is for displaying abstract type hierarchies,
-# based on Jameson Nash's typetree.jl in https://github.com/JuliaArchive/Examples
-function dumptype(io::IO, @nospecialize(x), n::Int, indent)
-    print(io, x)
-    n == 0 && return  # too deeply nested
-    isa(x, DataType) && x.abstract && dumpsubtypes(io, x, Main, n, indent)
-    nothing
-end
-
-directsubtype(a::DataType, b::DataType) = supertype(a).name === b.name
-directsubtype(a::UnionAll, b::DataType) = directsubtype(a.body, b)
-directsubtype(a::Union, b::DataType) = directsubtype(a.a, b) || directsubtype(a.b, b)
-# Fallback to handle TypeVar's
-directsubtype(a, b::DataType) = false
-function dumpsubtypes(io::IO, x::DataType, m::Module, n::Int, indent)
-    for s in names(m, all = true)
-        if isdefined(m, s) && !isdeprecated(m, s)
-            t = getfield(m, s)
-            if t === x || t === m
-                continue
-            elseif isa(t, Module) && nameof(t) === s && parentmodule(t) === m
-                # recurse into primary module bindings
-                dumpsubtypes(io, x, t, n, indent)
-            elseif isa(t, UnionAll) && directsubtype(t::UnionAll, x)
-                dt = unwrap_unionall(t)
-                println(io)
-                if isa(dt, DataType) && dt.name.wrapper === t
-                    # primary type binding
-                    print(io, indent, "  ")
-                    dumptype(io, dt, n - 1, string(indent, "  "))
-                else
-                    # aliases to types
-                    print(io, indent, "  ", m, ".", s, "{")
-                    tvar_io::IOContext = io
-                    tp = t
-                    while true
-                        show(tvar_io, tp.var)
-                        tvar_io = IOContext(tvar_io, :unionall_env => tp.var)
-                        tp = tp.body
-                        if isa(tp, UnionAll)
-                            print(io, ", ")
-                        else
-                            print(io, "} = ")
-                            break
-                        end
-                    end
-                    show(tvar_io, tp)
-                end
-            elseif isa(t, Union) && directsubtype(t::Union, x)
-                println(io)
-                print(io, indent, "  ", m, ".", s, " = ", t)
-            elseif isa(t, DataType) && directsubtype(t::DataType, x)
-                println(io)
-                if t.name.module !== m || t.name.name != s
-                    # aliases to types
-                    print(io, indent, "  ", m, ".", s, " = ")
-                    show(io, t)
-                else
-                    # primary type binding
-                    print(io, indent, "  ")
-                    dumptype(io, t, n - 1, string(indent, "  "))
-                end
-            end
-        end
-    end
-    nothing
-end
-
 # TODO: @deprecate peakflops to LinearAlgebra
 export peakflops
 """
@@ -396,9 +318,12 @@ function report_bug(kind)
             mktempdir() do tmp
                 old_load_path = copy(LOAD_PATH)
                 push!(empty!(LOAD_PATH), joinpath(tmp, "Project.toml"))
+                old_active_project = Base.ACTIVE_PROJECT[]
+                Base.ACTIVE_PROJECT[] = nothing
                 Pkg.add(Pkg.PackageSpec(BugReportingId.name, BugReportingId.uuid))
                 BugReporting = Base.require(BugReportingId)
                 append!(empty!(LOAD_PATH), old_load_path)
+                Base.ACTIVE_PROJECT[] = old_active_project
             end
         end
     else
diff --git a/stdlib/InteractiveUtils/src/clipboard.jl b/stdlib/InteractiveUtils/src/clipboard.jl
index b4c67da4d16dd3..7bc718b91b2bd5 100644
--- a/stdlib/InteractiveUtils/src/clipboard.jl
+++ b/stdlib/InteractiveUtils/src/clipboard.jl
@@ -37,23 +37,27 @@ elseif Sys.islinux() || Sys.KERNEL === :FreeBSD
                 `xsel --input --clipboard` :
                 `xsel -c`,
             :xclip => `xclip -silent -in -selection clipboard`,
+            :wlclipboard => `wl-copy`
         )
     const _clipboard_paste = Dict(
             :xsel  => Sys.islinux() ?
                 `xsel --nodetach --output --clipboard` :
                 `xsel -p`,
             :xclip => `xclip -quiet -out -selection clipboard`,
+            :wlclipboard => `wl-paste`
         )
     function clipboardcmd()
         global _clipboardcmd
         _clipboardcmd !== nothing && return _clipboardcmd
-        for cmd in (:xclip, :xsel)
-            success(pipeline(`which $cmd`, devnull)) && return _clipboardcmd = cmd
+        for cmd in (:xclip, :xsel, :wlclipboard)
+            # wl-clipboard ships wl-copy/paste individually
+            c = cmd == :wlclipboard ? Symbol("wl-copy") : cmd
+            success(pipeline(`which $c`, devnull)) && return _clipboardcmd = cmd
         end
         pkgs = @static if Sys.KERNEL === :FreeBSD
             "x11/xsel or x11/xclip"
         else
-            "xsel or xclip"
+            "xsel or xclip or wl-clipboard"
         end
         error("no clipboard command found, please install $pkgs")
     end
diff --git a/stdlib/InteractiveUtils/src/codeview.jl b/stdlib/InteractiveUtils/src/codeview.jl
index da72fae8daeb03..b292324a17134f 100644
--- a/stdlib/InteractiveUtils/src/codeview.jl
+++ b/stdlib/InteractiveUtils/src/codeview.jl
@@ -57,10 +57,11 @@ Keyword argument `debuginfo` may be one of `:source` or `:none` (default), to sp
 
 See [`@code_warntype`](@ref man-code-warntype) for more information.
 """
-function code_warntype(io::IO, @nospecialize(f), @nospecialize(t); debuginfo::Symbol=:default, optimize::Bool=false)
+function code_warntype(io::IO, @nospecialize(f), @nospecialize(t);
+                       debuginfo::Symbol=:default, optimize::Bool=false, kwargs...)
     debuginfo = Base.IRShow.debuginfo(debuginfo)
     lineprinter = Base.IRShow.__debuginfo[debuginfo]
-    for (src, rettype) in code_typed(f, t, optimize=optimize)
+    for (src, rettype) in code_typed(f, t; optimize, kwargs...)
         lambda_io::IOContext = io
         p = src.parent
         nargs::Int = 0
@@ -127,7 +128,8 @@ function code_warntype(io::IO, @nospecialize(f), @nospecialize(t); debuginfo::Sy
         print(io, "Body")
         warntype_type_printer(io, rettype, true)
         println(io)
-        Base.IRShow.show_ir(lambda_io, src, lineprinter(src), warntype_type_printer)
+        irshow_config = Base.IRShow.IRShowConfig(lineprinter(src), warntype_type_printer)
+        Base.IRShow.show_ir(lambda_io, src, irshow_config)
         println(io)
     end
     nothing
@@ -140,8 +142,8 @@ import Base.CodegenParams
 # Printing code representations in IR and assembly
 function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrapper::Bool,
                         strip_ir_metadata::Bool, dump_module::Bool, syntax::Symbol,
-                        optimize::Bool, debuginfo::Symbol,
-                        params::CodegenParams=CodegenParams())
+                        optimize::Bool, debuginfo::Symbol, binary::Bool,
+                        params::CodegenParams=CodegenParams(debug_info_kind=Cint(0)))
     ccall(:jl_is_in_pure_context, Bool, ()) && error("code reflection cannot be used from generated functions")
     if isa(f, Core.Builtin)
         throw(ArgumentError("argument is not a generic function"))
@@ -151,8 +153,20 @@ function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrappe
     match = Base._which(signature_type(f, t), world)
     linfo = Core.Compiler.specialize_method(match)
     # get the code for it
+    if debuginfo === :default
+        debuginfo = :source
+    elseif debuginfo !== :source && debuginfo !== :none
+        throw(ArgumentError("'debuginfo' must be either :source or :none"))
+    end
     if native
-        str = _dump_function_linfo_native(linfo, world, wrapper, syntax, debuginfo)
+        if syntax !== :att && syntax !== :intel
+            throw(ArgumentError("'syntax' must be either :intel or :att"))
+        end
+        if dump_module
+            str = _dump_function_linfo_native(linfo, world, wrapper, syntax, debuginfo, binary, params)
+        else
+            str = _dump_function_linfo_native(linfo, world, wrapper, syntax, debuginfo, binary)
+        end
     else
         str = _dump_function_linfo_llvm(linfo, world, wrapper, strip_ir_metadata, dump_module, optimize, debuginfo, params)
     end
@@ -161,18 +175,19 @@ function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrappe
     return str
 end
 
-function _dump_function_linfo_native(linfo::Core.MethodInstance, world::UInt, wrapper::Bool, syntax::Symbol, debuginfo::Symbol)
-    if syntax !== :att && syntax !== :intel
-        throw(ArgumentError("'syntax' must be either :intel or :att"))
-    end
-    if debuginfo === :default
-        debuginfo = :source
-    elseif debuginfo !== :source && debuginfo !== :none
-        throw(ArgumentError("'debuginfo' must be either :source or :none"))
-    end
+function _dump_function_linfo_native(linfo::Core.MethodInstance, world::UInt, wrapper::Bool, syntax::Symbol, debuginfo::Symbol, binary::Bool)
     str = ccall(:jl_dump_method_asm, Ref{String},
-                (Any, UInt, Cint, Bool, Ptr{UInt8}, Ptr{UInt8}),
-                linfo, world, 0, wrapper, syntax, debuginfo)
+                (Any, UInt, Bool, Bool, Ptr{UInt8}, Ptr{UInt8}, Bool),
+                linfo, world, false, wrapper, syntax, debuginfo, binary)
+    return str
+end
+
+function _dump_function_linfo_native(linfo::Core.MethodInstance, world::UInt, wrapper::Bool, syntax::Symbol, debuginfo::Symbol, binary::Bool, params::CodegenParams)
+    llvmf = ccall(:jl_get_llvmf_defn, Ptr{Cvoid}, (Any, UInt, Bool, Bool, CodegenParams), linfo, world, wrapper, true, params)
+    llvmf == C_NULL && error("could not compile the specified method")
+    str = ccall(:jl_dump_function_asm, Ref{String},
+                (Ptr{Cvoid}, Bool, Ptr{UInt8}, Ptr{UInt8}, Bool),
+                llvmf, false, syntax, debuginfo, binary)
     return str
 end
 
@@ -181,11 +196,6 @@ function _dump_function_linfo_llvm(
         strip_ir_metadata::Bool, dump_module::Bool,
         optimize::Bool, debuginfo::Symbol,
         params::CodegenParams)
-    if debuginfo === :default
-        debuginfo = :source
-    elseif debuginfo !== :source && debuginfo !== :none
-        throw(ArgumentError("'debuginfo' must be either :source or :none"))
-    end
     llvmf = ccall(:jl_get_llvmf_defn, Ptr{Cvoid}, (Any, UInt, Bool, Bool, CodegenParams), linfo, world, wrapper, optimize, params)
     llvmf == C_NULL && error("could not compile the specified method")
     str = ccall(:jl_dump_function_ir, Ref{String},
@@ -207,7 +217,7 @@ Keyword argument `debuginfo` may be one of source (default) or none, to specify
 """
 function code_llvm(io::IO, @nospecialize(f), @nospecialize(types), raw::Bool,
                    dump_module::Bool=false, optimize::Bool=true, debuginfo::Symbol=:default)
-    d = _dump_function(f, types, false, false, !raw, dump_module, :att, optimize, debuginfo)
+    d = _dump_function(f, types, false, false, !raw, dump_module, :att, optimize, debuginfo, false)
     if highlighting[:llvm] && get(io, :color, false)
         print_llvm(io, d)
     else
@@ -217,29 +227,30 @@ end
 code_llvm(io::IO, @nospecialize(f), @nospecialize(types=Tuple); raw::Bool=false, dump_module::Bool=false, optimize::Bool=true, debuginfo::Symbol=:default) =
     code_llvm(io, f, types, raw, dump_module, optimize, debuginfo)
 code_llvm(@nospecialize(f), @nospecialize(types=Tuple); raw=false, dump_module=false, optimize=true, debuginfo::Symbol=:default) =
-    code_llvm(stdout, f, types; raw=raw, dump_module=dump_module, optimize=optimize, debuginfo=debuginfo)
+    code_llvm(stdout, f, types; raw, dump_module, optimize, debuginfo)
 
 
 """
-    code_native([io=stdout,], f, types; syntax=:att, debuginfo=:default)
+    code_native([io=stdout,], f, types; syntax=:att, debuginfo=:default, binary=false, dump_module=true)
 
 Prints the native assembly instructions generated for running the method matching the given
 generic function and type signature to `io`.
 Switch assembly syntax using `syntax` symbol parameter set to `:att` for AT&T syntax or `:intel` for Intel syntax.
 Keyword argument `debuginfo` may be one of source (default) or none, to specify the verbosity of code comments.
+If `binary` is `true`, it also prints the binary machine code for each instruction, preceded by an abbreviated address.
 """
 function code_native(io::IO, @nospecialize(f), @nospecialize(types=Tuple);
-                     syntax::Symbol=:att, debuginfo::Symbol=:default)
-    d = _dump_function(f, types, true, false, false, false, syntax, true, debuginfo)
+                     dump_module::Bool=true, syntax::Symbol=:att, debuginfo::Symbol=:default, binary::Bool=false)
+    d = _dump_function(f, types, true, false, false, dump_module, syntax, true, debuginfo, binary)
     if highlighting[:native] && get(io, :color, false)
         print_native(io, d)
     else
         print(io, d)
     end
 end
-code_native(@nospecialize(f), @nospecialize(types=Tuple); syntax::Symbol=:att, debuginfo::Symbol=:default) =
-    code_native(stdout, f, types; syntax=syntax, debuginfo=debuginfo)
-code_native(::IO, ::Any, ::Symbol) = error("illegal code_native call") # resolve ambiguous call
+code_native(@nospecialize(f), @nospecialize(types=Tuple); dump_module::Bool=true, syntax::Symbol=:att, debuginfo::Symbol=:default, binary::Bool=false) =
+    code_native(stdout, f, types; dump_module, syntax, debuginfo, binary)
+code_native(::IO, ::Any, ::Symbol) = error("invalid code_native call") # resolve ambiguous call
 
 ## colorized IR and assembly printing
 
diff --git a/stdlib/InteractiveUtils/src/editless.jl b/stdlib/InteractiveUtils/src/editless.jl
index 4c18682d671b3d..7a96323c9cdeb2 100644
--- a/stdlib/InteractiveUtils/src/editless.jl
+++ b/stdlib/InteractiveUtils/src/editless.jl
@@ -192,7 +192,7 @@ Edit a file or directory optionally providing a line number to edit the file at.
 Return to the `julia` prompt when you quit the editor. The editor can be changed
 by setting `JULIA_EDITOR`, `VISUAL` or `EDITOR` as an environment variable.
 
-See also: [`define_editor`](@ref)
+See also [`define_editor`](@ref).
 """
 function edit(path::AbstractString, line::Integer=0)
     path isa String || (path = convert(String, path))
@@ -222,9 +222,17 @@ method to edit. For modules, open the main source file. The module needs to be l
 To ensure that the file can be opened at the given line, you may need to call
 `define_editor` first.
 """
-edit(f)                   = edit(functionloc(f)...)
-edit(f, @nospecialize t)  = edit(functionloc(f,t)...)
-edit(file, line::Integer) = error("could not find source file for function")
+function edit(@nospecialize f)
+    ms = methods(f).ms
+    length(ms) == 1 && edit(functionloc(ms[1])...)
+    length(ms) > 1 && return ms
+    length(ms) == 0 && functionloc(f) # throws
+    nothing
+end
+edit(m::Method) = edit(functionloc(m)...)
+edit(@nospecialize(f), idx::Integer) = edit(methods(f).ms[idx])
+edit(f, t)  = (@nospecialize; edit(functionloc(f, t)...))
+edit(file::Nothing, line::Integer) = error("could not find source file for function")
 edit(m::Module) = edit(pathof(m))
 
 # terminal pager
diff --git a/stdlib/InteractiveUtils/src/macros.jl b/stdlib/InteractiveUtils/src/macros.jl
index 011a0034378b27..cb7dbde1449069 100644
--- a/stdlib/InteractiveUtils/src/macros.jl
+++ b/stdlib/InteractiveUtils/src/macros.jl
@@ -4,7 +4,7 @@
 
 import Base: typesof, insert!
 
-separate_kwargs(args...; kwargs...) = (args, kwargs.data)
+separate_kwargs(args...; kwargs...) = (args, values(kwargs))
 
 """
 Transform a dot expression into one where each argument has been replaced by a
@@ -187,7 +187,7 @@ function gen_call_with_extracted_types_and_kwargs(__module__, fcn, ex0)
             if length(x.args) != 2
                 return Expr(:call, :error, "Invalid keyword argument: $x")
             end
-            push!(kws, Expr(:kw, x.args[1], x.args[2]))
+            push!(kws, Expr(:kw, esc(x.args[1]), esc(x.args[2])))
         else
             return Expr(:call, :error, "@$fcn expects only one non-keyword argument")
         end
@@ -232,6 +232,17 @@ macro code_lowered(ex0...)
     end
 end
 
+macro time_imports(ex)
+    quote
+        try
+            Base.Threads.atomic_add!(Base.TIMING_IMPORTS, 1)
+            $(esc(ex))
+        finally
+            Base.Threads.atomic_sub!(Base.TIMING_IMPORTS, 1)
+        end
+    end
+end
+
 """
     @functionloc
 
@@ -247,7 +258,9 @@ It calls out to the `functionloc` function.
 Applied to a function or macro call, it evaluates the arguments to the specified call, and
 returns the `Method` object for the method that would be called for those arguments. Applied
 to a variable, it returns the module in which the variable was bound. It calls out to the
-`which` function.
+[`which`](@ref) function.
+
+See also: [`@less`](@ref), [`@edit`](@ref).
 """
 :@which
 
@@ -256,6 +269,8 @@ to a variable, it returns the module in which the variable was bound. It calls o
 
 Evaluates the arguments to the function or macro call, determines their types, and calls the `less`
 function on the resulting expression.
+
+See also: [`@edit`](@ref), [`@which`](@ref), [`@code_lowered`](@ref).
 """
 :@less
 
@@ -264,6 +279,8 @@ function on the resulting expression.
 
 Evaluates the arguments to the function or macro call, determines their types, and calls the `edit`
 function on the resulting expression.
+
+See also: [`@less`](@ref), [`@which`](@ref).
 """
 :@edit
 
@@ -326,3 +343,36 @@ Set the optional keyword argument `debuginfo` by putting it before the function
 `debuginfo` may be one of `:source` (default) or `:none`, to specify the verbosity of code comments.
 """
 :@code_native
+
+"""
+    @time_imports
+
+A macro to execute an expression and produce a report of any time spent importing packages and their
+dependencies.
+
+If a package's dependencies have already been imported either globally or by another dependency they will
+not appear under that package and the package will accurately report a faster load time than if it were to
+be loaded in isolation.
+
+```julia-repl
+julia> @time_imports using CSV
+      3.5 ms    ┌ IteratorInterfaceExtensions
+     27.4 ms  ┌ TableTraits
+    614.0 ms  ┌ SentinelArrays
+    138.6 ms  ┌ Parsers
+      2.7 ms  ┌ DataValueInterfaces
+      3.4 ms    ┌ DataAPI
+     59.0 ms  ┌ WeakRefStrings
+     35.4 ms  ┌ Tables
+     49.5 ms  ┌ PooledArrays
+    972.1 ms  CSV
+```
+
+!!! note
+    During the load process a package sequentially imports where necessary all of its dependencies, not just
+    its direct dependencies. That is also true for the dependencies themselves so nested importing will likely
+    occur, but not always. Therefore the nesting shown in this output report is not equivalent to the dependency
+    tree, but does indicate where import time has accumulated.
+
+"""
+:@time_imports
diff --git a/stdlib/InteractiveUtils/test/runtests.jl b/stdlib/InteractiveUtils/test/runtests.jl
index b272f46f147941..43f65c9bcb6cce 100644
--- a/stdlib/InteractiveUtils/test/runtests.jl
+++ b/stdlib/InteractiveUtils/test/runtests.jl
@@ -60,7 +60,7 @@ end
 Base.getindex(A::Stable, i) = A.A[i]
 Base.getindex(A::Unstable, i) = A.A[i]
 
-tag = "ARRAY{FLOAT64, N}"
+tag = "ARRAY"
 @test warntype_hastag(getindex, Tuple{Unstable{Float64},Int}, tag)
 @test !warntype_hastag(getindex, Tuple{Stable{Float64,2},Int}, tag)
 @test warntype_hastag(getindex, Tuple{Stable{Float64},Int}, tag)
@@ -199,6 +199,10 @@ end
 let v = repr(varinfo(_test_varinfo_, all = true, recursive = true))
     @test occursin("inner_x", v)
 end
+let v = repr(varinfo(_test_varinfo_, all = true, minsize = 9))
+    @test !occursin("x_exported", v) # excluded: 8 bytes
+    @test occursin("a_smaller", v)
+end
 
 # Issue 14173
 module Tmp14173
@@ -331,7 +335,7 @@ let err = tempname(),
         redirect_stderr(new_stderr)
         println(new_stderr, "start")
         flush(new_stderr)
-        @eval @test occursin("h_broken_code", sprint(code_native, h_broken_code, ()))
+        @test occursin("h_broken_code", sprint(code_native, h_broken_code, ()))
         Libc.flush_cstdio()
         println(new_stderr, "end")
         flush(new_stderr)
@@ -341,10 +345,11 @@ let err = tempname(),
         close(new_stderr)
         let errstr = read(err, String)
             @test startswith(errstr, """start
+                end
                 Internal error: encountered unexpected error during compilation of f_broken_code:
                 ErrorException(\"unsupported or misplaced expression \"invalid\" in function f_broken_code\")
                 """) || errstr
-            @test endswith(errstr, "\nend\n") || errstr
+            @test !endswith(errstr, "\nend\n") || errstr
         end
         rm(err)
     end
@@ -442,23 +447,36 @@ if Sys.ARCH === :x86_64 || occursin(ix86, string(Sys.ARCH))
 
     rgx = r"%"
     buf = IOBuffer()
-    output = ""
     #test that the string output is at&t syntax by checking for occurrences of '%'s
     code_native(buf, linear_foo, (), syntax = :att, debuginfo = :none)
-    output = String(take!(buf))
-
+    output = replace(String(take!(buf)), r"#[^\r\n]+" => "")
     @test occursin(rgx, output)
 
     #test that the code output is intel syntax by checking it has no occurrences of '%'
     code_native(buf, linear_foo, (), syntax = :intel, debuginfo = :none)
-    output = String(take!(buf))
-
+    output = replace(String(take!(buf)), r"#[^\r\n]+" => "")
     @test !occursin(rgx, output)
 
     code_native(buf, linear_foo, ())
     output = String(take!(buf))
-
     @test occursin(rgx, output)
+
+    @testset "binary" begin
+        # check the RET instruction (opcode: C3)
+        ret = r"^; [0-9a-f]{4}: c3$"m
+
+        # without binary flag (default)
+        code_native(buf, linear_foo, (), dump_module=false)
+        output = String(take!(buf))
+        @test !occursin(ret, output)
+
+        # with the binary flag passed explicitly (both `false` and `true`)
+        for binary in false:true
+            code_native(buf, linear_foo, (); binary, dump_module=false)
+            output = String(take!(buf))
+            @test occursin(ret, output) == binary
+        end
+    end
 end
 
 @testset "error message" begin
@@ -555,3 +573,54 @@ file, ln = functionloc(versioninfo, Tuple{})
     code_native(io, eltype, Tuple{Int})
     @test occursin("eltype", String(take!(io)))
 end
+
+@testset "Issue #41010" begin
+    struct A41010 end
+
+    struct B41010
+        a::A41010
+    end
+    export B41010
+
+    ms = methodswith(A41010, @__MODULE__) |> collect
+    @test ms[1].name == :B41010
+end
+
+# macro options should accept both literals and variables
+let
+    opt = false
+    @test !(first(@code_typed optimize=opt sum(1:10)).inferred)
+end
+
+@testset "@time_imports" begin
+    mktempdir() do dir
+        cd(dir) do
+            try
+                pushfirst!(LOAD_PATH, dir)
+                foo_file = joinpath(dir, "Foo3242.jl")
+                write(foo_file,
+                    """
+                    module Foo3242
+                    foo() = 1
+                    end
+                    """)
+
+                Base.compilecache(Base.PkgId("Foo3242"))
+
+                fname = tempname()
+                f = open(fname, "w")
+                redirect_stdout(f) do
+                    @eval @time_imports using Foo3242
+                end
+                close(f)
+                buf = read(fname)
+                rm(fname)
+
+                @test occursin("ms  Foo3242\n", String(buf))
+
+            finally
+                filter!((≠)(dir), LOAD_PATH)
+            end
+        end
+    end
+end
diff --git a/stdlib/LLVMLibUnwind_jll/Project.toml b/stdlib/LLVMLibUnwind_jll/Project.toml
index 27f22465d1aa1d..36c24111d4d311 100644
--- a/stdlib/LLVMLibUnwind_jll/Project.toml
+++ b/stdlib/LLVMLibUnwind_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "LLVMLibUnwind_jll"
 uuid = "47c5dbc3-30ba-59ef-96a6-123e260183d9"
-version = "11.0.1+1"
+version = "12.0.1+0"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
@@ -8,3 +8,9 @@ Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 
 [compat]
 julia = "1.6"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/LibCURL.version b/stdlib/LibCURL.version
index aa3dc34719df96..92a70ccc9ee186 100644
--- a/stdlib/LibCURL.version
+++ b/stdlib/LibCURL.version
@@ -1,2 +1,2 @@
 LIBCURL_BRANCH = master
-LIBCURL_SHA1 = 8310487053915d5c995513f569ad85ba65c3544f
+LIBCURL_SHA1 = cddeb7f4a7d5718a4a1be602ffcbe68299a1a37e
diff --git a/stdlib/LibGit2/src/error.jl b/stdlib/LibGit2/src/error.jl
index 3e56999d5a52c9..d742cde1605b83 100644
--- a/stdlib/LibGit2/src/error.jl
+++ b/stdlib/LibGit2/src/error.jl
@@ -77,7 +77,7 @@ end
 struct GitError <: Exception
     class::Class
     code::Code
-    msg::AbstractString
+    msg::String
 end
 Base.show(io::IO, err::GitError) = print(io, "GitError(Code:$(err.code), Class:$(err.class), $(err.msg))")
 
@@ -95,8 +95,8 @@ function last_error()
     return (err_class, err_msg)
 end
 
-function GitError(code::Integer)
-    err_code = Code(code)
+GitError(err_code::Integer) = GitError(Code(err_code))
+function GitError(err_code::Code)
     err_class, err_msg = last_error()
     return GitError(err_class, err_code, err_msg)
 end
diff --git a/stdlib/LibGit2/src/gitcredential.jl b/stdlib/LibGit2/src/gitcredential.jl
index 0a442337531a79..1b97c29cd933e6 100644
--- a/stdlib/LibGit2/src/gitcredential.jl
+++ b/stdlib/LibGit2/src/gitcredential.jl
@@ -30,7 +30,12 @@ function GitCredential(cfg::GitConfig, url::AbstractString)
     fill!(cfg, parse(GitCredential, url))
 end
 
-GitCredential(cred::UserPasswordCredential, url::AbstractString) = parse(GitCredential, url)
+function GitCredential(user_pass_cred::UserPasswordCredential, url::AbstractString)
+    cred = parse(GitCredential, url)
+    cred.username = user_pass_cred.user
+    cred.password = deepcopy(user_pass_cred.pass)
+    return cred
+end
 
 Base.:(==)(c1::GitCredential, c2::GitCredential) = (c1.protocol, c1.host, c1.path, c1.username, c1.password, c1.use_http_path) ==
                                                    (c2.protocol, c2.host, c2.path, c2.username, c2.password, c2.use_http_path)
diff --git a/stdlib/LibGit2/src/types.jl b/stdlib/LibGit2/src/types.jl
index 129f526812926e..9ffcaa36461279 100644
--- a/stdlib/LibGit2/src/types.jl
+++ b/stdlib/LibGit2/src/types.jl
@@ -192,7 +192,7 @@ The fields represent:
     perfdata_cb::Ptr{Cvoid}      = C_NULL
     perfdata_payload::Any        = Nothing
 end
-@assert CheckoutOptions.isinlinealloc
+@assert Base.allocatedinline(CheckoutOptions)
 
 """
     LibGit2.TransferProgress
@@ -209,7 +209,7 @@ Matches the [`git_indexer_progress`](https://libgit2.org/libgit2/#HEAD/type/git_
     indexed_deltas::Cuint   = Cuint(0)
     received_bytes::Csize_t = Csize_t(0)
 end
-@assert TransferProgress.isinlinealloc
+@assert Base.allocatedinline(TransferProgress)
 
 """
     LibGit2.RemoteCallbacks
@@ -235,7 +235,7 @@ Matches the [`git_remote_callbacks`](https://libgit2.org/libgit2/#HEAD/type/git_
         resolve_url::Ptr{Cvoid}        = C_NULL
     end
 end
-@assert RemoteCallbacks.isinlinealloc
+@assert Base.allocatedinline(RemoteCallbacks)
 
 """
     LibGit2.Callbacks
@@ -313,7 +313,7 @@ julia> fetch(remote, "master", options=fo)
     certificate_cb::Ptr{Cvoid}   = certificate_cb()
     payload::Any                 = nothing
 end
-@assert ProxyOptions.isinlinealloc
+@assert Base.allocatedinline(ProxyOptions)
 
 """
     LibGit2.FetchOptions
@@ -347,7 +347,7 @@ The fields represent:
         custom_headers::StrArrayStruct = StrArrayStruct()
     end
 end
-@assert FetchOptions.isinlinealloc
+@assert Base.allocatedinline(FetchOptions)
 
 
 """
@@ -384,7 +384,7 @@ The fields represent:
     remote_cb::Ptr{Cvoid}               = C_NULL
     remote_cb_payload::Any              = nothing
 end
-@assert CloneOptions.isinlinealloc
+@assert Base.allocatedinline(CloneOptions)
 
 """
     LibGit2.DiffOptionsStruct
@@ -438,7 +438,7 @@ The fields represent:
     old_prefix::Cstring                      = Cstring(C_NULL)
     new_prefix::Cstring                      = Cstring(C_NULL)
 end
-@assert DiffOptionsStruct.isinlinealloc
+@assert Base.allocatedinline(DiffOptionsStruct)
 
 """
     LibGit2.DescribeOptions
@@ -468,7 +468,7 @@ The fields represent:
     only_follow_first_parent::Cint    = Cint(0)
     show_commit_oid_as_fallback::Cint = Cint(0)
 end
-@assert DescribeOptions.isinlinealloc
+@assert Base.allocatedinline(DescribeOptions)
 
 """
     LibGit2.DescribeFormatOptions
@@ -487,7 +487,7 @@ The fields represent:
     always_use_long_format::Cint = Cint(0)
     dirty_suffix::Cstring        = Cstring(C_NULL)
 end
-@assert DescribeFormatOptions.isinlinealloc
+@assert Base.allocatedinline(DescribeFormatOptions)
 
 """
     LibGit2.DiffFile
@@ -617,7 +617,7 @@ The fields represent:
     file_favor::GIT_MERGE_FILE_FAVOR  = Consts.MERGE_FILE_FAVOR_NORMAL
     file_flags::GIT_MERGE_FILE        = Consts.MERGE_FILE_DEFAULT
 end
-@assert MergeOptions.isinlinealloc
+@assert Base.allocatedinline(MergeOptions)
 
 """
     LibGit2.BlameOptions
@@ -647,7 +647,7 @@ The fields represent:
     min_line::Csize_t                 = Csize_t(1)
     max_line::Csize_t                 = Csize_t(0)
 end
-@assert BlameOptions.isinlinealloc
+@assert Base.allocatedinline(BlameOptions)
 
 
 """
@@ -678,7 +678,7 @@ The fields represent:
         custom_headers::StrArrayStruct = StrArrayStruct()
     end
 end
-@assert PushOptions.isinlinealloc
+@assert Base.allocatedinline(PushOptions)
 
 
 """
@@ -701,7 +701,7 @@ The fields represent:
     merge_opts::MergeOptions = MergeOptions()
     checkout_opts::CheckoutOptions = CheckoutOptions()
 end
-@assert CherrypickOptions.isinlinealloc
+@assert Base.allocatedinline(CherrypickOptions)
 
 
 """
@@ -771,7 +771,7 @@ The fields represent:
     end
     checkout_opts::CheckoutOptions = CheckoutOptions()
 end
-@assert RebaseOptions.isinlinealloc
+@assert Base.allocatedinline(RebaseOptions)
 
 """
     LibGit2.RebaseOperation
@@ -834,7 +834,7 @@ The fields represent:
         baseline::Ptr{Cvoid} = C_NULL
     end
 end
-@assert StatusOptions.isinlinealloc
+@assert Base.allocatedinline(StatusOptions)
 
 """
     LibGit2.StatusEntry
@@ -902,7 +902,7 @@ Matches the [`git_config_entry`](https://libgit2.org/libgit2/#HEAD/type/git_conf
     free::Ptr{Cvoid}    = C_NULL
     payload::Any        = nothing
 end
-@assert ConfigEntry.isinlinealloc
+@assert Base.allocatedinline(ConfigEntry)
 
 function Base.show(io::IO, ce::ConfigEntry)
     print(io, "ConfigEntry(\"", unsafe_string(ce.name), "\", \"", unsafe_string(ce.value), "\")")
@@ -1136,7 +1136,7 @@ The fields represent:
 
     boundary::Char                        = '\0'
 end
-@assert BlameHunk.isinlinealloc
+@assert Base.allocatedinline(BlameHunk)
 
 """
     with(f::Function, obj)
diff --git a/stdlib/LibGit2/test/libgit2.jl b/stdlib/LibGit2/test/libgit2.jl
index 93e530aee5d8e3..892fb8bb6f3fed 100644
--- a/stdlib/LibGit2/test/libgit2.jl
+++ b/stdlib/LibGit2/test/libgit2.jl
@@ -600,6 +600,23 @@ end
         github_regex_test("ssh://git@github.com/$user/$repo", user, repo)
         @test !occursin(LibGit2.GITHUB_REGEX, "git@notgithub.com/$user/$repo.git")
     end
+
+    @testset "UserPasswordCredential/url constructor" begin
+        user_pass_cred = LibGit2.UserPasswordCredential("user", "*******")
+        url = "https://github.com"
+        expected_cred = LibGit2.GitCredential("https", "github.com", nothing, "user", "*******")
+
+        cred = LibGit2.GitCredential(user_pass_cred, url)
+        @test cred == expected_cred
+
+        # Shredding the UserPasswordCredential shouldn't result in information being lost
+        # inside of a GitCredential.
+        Base.shred!(user_pass_cred)
+        @test cred == expected_cred
+
+        Base.shred!(cred)
+        Base.shred!(expected_cred)
+    end
 end
 
 mktempdir() do dir
@@ -2133,6 +2150,50 @@ mktempdir() do dir
                 end
             end
         end
+
+        @testset "approve/reject with UserPasswordCredential" begin
+            # In order to use the "store" credential helper `git` needs to be installed and
+            # on the path.
+            if GIT_INSTALLED
+                config_path = joinpath(dir, config_file)
+                isfile(config_path) && rm(config_path)
+
+                credential_path = joinpath(dir, ".git-credentials")
+                isfile(credential_path) && rm(credential_path)
+
+                LibGit2.with(LibGit2.GitConfig(config_path, LibGit2.Consts.CONFIG_LEVEL_APP)) do cfg
+                    query = LibGit2.GitCredential("https", "mygithost")
+                    filled = LibGit2.GitCredential("https", "mygithost", nothing, "alice", "1234")
+                    user_pass_cred = LibGit2.UserPasswordCredential("alice", "1234")
+                    url = "https://mygithost"
+
+                    # Requires `git` to be installed and available on the path.
+                    LibGit2.set!(cfg, "credential.helper", "store --file \"$credential_path\"")
+                    helper = only(LibGit2.credential_helpers(cfg, query))
+
+                    @test !isfile(credential_path)
+
+                    Base.shred!(LibGit2.fill!(helper, deepcopy(query))) do result
+                        @test result == query
+                    end
+
+                    LibGit2.approve(cfg, user_pass_cred, url)
+                    @test isfile(credential_path)
+                    Base.shred!(LibGit2.fill!(helper, deepcopy(query))) do result
+                        @test result == filled
+                    end
+
+                    LibGit2.reject(cfg, user_pass_cred, url)
+                    Base.shred!(LibGit2.fill!(helper, deepcopy(query))) do result
+                        @test result == query
+                    end
+
+                    Base.shred!(query)
+                    Base.shred!(filled)
+                    Base.shred!(user_pass_cred)
+                end
+            end
+        end
     end
 
     # The following tests require that we can fake a TTY so that we can provide passwords
diff --git a/stdlib/LibGit2_jll/Project.toml b/stdlib/LibGit2_jll/Project.toml
index a10c39822dcec9..d38676c8740256 100644
--- a/stdlib/LibGit2_jll/Project.toml
+++ b/stdlib/LibGit2_jll/Project.toml
@@ -10,3 +10,9 @@ Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 
 [compat]
 julia = "1.6"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/LibSSH2_jll/Project.toml b/stdlib/LibSSH2_jll/Project.toml
index 4f0becfcf3e6a4..b4e35274701503 100644
--- a/stdlib/LibSSH2_jll/Project.toml
+++ b/stdlib/LibSSH2_jll/Project.toml
@@ -9,3 +9,9 @@ Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 
 [compat]
 julia = "1.6"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/LibUV_jll/Project.toml b/stdlib/LibUV_jll/Project.toml
index d07970cd436037..6950964b888a0a 100644
--- a/stdlib/LibUV_jll/Project.toml
+++ b/stdlib/LibUV_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "LibUV_jll"
 uuid = "183b4373-6708-53ba-ad28-60e28bb38547"
-version = "2.0.1+2"
+version = "2.0.1+4"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
@@ -8,3 +8,9 @@ Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 
 [compat]
 julia = "1.6"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/LibUnwind_jll/Project.toml b/stdlib/LibUnwind_jll/Project.toml
index 4619454255ca1a..6068f5df7bc969 100644
--- a/stdlib/LibUnwind_jll/Project.toml
+++ b/stdlib/LibUnwind_jll/Project.toml
@@ -8,3 +8,9 @@ Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 
 [compat]
 julia = "1.6"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/LinearAlgebra/docs/src/index.md b/stdlib/LinearAlgebra/docs/src/index.md
index 52e78609992878..baafe762cea3f7 100644
--- a/stdlib/LinearAlgebra/docs/src/index.md
+++ b/stdlib/LinearAlgebra/docs/src/index.md
@@ -410,6 +410,7 @@ LinearAlgebra.nullspace
 Base.kron
 Base.kron!
 LinearAlgebra.exp(::StridedMatrix{<:LinearAlgebra.BlasFloat})
+Base.cis(::AbstractMatrix)
 Base.:^(::AbstractMatrix, ::Number)
 Base.:^(::Number, ::AbstractMatrix)
 LinearAlgebra.log(::StridedMatrix)
@@ -576,6 +577,7 @@ LinearAlgebra.BLAS.trmv
 LinearAlgebra.BLAS.trsv!
 LinearAlgebra.BLAS.trsv
 LinearAlgebra.BLAS.set_num_threads
+LinearAlgebra.BLAS.get_num_threads
 ```
 
 ## LAPACK functions
diff --git a/stdlib/LinearAlgebra/src/LinearAlgebra.jl b/stdlib/LinearAlgebra/src/LinearAlgebra.jl
index 855e49265af2db..4ac4a142caf073 100644
--- a/stdlib/LinearAlgebra/src/LinearAlgebra.jl
+++ b/stdlib/LinearAlgebra/src/LinearAlgebra.jl
@@ -15,8 +15,9 @@ import Base: USE_BLAS64, abs, acos, acosh, acot, acoth, acsc, acsch, adjoint, as
     oneunit, parent, power_by_squaring, print_matrix, promote_rule, real, round, sec, sech,
     setindex!, show, similar, sin, sincos, sinh, size, sqrt,
     strides, stride, tan, tanh, transpose, trunc, typed_hcat, vec
-using Base: hvcat_fill, IndexLinear, promote_op, promote_typeof,
-    @propagate_inbounds, @pure, reduce, typed_vcat, require_one_based_indexing
+using Base: IndexLinear, promote_eltype, promote_op, promote_typeof,
+    @propagate_inbounds, @pure, reduce, typed_hvcat, typed_vcat, require_one_based_indexing,
+    splat
 using Base.Broadcast: Broadcasted, broadcasted
 import Libdl
 
@@ -35,6 +36,7 @@ export
     BunchKaufman,
     Cholesky,
     CholeskyPivoted,
+    ColumnNorm,
     Eigen,
     GeneralizedEigen,
     GeneralizedSVD,
@@ -42,12 +44,14 @@ export
     Hessenberg,
     LU,
     LDLt,
+    NoPivot,
     QR,
     QRPivoted,
     LQ,
     Schur,
     SVD,
     Hermitian,
+    RowMaximum,
     Symmetric,
     LowerTriangular,
     UpperTriangular,
@@ -164,6 +168,10 @@ abstract type Algorithm end
 struct DivideAndConquer <: Algorithm end
 struct QRIteration <: Algorithm end
 
+abstract type PivotingStrategy end
+struct NoPivot <: PivotingStrategy end
+struct RowMaximum <: PivotingStrategy end
+struct ColumnNorm <: PivotingStrategy end
 
 # Check that stride of matrix/vector is 1
 # Writing like this to avoid splatting penalty when called with multiple arguments,
@@ -283,14 +291,14 @@ julia> ldiv!(Y, qr(A), X);
 julia> Y
 3-element Vector{Float64}:
   0.7128099173553719
- -0.051652892561983806
-  0.10020661157024781
+ -0.051652892561983674
+  0.10020661157024757
 
 julia> A\\X
 3-element Vector{Float64}:
   0.7128099173553719
- -0.05165289256198342
-  0.1002066115702479
+ -0.05165289256198333
+  0.10020661157024785
 ```
 """
 ldiv!(Y, A, B)
@@ -320,14 +328,14 @@ julia> ldiv!(qr(A), X);
 julia> X
 3-element Vector{Float64}:
   0.7128099173553719
- -0.051652892561983806
-  0.10020661157024781
+ -0.051652892561983674
+  0.10020661157024757
 
 julia> A\\Y
 3-element Vector{Float64}:
   0.7128099173553719
- -0.05165289256198342
-  0.1002066115702479
+ -0.05165289256198333
+  0.10020661157024785
 ```
 """
 ldiv!(A, B)
@@ -347,8 +355,58 @@ control over the factorization of `B`.
 """
 rdiv!(A, B)
 
-copy_oftype(A::AbstractArray{T}, ::Type{T}) where {T} = copy(A)
-copy_oftype(A::AbstractArray{T,N}, ::Type{S}) where {T,N,S} = convert(AbstractArray{S,N}, A)
+
+
+"""
+    copy_oftype(A, T)
+
+Copy `A` to a mutable array with eltype `T` based on `similar(A, T)`.
+
+The resulting matrix typically has similar algebraic structure as `A`. For
+example, supplying a tridiagonal matrix results in another tridiagonal matrix.
+In general, the type of the output corresponds to that of `similar(A, T)`.
+
+There are three often used methods in LinearAlgebra to create a mutable copy
+of an array with a given eltype. These copies can be passed to in-place
+algorithms (such as ldiv!, rdiv!, lu! and so on). Which one to use in practice
+depends on what is known (or assumed) about the structure of the array in that
+algorithm.
+
+See also: `copy_similar`, `copy_to_array`.
+"""
+copy_oftype(A::AbstractArray, ::Type{T}) where {T} = copyto!(similar(A,T), A)
+
+"""
+    copy_similar(A, T)
+
+Copy `A` to a mutable array with eltype `T` based on `similar(A, T, size(A))`.
+
+Compared to `copy_oftype`, the result can be more flexible. For example,
+supplying a tridiagonal matrix results in a sparse array. In general, the type
+of the output corresponds to that of the three-argument method `similar(A, T, size(A))`.
+
+See also: `copy_oftype`, `copy_to_array`.
+"""
+copy_similar(A::AbstractArray, ::Type{T}) where {T} = copyto!(similar(A, T, size(A)), A)
+
+"""
+    copy_to_array(A, T)
+
+Copy `A` to a regular dense `Array` with element type `T`.
+
+The resulting array is mutable. It can be used, for example, to pass the data of
+`A` to an efficient in-place method for a matrix factorization such as `lu!`, in
+cases where a more specific implementation of `lu!` (or `lu`) is not available.
+
+See also: `copy_oftype`, `copy_similar`.
+"""
+copy_to_array(A::AbstractArray, ::Type{T}) where {T} = copyto!(Array{T}(undef, size(A)...), A)
+
+# The three copy functions above return mutable arrays with eltype T.
+# To only ensure a certain eltype, and if a mutable copy is not needed, it is
+# more efficient to use:
+# convert(AbstractArray{T}, A)
+
 
 include("adjtrans.jl")
 include("transpose.jl")
@@ -390,6 +448,63 @@ const ⋅ = dot
 const × = cross
 export ⋅, ×
 
+## convenience methods
+## return only the solution of a least squares problem while avoiding promoting
+## vectors to matrices.
+_cut_B(x::AbstractVector, r::UnitRange) = length(x)  > length(r) ? x[r]   : x
+_cut_B(X::AbstractMatrix, r::UnitRange) = size(X, 1) > length(r) ? X[r,:] : X
+
+## append right hand side with zeros if necessary
+_zeros(::Type{T}, b::AbstractVector, n::Integer) where {T} = zeros(T, max(length(b), n))
+_zeros(::Type{T}, B::AbstractMatrix, n::Integer) where {T} = zeros(T, max(size(B, 1), n), size(B, 2))
+
+# General fallback definition for handling under- and overdetermined systems as well as square problems.
+# While this definition is pretty general, it does e.g. promote to the common element type of lhs and rhs,
+# which is required by LAPACK but not by SuiteSparse, which allows real-complex solves in some cases. Hence,
+# we restrict this method to only the LAPACK factorizations in LinearAlgebra.
+# The definition is put here since it explicitly references all the Factorization structs, so it has
+# to be located after all the files that define the structs.
+const LAPACKFactorizations{T,S} = Union{
+    BunchKaufman{T,S},
+    Cholesky{T,S},
+    LQ{T,S},
+    LU{T,S},
+    QR{T,S},
+    QRCompactWY{T,S},
+    QRPivoted{T,S},
+    SVD{T,<:Real,S}}
+function (\)(F::Union{<:LAPACKFactorizations,Adjoint{<:Any,<:LAPACKFactorizations}}, B::AbstractVecOrMat)
+    require_one_based_indexing(B)
+    m, n = size(F)
+    if m != size(B, 1)
+        throw(DimensionMismatch("arguments must have the same number of rows"))
+    end
+
+    TFB = typeof(oneunit(eltype(B)) / oneunit(eltype(F)))
+    FF = Factorization{TFB}(F)
+
+    # For wide problems we (often) compute a minimum norm solution. The solution
+    # is larger than the right hand side so we use size(F, 2).
+    BB = _zeros(TFB, B, n)
+
+    if n > size(B, 1)
+        # Underdetermined
+        copyto!(view(BB, 1:m, :), B)
+    else
+        copyto!(BB, B)
+    end
+
+    ldiv!(FF, BB)
+
+    # For tall problems, we compute a least squares solution so only part
+    # of the rhs should be returned from \ while ldiv! uses (and returns)
+    # the complete rhs
+    return _cut_B(BB, 1:n)
+end
+# disambiguate
+(\)(F::LAPACKFactorizations{T}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} =
+    invoke(\, Tuple{Factorization{T}, VecOrMat{Complex{T}}}, F, B)
+
 """
     LinearAlgebra.peakflops(n::Integer=2000; parallel::Bool=false)
 
@@ -461,8 +576,13 @@ function __init__()
     try
         libblas_path = find_library_path(Base.libblas_name)
         liblapack_path = find_library_path(Base.liblapack_name)
+        # We manually `dlopen()` these libraries here, so that we search with `libjulia-internal`'s
+        # `RPATH` and not `libblastrampoline`'s.  Once it's been opened, when LBT tries to open it,
+        # it will find the library already loaded.
+        libblas_path = Libdl.dlpath(Libdl.dlopen(libblas_path))
         BLAS.lbt_forward(libblas_path; clear=true)
         if liblapack_path != libblas_path
+            liblapack_path = Libdl.dlpath(Libdl.dlopen(liblapack_path))
             BLAS.lbt_forward(liblapack_path)
         end
         BLAS.check()
diff --git a/stdlib/LinearAlgebra/src/adjtrans.jl b/stdlib/LinearAlgebra/src/adjtrans.jl
index 56942930d7aa75..f5903f380ee533 100644
--- a/stdlib/LinearAlgebra/src/adjtrans.jl
+++ b/stdlib/LinearAlgebra/src/adjtrans.jl
@@ -1,6 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-using Base: @propagate_inbounds, @_inline_meta
+using Base: @propagate_inbounds
 import Base: length, size, axes, IndexStyle, getindex, setindex!, parent, vec, convert, similar
 
 ### basic definitions (types, aliases, constructors, abstractarray interface, sundry similar)
@@ -34,10 +34,6 @@ julia> adjoint(A)
 """
 struct Adjoint{T,S} <: AbstractMatrix{T}
     parent::S
-    function Adjoint{T,S}(A::S) where {T,S}
-        checkeltype_adjoint(T, eltype(A))
-        new(A)
-    end
 end
 """
     Transpose
@@ -65,30 +61,6 @@ julia> transpose(A)
 """
 struct Transpose{T,S} <: AbstractMatrix{T}
     parent::S
-    function Transpose{T,S}(A::S) where {T,S}
-        checkeltype_transpose(T, eltype(A))
-        new(A)
-    end
-end
-
-function checkeltype_adjoint(::Type{ResultEltype}, ::Type{ParentEltype}) where {ResultEltype,ParentEltype}
-    Expected = Base.promote_op(adjoint, ParentEltype)
-    ResultEltype === Expected || error(string(
-        "Element type mismatch. Tried to create an `Adjoint{", ResultEltype, "}` ",
-        "from an object with eltype `", ParentEltype, "`, but the element type of ",
-        "the adjoint of an object with eltype `", ParentEltype, "` must be ",
-        "`", Expected, "`."))
-    return nothing
-end
-
-function checkeltype_transpose(::Type{ResultEltype}, ::Type{ParentEltype}) where {ResultEltype, ParentEltype}
-    Expected = Base.promote_op(transpose, ParentEltype)
-    ResultEltype === Expected || error(string(
-        "Element type mismatch. Tried to create a `Transpose{", ResultEltype, "}` ",
-        "from an object with eltype `", ParentEltype, "`, but the element type of ",
-        "the transpose of an object with eltype `", ParentEltype, "` must be ",
-        "`", Expected, "`."))
-    return nothing
 end
 
 # basic outer constructors
@@ -185,7 +157,9 @@ end
 # some aliases for internal convenience use
 const AdjOrTrans{T,S} = Union{Adjoint{T,S},Transpose{T,S}} where {T,S}
 const AdjointAbsVec{T} = Adjoint{T,<:AbstractVector}
+const AdjointAbsMat{T} = Adjoint{T,<:AbstractMatrix}
 const TransposeAbsVec{T} = Transpose{T,<:AbstractVector}
+const TransposeAbsMat{T} = Transpose{T,<:AbstractMatrix}
 const AdjOrTransAbsVec{T} = AdjOrTrans{T,<:AbstractVector}
 const AdjOrTransAbsMat{T} = AdjOrTrans{T,<:AbstractMatrix}
 
@@ -201,8 +175,8 @@ axes(v::AdjOrTransAbsVec) = (Base.OneTo(1), axes(v.parent)...)
 axes(A::AdjOrTransAbsMat) = reverse(axes(A.parent))
 IndexStyle(::Type{<:AdjOrTransAbsVec}) = IndexLinear()
 IndexStyle(::Type{<:AdjOrTransAbsMat}) = IndexCartesian()
-@propagate_inbounds getindex(v::AdjOrTransAbsVec, i::Int) = wrapperop(v)(v.parent[i-1+first(axes(v.parent)[1])])
-@propagate_inbounds getindex(A::AdjOrTransAbsMat, i::Int, j::Int) = wrapperop(A)(A.parent[j, i])
+@propagate_inbounds getindex(v::AdjOrTransAbsVec{T}, i::Int) where {T} = wrapperop(v)(v.parent[i-1+first(axes(v.parent)[1])])::T
+@propagate_inbounds getindex(A::AdjOrTransAbsMat{T}, i::Int, j::Int) where {T} = wrapperop(A)(A.parent[j, i])::T
 @propagate_inbounds setindex!(v::AdjOrTransAbsVec, x, i::Int) = (setindex!(v.parent, wrapperop(v)(x), i-1+first(axes(v.parent)[1])); v)
 @propagate_inbounds setindex!(A::AdjOrTransAbsMat, x, i::Int, j::Int) = (setindex!(A.parent, wrapperop(A)(x), j, i); A)
 # AbstractArray interface, additional definitions to retain wrapper over vectors where appropriate
@@ -236,9 +210,12 @@ similar(A::AdjOrTrans) = similar(A.parent, eltype(A), axes(A))
 similar(A::AdjOrTrans, ::Type{T}) where {T} = similar(A.parent, T, axes(A))
 similar(A::AdjOrTrans, ::Type{T}, dims::Dims{N}) where {T,N} = similar(A.parent, T, dims)
 
+# AbstractMatrix{T} constructor for adjtrans vector: preserve wrapped type
+AbstractMatrix{T}(A::AdjOrTransAbsVec) where {T} = wrapperop(A)(AbstractVector{T}(A.parent))
+
 # sundry basic definitions
 parent(A::AdjOrTrans) = A.parent
-vec(v::TransposeAbsVec) = parent(v)
+vec(v::TransposeAbsVec{<:Number}) = parent(v)
 vec(v::AdjointAbsVec{<:Real}) = parent(v)
 
 ### concatenation
@@ -275,6 +252,25 @@ Broadcast.broadcast_preserving_zero_d(f, avs::Union{Number,AdjointAbsVec}...) =
 Broadcast.broadcast_preserving_zero_d(f, tvs::Union{Number,TransposeAbsVec}...) = transpose(broadcast((xs...) -> transpose(f(transpose.(xs)...)), quasiparentt.(tvs)...))
 # TODO unify and allow mixed combinations with a broadcast style
 
+
+### reductions
+# faster to sum the Array than to work through the wrapper
+Base._mapreduce_dim(f, op, init::Base._InitialValue, A::Transpose, dims::Colon) =
+    transpose(Base._mapreduce_dim(_sandwich(transpose, f), _sandwich(transpose, op), init, parent(A), dims))
+Base._mapreduce_dim(f, op, init::Base._InitialValue, A::Adjoint, dims::Colon) =
+    adjoint(Base._mapreduce_dim(_sandwich(adjoint, f), _sandwich(adjoint, op), init, parent(A), dims))
+# sum(A'; dims)
+Base.mapreducedim!(f, op, B::AbstractArray, A::TransposeAbsMat) =
+    transpose(Base.mapreducedim!(_sandwich(transpose, f), _sandwich(transpose, op), transpose(B), parent(A)))
+Base.mapreducedim!(f, op, B::AbstractArray, A::AdjointAbsMat) =
+    adjoint(Base.mapreducedim!(_sandwich(adjoint, f), _sandwich(adjoint, op), adjoint(B), parent(A)))
+
+_sandwich(adj::Function, fun) = (xs...,) -> adj(fun(map(adj, xs)...))
+for fun in [:identity, :add_sum, :mul_prod] #, :max, :min]
+    @eval _sandwich(::Function, ::typeof(Base.$fun)) = Base.$fun
+end
+
+
 ### linear algebra
 
 (-)(A::Adjoint)   = Adjoint(  -A.parent)
diff --git a/stdlib/LinearAlgebra/src/bidiag.jl b/stdlib/LinearAlgebra/src/bidiag.jl
index 69fbaa476de73d..1a75c6a9287f06 100644
--- a/stdlib/LinearAlgebra/src/bidiag.jl
+++ b/stdlib/LinearAlgebra/src/bidiag.jl
@@ -390,18 +390,15 @@ const BiTri = Union{Bidiagonal,Tridiagonal}
 @inline mul!(C::AbstractMatrix,   A::AbstractTriangular, B::BiTriSym, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
 @inline mul!(C::AbstractMatrix,   A::AbstractMatrix,     B::BiTriSym, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
 @inline mul!(C::AbstractMatrix,   A::Diagonal,           B::BiTriSym, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix, A::Adjoint{<:Any,<:Diagonal}, B::BiTriSym, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix, A::Transpose{<:Any,<:Diagonal}, B::BiTriSym, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix, A::Adjoint{<:Any,<:AbstractTriangular}, B::BiTriSym, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix, A::Transpose{<:Any,<:AbstractTriangular}, B::BiTriSym, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
 @inline mul!(C::AbstractMatrix, A::Adjoint{<:Any,<:AbstractVecOrMat}, B::BiTriSym, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
 @inline mul!(C::AbstractMatrix, A::Transpose{<:Any,<:AbstractVecOrMat}, B::BiTriSym, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractVector,   A::BiTriSym,              B::AbstractVector, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix,   A::BiTriSym,              B::AbstractVecOrMat, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractVecOrMat, A::BiTriSym,              B::AbstractVecOrMat, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix, A::BiTriSym, B::Transpose{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta)) # around bidiag line 330
+@inline mul!(C::AbstractVector, A::BiTriSym, B::AbstractVector, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
+@inline mul!(C::AbstractMatrix, A::BiTriSym, B::AbstractVecOrMat, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
+@inline mul!(C::AbstractMatrix, A::BiTriSym, B::Diagonal, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
+@inline mul!(C::AbstractMatrix, A::BiTriSym, B::Transpose{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
 @inline mul!(C::AbstractMatrix, A::BiTriSym, B::Adjoint{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
 @inline mul!(C::AbstractVector, A::BiTriSym, B::Transpose{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) = throw(MethodError(mul!, (C, A, B)), MulAddMul(alpha, beta))
+@inline mul!(C::AbstractVector, A::BiTriSym, B::Adjoint{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) = throw(MethodError(mul!, (C, A, B, alpha, beta)))
 
 function check_A_mul_B!_sizes(C, A, B)
     require_one_based_indexing(C)
@@ -635,8 +632,6 @@ end
 
 const UpperOrUnitUpperTriangular = Union{UpperTriangular, UnitUpperTriangular}
 const LowerOrUnitLowerTriangular = Union{LowerTriangular, UnitLowerTriangular}
-const AdjOrTransUpperOrUnitUpperTriangular = Union{Adjoint{<:Any, <:UpperOrUnitUpperTriangular}, Transpose{<:Any, <:UpperOrUnitUpperTriangular}}
-const AdjOrTransLowerOrUnitLowerTriangular = Union{Adjoint{<:Any, <:LowerOrUnitLowerTriangular}, Transpose{<:Any, <:LowerOrUnitLowerTriangular}}
 
 function *(A::UpperOrUnitUpperTriangular, B::Bidiagonal)
     TS = promote_op(matprod, eltype(A), eltype(B))
@@ -647,15 +642,6 @@ function *(A::UpperOrUnitUpperTriangular, B::Bidiagonal)
     end
 end
 
-function *(A::AdjOrTransUpperOrUnitUpperTriangular, B::Bidiagonal)
-    TS = promote_op(matprod, eltype(A), eltype(B))
-    if B.uplo == 'L'
-        A_mul_B_td!(LowerTriangular(zeros(TS, size(A)...)), A, B)
-    else
-        A_mul_B_td!(zeros(TS, size(A)...), A, B)
-    end
-end
-
 function *(A::LowerOrUnitLowerTriangular, B::Bidiagonal)
     TS = promote_op(matprod, eltype(A), eltype(B))
     if B.uplo == 'L'
@@ -665,15 +651,6 @@ function *(A::LowerOrUnitLowerTriangular, B::Bidiagonal)
     end
 end
 
-function *(A::AdjOrTransLowerOrUnitLowerTriangular, B::Bidiagonal)
-    TS = promote_op(matprod, eltype(A), eltype(B))
-    if B.uplo == 'U'
-        A_mul_B_td!(UpperTriangular(zeros(TS, size(A)...)), A, B)
-    else
-        A_mul_B_td!(zeros(TS, size(A)...), A, B)
-    end
-end
-
 function *(A::Union{SymTridiagonal, Tridiagonal}, B::AbstractTriangular)
     TS = promote_op(matprod, eltype(A), eltype(B))
     A_mul_B_td!(zeros(TS, size(A)...), A, B)
@@ -688,15 +665,6 @@ function *(A::Bidiagonal, B::UpperOrUnitUpperTriangular)
     end
 end
 
-function *(A::Bidiagonal, B::AdjOrTransUpperOrUnitUpperTriangular)
-    TS = promote_op(matprod, eltype(A), eltype(B))
-    if A.uplo == 'L'
-        A_mul_B_td!(LowerTriangular(zeros(TS, size(A)...)), A, B)
-    else
-        A_mul_B_td!(zeros(TS, size(A)...), A, B)
-    end
-end
-
 function *(A::Bidiagonal, B::LowerOrUnitLowerTriangular)
     TS = promote_op(matprod, eltype(A), eltype(B))
     if A.uplo == 'L'
@@ -706,15 +674,6 @@ function *(A::Bidiagonal, B::LowerOrUnitLowerTriangular)
     end
 end
 
-function *(A::Bidiagonal, B::AdjOrTransLowerOrUnitLowerTriangular)
-    TS = promote_op(matprod, eltype(A), eltype(B))
-    if A.uplo == 'U'
-        A_mul_B_td!(UpperTriangular(zeros(TS, size(A)...)), A, B)
-    else
-        A_mul_B_td!(zeros(TS, size(A)...), A, B)
-    end
-end
-
 function *(A::BiTri, B::Diagonal)
     TS = promote_op(matprod, eltype(A), eltype(B))
     A_mul_B_td!(similar(A, TS), A, B)
@@ -766,62 +725,13 @@ function dot(x::AbstractVector, B::Bidiagonal, y::AbstractVector)
 end
 
 #Linear solvers
-ldiv!(A::Union{Bidiagonal, AbstractTriangular}, b::AbstractVector) = naivesub!(A, b)
-ldiv!(A::Transpose{<:Any,<:Bidiagonal}, b::AbstractVector) = ldiv!(copy(A), b)
-ldiv!(A::Adjoint{<:Any,<:Bidiagonal}, b::AbstractVector) = ldiv!(copy(A), b)
-function ldiv!(A::Union{Bidiagonal,AbstractTriangular}, B::AbstractMatrix)
-    require_one_based_indexing(A, B)
-    nA,mA = size(A)
-    tmp = similar(B,size(B,1))
-    n = size(B, 1)
-    if nA != n
-        throw(DimensionMismatch("size of A is ($nA,$mA), corresponding dimension of B is $n"))
-    end
-    for i = 1:size(B,2)
-        copyto!(tmp, 1, B, (i - 1)*n + 1, n)
-        ldiv!(A, tmp)
-        copyto!(B, (i - 1)*n + 1, tmp, 1, n) # Modify this when array view are implemented.
-    end
-    B
-end
-function ldiv!(adjA::Adjoint{<:Any,<:Union{Bidiagonal,AbstractTriangular}}, B::AbstractMatrix)
-    require_one_based_indexing(adjA, B)
-    A = adjA.parent
-    nA,mA = size(A)
-    tmp = similar(B,size(B,1))
-    n = size(B, 1)
-    if mA != n
-        throw(DimensionMismatch("size of adjoint of A is ($mA,$nA), corresponding dimension of B is $n"))
-    end
-    for i = 1:size(B,2)
-        copyto!(tmp, 1, B, (i - 1)*n + 1, n)
-        ldiv!(adjoint(A), tmp)
-        copyto!(B, (i - 1)*n + 1, tmp, 1, n) # Modify this when array view are implemented.
-    end
-    B
-end
-function ldiv!(transA::Transpose{<:Any,<:Union{Bidiagonal,AbstractTriangular}}, B::AbstractMatrix)
-    require_one_based_indexing(transA, B)
-    A = transA.parent
-    nA,mA = size(A)
-    tmp = similar(B,size(B,1))
-    n = size(B, 1)
-    if mA != n
-        throw(DimensionMismatch("size of transpose of A is ($mA,$nA), corresponding dimension of B is $n"))
-    end
-    for i = 1:size(B,2)
-        copyto!(tmp, 1, B, (i - 1)*n + 1, n)
-        ldiv!(transpose(A), tmp)
-        copyto!(B, (i - 1)*n + 1, tmp, 1, n) # Modify this when array view are implemented.
-    end
-    B
-end
 #Generic solver using naive substitution
-function naivesub!(A::Bidiagonal{T}, b::AbstractVector, x::AbstractVector = b) where T
-    require_one_based_indexing(A, b, x)
+function ldiv!(A::Bidiagonal, b::AbstractVector)
+    require_one_based_indexing(A, b)
     N = size(A, 2)
-    if N != length(b) || N != length(x)
-        throw(DimensionMismatch("second dimension of A, $N, does not match one of the lengths of x, $(length(x)), or b, $(length(b))"))
+    mb = length(b)
+    if N != mb
+        throw(DimensionMismatch("second dimension of A, $N, does not match the length of b, $mb"))
     end
 
     if N == 0
@@ -830,33 +740,47 @@ function naivesub!(A::Bidiagonal{T}, b::AbstractVector, x::AbstractVector = b) w
 
     @inbounds begin
         if A.uplo == 'L' #do forward substitution
-            x[1] = xj1 = A.dv[1]\b[1]
-            for j = 2:N
-                xj  = b[j]
-                xj -= A.ev[j - 1] * xj1
+            b[1] = bj1 = A.dv[1]\b[1]
+            for j in 2:N
+                bj  = b[j]
+                bj -= A.ev[j - 1] * bj1
                 dvj = A.dv[j]
                 if iszero(dvj)
                     throw(SingularException(j))
                 end
-                xj   = dvj\xj
-                x[j] = xj1 = xj
+                bj   = dvj\bj
+                b[j] = bj1 = bj
             end
         else #do backward substitution
-            x[N] = xj1 = A.dv[N]\b[N]
+            b[N] = bj1 = A.dv[N]\b[N]
             for j = (N - 1):-1:1
-                xj  = b[j]
-                xj -= A.ev[j] * xj1
+                bj  = b[j]
+                bj -= A.ev[j] * bj1
                 dvj = A.dv[j]
                 if iszero(dvj)
                     throw(SingularException(j))
                 end
-                xj   = dvj\xj
-                x[j] = xj1 = xj
+                bj   = dvj\bj
+                b[j] = bj1 = bj
             end
         end
     end
-    return x
+    return b
+end
+function ldiv!(A::Bidiagonal, B::AbstractMatrix)
+    require_one_based_indexing(A, B)
+    mA, nA = size(A)
+    n = size(B, 1)
+    if mA != n
+        throw(DimensionMismatch("first dimension of A, $mA, does not match the first dimension of B, $n"))
+    end
+    for b in eachcol(B)
+        ldiv!(A, b)
+    end
+    B
 end
+ldiv!(A::Transpose{<:Any,<:Bidiagonal}, b::AbstractVecOrMat) = ldiv!(copy(A), b)
+ldiv!(A::Adjoint{<:Any,<:Bidiagonal}, b::AbstractVecOrMat) = ldiv!(copy(A), b)
 
 ### Generic promotion methods and fallbacks
 function \(A::Bidiagonal{<:Number}, B::AbstractVecOrMat{<:Number})
@@ -865,20 +789,20 @@ function \(A::Bidiagonal{<:Number}, B::AbstractVecOrMat{<:Number})
     ldiv!(convert(AbstractArray{TAB}, A), copy_oftype(B, TAB))
 end
 \(A::Bidiagonal, B::AbstractVecOrMat) = ldiv!(A, copy(B))
-function \(transA::Transpose{<:Number,<:Bidiagonal{<:Number}}, B::AbstractVecOrMat{<:Number})
-    A = transA.parent
+function \(tA::Transpose{<:Number,<:Bidiagonal{<:Number}}, B::AbstractVecOrMat{<:Number})
+    A = tA.parent
     TA, TB = eltype(A), eltype(B)
     TAB = typeof((zero(TA)*zero(TB) + zero(TA)*zero(TB))/one(TA))
     ldiv!(transpose(convert(AbstractArray{TAB}, A)), copy_oftype(B, TAB))
 end
-\(transA::Transpose{<:Any,<:Bidiagonal}, B::AbstractVecOrMat) = ldiv!(transpose(transA.parent), copy(B))
+\(tA::Transpose{<:Any,<:Bidiagonal}, B::AbstractVecOrMat) = ldiv!(tA, copy(B))
 function \(adjA::Adjoint{<:Number,<:Bidiagonal{<:Number}}, B::AbstractVecOrMat{<:Number})
     A = adjA.parent
     TA, TB = eltype(A), eltype(B)
     TAB = typeof((zero(TA)*zero(TB) + zero(TA)*zero(TB))/one(TA))
     ldiv!(adjoint(convert(AbstractArray{TAB}, A)), copy_oftype(B, TAB))
 end
-\(adjA::Adjoint{<:Any,<:Bidiagonal}, B::AbstractVecOrMat) = ldiv!(adjoint(adjA.parent), copy(B))
+\(adjA::Adjoint{<:Any,<:Bidiagonal}, B::AbstractVecOrMat) = ldiv!(adjA, copy(B))
 
 factorize(A::Bidiagonal) = A
 
diff --git a/stdlib/LinearAlgebra/src/blas.jl b/stdlib/LinearAlgebra/src/blas.jl
index 0ef4f14d092b5a..661e9e2b156177 100644
--- a/stdlib/LinearAlgebra/src/blas.jl
+++ b/stdlib/LinearAlgebra/src/blas.jl
@@ -79,7 +79,7 @@ import LinearAlgebra: BlasReal, BlasComplex, BlasFloat, BlasInt, DimensionMismat
 include("lbt.jl")
 
 """
-get_config()
+    get_config()
 
 Return an object representing the current `libblastrampoline` configuration.
 
@@ -91,7 +91,7 @@ get_config() = lbt_get_config()
 # We hard-lock `vendor()` to `openblas(64)` here to satisfy older code, but all new code should use
 # `get_config()` since it is now possible to have multiple vendors loaded at once.
 function vendor()
-    Base.depwarn("`vendor()` is deprecated, use `BLAS.get_config()` and inspect the output instead", :vendor)
+    Base.depwarn("`vendor()` is deprecated, use `BLAS.get_config()` and inspect the output instead", :vendor; force=true)
     if USE_BLAS64
         return :openblas64
     else
@@ -218,15 +218,21 @@ end
 
 """
     scal!(n, a, X, incx)
+    scal!(a, X)
 
 Overwrite `X` with `a*X` for the first `n` elements of array `X` with stride `incx`. Returns `X`.
+
+If `n` and `incx` are not provided, `length(X)` and `stride(X,1)` are used.
 """
 function scal! end
 
 """
     scal(n, a, X, incx)
+    scal(a, X)
 
 Return `X` scaled by `a` for the first `n` elements of array `X` with stride `incx`.
+
+If `n` and `incx` are not provided, `length(X)` and `stride(X,1)` are used.
 """
 function scal end
 
@@ -242,9 +248,12 @@ for (fname, elty) in ((:dscal_,:Float64),
                   n, DA, DX, incx)
             DX
         end
+
+        scal!(DA::$elty, DX::AbstractArray{$elty}) = scal!(length(DX),DA,DX,stride(DX,1))
     end
 end
 scal(n, DA, DX, incx) = scal!(n, DA, copy(DX), incx)
+scal(DA, DX) = scal!(DA, copy(DX))
 
 ## dot
 
@@ -655,13 +664,19 @@ for (fname, elty) in ((:dgemv_,:Float64),
                 throw(DimensionMismatch("the transpose of A has dimensions $n, $m, X has length $(length(X)) and Y has length $(length(Y))"))
             end
             chkstride1(A)
-            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
+            lda = stride(A,2)
+            lda >= max(1, size(A,1)) || error("`stride(A,2)` must be at least `max(1, size(A,1))`")
+            sX = stride(X,1)
+            pX = pointer(X, sX > 0 ? firstindex(X) : lastindex(X))
+            sY = stride(Y,1)
+            pY = pointer(Y, sY > 0 ? firstindex(Y) : lastindex(Y))
+            GC.@preserve X Y ccall((@blasfunc($fname), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{$elty},
                  Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                  Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Clong),
                  trans, size(A,1), size(A,2), alpha,
-                 A, max(1,stride(A,2)), X, stride(X,1),
-                 beta, Y, stride(Y,1), 1)
+                 A, lda, pX, sX,
+                 beta, pY, sY, 1)
             Y
         end
         function gemv(trans::AbstractChar, alpha::($elty), A::AbstractMatrix{$elty}, X::AbstractVector{$elty})
diff --git a/stdlib/LinearAlgebra/src/bunchkaufman.jl b/stdlib/LinearAlgebra/src/bunchkaufman.jl
index 63254308799e13..75fb9ae7bf04eb 100644
--- a/stdlib/LinearAlgebra/src/bunchkaufman.jl
+++ b/stdlib/LinearAlgebra/src/bunchkaufman.jl
@@ -196,16 +196,14 @@ julia> S.L*S.D*S.L' - A[S.p, S.p]
 bunchkaufman(A::AbstractMatrix{T}, rook::Bool=false; check::Bool = true) where {T} =
     bunchkaufman!(copy_oftype(A, typeof(sqrt(oneunit(T)))), rook; check = check)
 
-convert(::Type{BunchKaufman{T}}, B::BunchKaufman{T}) where {T} = B
-convert(::Type{BunchKaufman{T}}, B::BunchKaufman) where {T} =
+BunchKaufman{T}(B::BunchKaufman) where {T} =
     BunchKaufman(convert(Matrix{T}, B.LD), B.ipiv, B.uplo, B.symmetric, B.rook, B.info)
-convert(::Type{Factorization{T}}, B::BunchKaufman{T}) where {T} = B
-convert(::Type{Factorization{T}}, B::BunchKaufman) where {T} = convert(BunchKaufman{T}, B)
+Factorization{T}(B::BunchKaufman) where {T} = BunchKaufman{T}(B)
 
 size(B::BunchKaufman) = size(getfield(B, :LD))
 size(B::BunchKaufman, d::Integer) = size(getfield(B, :LD), d)
 issymmetric(B::BunchKaufman) = B.symmetric
-ishermitian(B::BunchKaufman) = !B.symmetric
+ishermitian(B::BunchKaufman{T}) where T = T<:Real || !B.symmetric
 
 function _ipiv2perm_bk(v::AbstractVector{T}, maxi::Integer, uplo::AbstractChar, rook::Bool) where T
     require_one_based_indexing(v)
@@ -279,6 +277,14 @@ Base.propertynames(B::BunchKaufman, private::Bool=false) =
 
 issuccess(B::BunchKaufman) = B.info == 0
 
+function adjoint(B::BunchKaufman)
+    if ishermitian(B)
+        return B
+    else
+        throw(ArgumentError("adjoint not implemented for complex symmetric matrices"))
+    end
+end
+
 function Base.show(io::IO, mime::MIME{Symbol("text/plain")}, B::BunchKaufman)
     if issuccess(B)
         summary(io, B); println(io)
diff --git a/stdlib/LinearAlgebra/src/cholesky.jl b/stdlib/LinearAlgebra/src/cholesky.jl
index 18ee4cb5c7dd90..bf738d5d203ec8 100644
--- a/stdlib/LinearAlgebra/src/cholesky.jl
+++ b/stdlib/LinearAlgebra/src/cholesky.jl
@@ -110,8 +110,10 @@ positive semi-definite matrix `A`. This is the return type of [`cholesky(_, Val(
 the corresponding matrix factorization function.
 
 The triangular Cholesky factor can be obtained from the factorization `F::CholeskyPivoted`
-via `F.L` and `F.U`, and the permutation via `F.p`, where `A[F.p, F.p] ≈ F.U' * F.U ≈ F.L * F.L'`,
-or alternatively `A ≈ F.U[:, F.p]' * F.U[:, F.p] ≈ F.L[F.p, :] * F.L[F.p, :]'`.
+via `F.L` and `F.U`, and the permutation via `F.p`, where `A[F.p, F.p] ≈ Ur' * Ur ≈ Lr * Lr'`
+with `Ur = F.U[1:F.rank, :]` and `Lr = F.L[:, 1:F.rank]`, or alternatively
+`A ≈ Up' * Up ≈ Lp * Lp'` with `Up = F.U[1:F.rank, invperm(F.p)]` and
+`Lp = F.L[invperm(F.p), 1:F.rank]`.
 
 The following functions are available for `CholeskyPivoted` objects:
 [`size`](@ref), [`\\`](@ref), [`inv`](@ref), [`det`](@ref), and [`rank`](@ref).
@@ -120,25 +122,28 @@ Iterating the decomposition produces the components `L` and `U`.
 
 # Examples
 ```jldoctest
-julia> A = [4. 12. -16.; 12. 37. -43.; -16. -43. 98.]
-3×3 Matrix{Float64}:
-   4.0   12.0  -16.0
-  12.0   37.0  -43.0
- -16.0  -43.0   98.0
+julia> X = [1.0, 2.0, 3.0, 4.0];
+
+julia> A = X * X';
 
-julia> C = cholesky(A, Val(true))
+julia> C = cholesky(A, Val(true), check = false)
 CholeskyPivoted{Float64, Matrix{Float64}}
-U factor with rank 3:
-3×3 UpperTriangular{Float64, Matrix{Float64}}:
- 9.89949  -4.34366  -1.61624
-  ⋅        4.25825   1.1694
-  ⋅         ⋅        0.142334
+U factor with rank 1:
+4×4 UpperTriangular{Float64, Matrix{Float64}}:
+ 4.0  2.0  3.0  1.0
+  ⋅   0.0  6.0  2.0
+  ⋅    ⋅   9.0  3.0
+  ⋅    ⋅    ⋅   1.0
 permutation:
-3-element Vector{Int64}:
- 3
+4-element Vector{Int64}:
+ 4
  2
+ 3
  1
 
+julia> C.U[1:C.rank, :]' * C.U[1:C.rank, :] ≈ A[C.p, C.p]
+true
+
 julia> l, u = C; # destructuring via iteration
 
 julia> l == C.L && u == C.U
@@ -388,6 +393,11 @@ true
 cholesky(A::Union{StridedMatrix,RealHermSymComplexHerm{<:Real,<:StridedMatrix}},
     ::Val{false}=Val(false); check::Bool = true) = cholesky!(cholcopy(A); check = check)
 
+function cholesky(A::Union{StridedMatrix{Float16},RealHermSymComplexHerm{Float16,<:StridedMatrix}}, ::Val{false}=Val(false); check::Bool = true)
+    X = cholesky!(cholcopy(A); check = check)
+    return Cholesky{Float16}(X)
+end
+
 
 ## With pivoting
 """
@@ -398,8 +408,9 @@ and return a [`CholeskyPivoted`](@ref) factorization. The matrix `A` can either
 or [`Hermitian`](@ref) [`StridedMatrix`](@ref) or a *perfectly* symmetric or Hermitian `StridedMatrix`.
 
 The triangular Cholesky factor can be obtained from the factorization `F` via `F.L` and `F.U`,
-and the permutation via `F.p`, where `A[F.p, F.p] ≈ F.U' * F.U ≈ F.L * F.L'`, or alternatively
-`A ≈ F.U[:, F.p]' * F.U[:, F.p] ≈ F.L[F.p, :] * F.L[F.p, :]'`.
+and the permutation via `F.p`, where `A[F.p, F.p] ≈ Ur' * Ur ≈ Lr * Lr'` with `Ur = F.U[1:F.rank, :]`
+and `Lr = F.L[:, 1:F.rank]`, or alternatively `A ≈ Up' * Up ≈ Lp * Lp'` with
+`Up = F.U[1:F.rank, invperm(F.p)]` and `Lp = F.L[invperm(F.p), 1:F.rank]`.
 
 The following functions are available for `CholeskyPivoted` objects:
 [`size`](@ref), [`\\`](@ref), [`inv`](@ref), [`det`](@ref), and [`rank`](@ref).
@@ -416,26 +427,26 @@ validity (via [`issuccess`](@ref)) lies with the user.
 
 # Examples
 ```jldoctest
-julia> A = [4. 12. -16.; 12. 37. -43.; -16. -43. 98.]
-3×3 Matrix{Float64}:
-   4.0   12.0  -16.0
-  12.0   37.0  -43.0
- -16.0  -43.0   98.0
+julia> X = [1.0, 2.0, 3.0, 4.0];
 
-julia> C = cholesky(A, Val(true))
+julia> A = X * X';
+
+julia> C = cholesky(A, Val(true), check = false)
 CholeskyPivoted{Float64, Matrix{Float64}}
-U factor with rank 3:
-3×3 UpperTriangular{Float64, Matrix{Float64}}:
- 9.89949  -4.34366  -1.61624
-  ⋅        4.25825   1.1694
-  ⋅         ⋅        0.142334
+U factor with rank 1:
+4×4 UpperTriangular{Float64, Matrix{Float64}}:
+ 4.0  2.0  3.0  1.0
+  ⋅   0.0  6.0  2.0
+  ⋅    ⋅   9.0  3.0
+  ⋅    ⋅    ⋅   1.0
 permutation:
-3-element Vector{Int64}:
- 3
+4-element Vector{Int64}:
+ 4
  2
+ 3
  1
 
-julia> C.U[:, C.p]' * C.U[:, C.p] ≈ A
+julia> C.U[1:C.rank, :]' * C.U[1:C.rank, :] ≈ A[C.p, C.p]
 true
 
 julia> l, u = C; # destructuring via iteration
@@ -529,6 +540,8 @@ Base.propertynames(F::CholeskyPivoted, private::Bool=false) =
 
 issuccess(C::Union{Cholesky,CholeskyPivoted}) = C.info == 0
 
+adjoint(C::Union{Cholesky,CholeskyPivoted}) = C
+
 function show(io::IO, mime::MIME{Symbol("text/plain")}, C::Cholesky{<:Any,<:AbstractMatrix})
     if issuccess(C)
         summary(io, C); println(io)
diff --git a/stdlib/LinearAlgebra/src/dense.jl b/stdlib/LinearAlgebra/src/dense.jl
index ad288bef1ddc7d..14b74097d4438d 100644
--- a/stdlib/LinearAlgebra/src/dense.jl
+++ b/stdlib/LinearAlgebra/src/dense.jl
@@ -13,7 +13,7 @@ const NRM2_CUTOFF = 32
 # This constant should ideally be determined by the actual CPU cache size
 const ISONE_CUTOFF = 2^21 # 2M
 
-function isone(A::StridedMatrix)
+function isone(A::AbstractMatrix)
     m, n = size(A)
     m != n && return false # only square matrices can satisfy x == one(x)
     if sizeof(A) < ISONE_CUTOFF
@@ -23,7 +23,7 @@ function isone(A::StridedMatrix)
     end
 end
 
-@inline function _isone_triacheck(A::StridedMatrix, m::Int)
+@inline function _isone_triacheck(A::AbstractMatrix, m::Int)
     @inbounds for i in 1:m, j in i:m
         if i == j
             isone(A[i,i]) || return false
@@ -35,7 +35,7 @@ end
 end
 
 # Inner loop over rows to be friendly to the CPU cache
-@inline function _isone_cachefriendly(A::StridedMatrix, m::Int)
+@inline function _isone_cachefriendly(A::AbstractMatrix, m::Int)
     @inbounds for i in 1:m, j in 1:m
         if i == j
             isone(A[i,i]) || return false
@@ -75,7 +75,8 @@ isposdef!(A::AbstractMatrix) =
 
 Test whether a matrix is positive definite (and Hermitian) by trying to perform a
 Cholesky factorization of `A`.
-See also [`isposdef!`](@ref)
+
+See also [`isposdef!`](@ref), [`cholesky`](@ref).
 
 # Examples
 ```jldoctest
@@ -205,6 +206,8 @@ diagind(m::Integer, n::Integer, k::Integer=0) =
 
 An `AbstractRange` giving the indices of the `k`th diagonal of the matrix `M`.
 
+See also: [`diag`](@ref), [`diagm`](@ref), [`Diagonal`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = [1 2 3; 4 5 6; 7 8 9]
@@ -227,7 +230,7 @@ end
 
 The `k`th diagonal of a matrix, as a vector.
 
-See also: [`diagm`](@ref)
+See also [`diagm`](@ref), [`diagind`](@ref), [`Diagonal`](@ref), [`isdiag`](@ref).
 
 # Examples
 ```jldoctest
@@ -448,7 +451,7 @@ function (^)(A::AbstractMatrix{T}, p::Integer) where T<:Integer
 end
 function integerpow(A::AbstractMatrix{T}, p) where T
     TT = promote_op(^, T, typeof(p))
-    return (TT == T ? A : copyto!(similar(A, TT), A))^Integer(p)
+    return (TT == T ? A : convert(AbstractMatrix{TT}, A))^Integer(p)
 end
 function schurpow(A::AbstractMatrix, p)
     if istriu(A)
@@ -462,7 +465,7 @@ function schurpow(A::AbstractMatrix, p)
             retmat = retmat * powm!(UpperTriangular(float.(A)), real(p - floor(p)))
         end
     else
-        S,Q,d = schur(complex(A))
+        S,Q,d = Schur{Complex}(schur(A))
         # Integer part
         R = S ^ floor(p)
         # Real part
@@ -556,6 +559,28 @@ julia> exp(A)
 """
 exp(A::StridedMatrix{<:BlasFloat}) = exp!(copy(A))
 exp(A::StridedMatrix{<:Union{Integer,Complex{<:Integer}}}) = exp!(float.(A))
+exp(A::Adjoint{<:Any,<:AbstractMatrix}) = adjoint(exp(parent(A)))
+exp(A::Transpose{<:Any,<:AbstractMatrix}) = transpose(exp(parent(A)))
+
+"""
+    cis(A::AbstractMatrix)
+
+Compute ``\\exp(i A)`` for a square matrix ``A``.
+
+!!! compat "Julia 1.7"
+    Support for using `cis` with matrices was added in Julia 1.7.
+
+# Examples
+```jldoctest
+julia> cis([π 0; 0 π]) ≈ -I
+true
+```
+"""
+Base.cis(A::AbstractMatrix) = exp(im * A)  # fallback
+Base.cis(A::AbstractMatrix{<:Base.HWNumber}) = exp_maybe_inplace(float.(im .* A))
+
+exp_maybe_inplace(A::StridedMatrix{<:Union{ComplexF32, ComplexF64}}) = exp!(A)
+exp_maybe_inplace(A) = exp(A)
 
 """
     ^(b::Number, A::AbstractMatrix)
@@ -592,7 +617,6 @@ function exp!(A::StridedMatrix{T}) where T<:BlasFloat
     end
     ilo, ihi, scale = LAPACK.gebal!('B', A)    # modifies A
     nA   = opnorm(A, 1)
-    Inn    = Matrix{T}(I, n, n)
     ## For sufficiently small nA, use lower order Padé-Approximations
     if (nA <= 2.1)
         if nA > 0.95
@@ -609,23 +633,29 @@ function exp!(A::StridedMatrix{T}) where T<:BlasFloat
             C = T[120.,60.,12.,1.]
         end
         A2 = A * A
-        P  = copy(Inn)
-        U  = C[2] * P
-        V  = C[1] * P
-        for k in 1:(div(size(C, 1), 2) - 1)
-            k2 = 2 * k
+        # Compute U and V: Even/odd terms in Padé numerator & denom
+        # Expansion of k=1 in for loop
+        P = A2
+        U = mul!(C[4]*P, true, C[2]*I, true, true) #U = C[2]*I + C[4]*P
+        V = mul!(C[3]*P, true, C[1]*I, true, true) #V = C[1]*I + C[3]*P
+        for k in 2:(div(length(C), 2) - 1)
             P *= A2
-            U += C[k2 + 2] * P
-            V += C[k2 + 1] * P
+            mul!(U, C[2k + 2], P, true, true) # U += C[2k+2]*P
+            mul!(V, C[2k + 1], P, true, true) # V += C[2k+1]*P
         end
+
         U = A * U
-        X = V + U
-        LAPACK.gesv!(V-U, X)
+
+        # Padé approximant:  (V-U)\(V+U)
+        tmp1, tmp2 = A, A2 # Reuse already allocated arrays
+        tmp1 .= V .- U
+        tmp2 .= V .+ U
+        X = LAPACK.gesv!(tmp1, tmp2)[1]
     else
         s  = log2(nA/5.4)               # power of 2 later reversed by squaring
         if s > 0
             si = ceil(Int,s)
-            A /= convert(T,2^si)
+            A ./= convert(T,2^si)
         end
         CC = T[64764752532480000.,32382376266240000.,7771770303897600.,
                 1187353796428800.,  129060195264000.,  10559470521600.,
@@ -635,16 +665,35 @@ function exp!(A::StridedMatrix{T}) where T<:BlasFloat
         A2 = A * A
         A4 = A2 * A2
         A6 = A2 * A4
-        U  = A * (A6 * (CC[14].*A6 .+ CC[12].*A4 .+ CC[10].*A2) .+
-                  CC[8].*A6 .+ CC[6].*A4 .+ CC[4].*A2 .+ CC[2].*Inn)
-        V  = A6 * (CC[13].*A6 .+ CC[11].*A4 .+ CC[9].*A2) .+
-                   CC[7].*A6 .+ CC[5].*A4 .+ CC[3].*A2 .+ CC[1].*Inn
+        tmp1, tmp2 = similar(A6), similar(A6)
+
+        # Allocation economical version of:
+        # U  = A * (A6 * (CC[14].*A6 .+ CC[12].*A4 .+ CC[10].*A2) .+
+        #           CC[8].*A6 .+ CC[6].*A4 .+ CC[4]*A2+CC[2]*I)
+        tmp1 .= CC[14].*A6 .+ CC[12].*A4 .+ CC[10].*A2
+        tmp2 .= CC[8].*A6 .+ CC[6].*A4 .+ CC[4].*A2
+        mul!(tmp2, true,CC[2]*I, true, true) # tmp2 .+= CC[2]*I
+        U = mul!(tmp2, A6, tmp1, true, true)
+        U, tmp1 = mul!(tmp1, A, U), A # U = A * U0
+
+        # Allocation economical version of:
+        # V  = A6 * (CC[13].*A6 .+ CC[11].*A4 .+ CC[9].*A2) .+
+        #           CC[7].*A6 .+ CC[5].*A4 .+ CC[3]*A2 .+ CC[1]*I
+        tmp1 .= CC[13].*A6 .+ CC[11].*A4 .+ CC[9].*A2
+        tmp2 .= CC[7].*A6 .+ CC[5].*A4 .+ CC[3].*A2
+        mul!(tmp2, true, CC[1]*I, true, true) # tmp2 .+= CC[1]*I
+        V = mul!(tmp2, A6, tmp1, true, true)
+
+        tmp1 .= V .+ U
+        tmp2 .= V .- U # tmp2 already contained V but this seems more readable
+        X = LAPACK.gesv!(tmp2, tmp1)[1] # X now contains r_13 in Higham 2008
 
-        X = V + U
-        LAPACK.gesv!(V-U, X)
-
-        if s > 0            # squaring to reverse dividing by power of 2
-            for t=1:si; X *= X end
+        if s > 0
+            # Repeated squaring to compute X = r_13^(2^si)
+            for t=1:si
+                mul!(tmp2, X, X)
+                X, tmp2 = tmp2, X
+            end
         end
     end
 
@@ -727,9 +776,8 @@ function log(A::StridedMatrix)
             if is_log_real
                 logA = SchurF.Z * log_quasitriu(SchurF.T) * SchurF.Z'
             else
-                SchurS = schur!(complex(SchurF.T))
-                Z = SchurF.Z * SchurS.Z
-                logA = Z * log(UpperTriangular(SchurS.T)) * Z'
+                SchurS = Schur{Complex}(SchurF)
+                logA = SchurS.Z * log(UpperTriangular(SchurS.T)) * SchurS.Z'
             end
         end
         return eltype(A) <: Complex ? complex(logA) : logA
@@ -739,6 +787,9 @@ function log(A::StridedMatrix)
     end
 end
 
+log(A::Adjoint{<:Any,<:AbstractMatrix}) = adjoint(log(parent(A)))
+log(A::Transpose{<:Any,<:AbstractMatrix}) = transpose(log(parent(A)))
+
 """
     sqrt(A::AbstractMatrix)
 
@@ -802,9 +853,8 @@ function sqrt(A::StridedMatrix{T}) where {T<:Union{Real,Complex}}
             if typeof(sqrt(zero(T))) <: BlasFloat && is_sqrt_real
                 sqrtA = SchurF.Z * sqrt_quasitriu(SchurF.T) * SchurF.Z'
             else
-                SchurS = schur!(complex(SchurF.T))
-                Z = SchurF.Z * SchurS.Z
-                sqrtA = Z * sqrt(UpperTriangular(SchurS.T)) * Z'
+                SchurS = Schur{Complex}(SchurF)
+                sqrtA = SchurS.Z * sqrt(UpperTriangular(SchurS.T)) * SchurS.Z'
             end
         end
         return eltype(A) <: Complex ? complex(sqrtA) : sqrtA
@@ -814,6 +864,9 @@ function sqrt(A::StridedMatrix{T}) where {T<:Union{Real,Complex}}
     end
 end
 
+sqrt(A::Adjoint{<:Any,<:AbstractMatrix}) = adjoint(sqrt(parent(A)))
+sqrt(A::Transpose{<:Any,<:AbstractMatrix}) = transpose(sqrt(parent(A)))
+
 function inv(A::StridedMatrix{T}) where T
     checksquare(A)
     S = typeof((one(T)*zero(T) + one(T)*zero(T))/one(T))
@@ -1046,7 +1099,7 @@ function acos(A::AbstractMatrix)
         acosHermA = acos(Hermitian(A))
         return isa(acosHermA, Hermitian) ? copytri!(parent(acosHermA), 'U', true) : parent(acosHermA)
     end
-    SchurF = schur(complex(A))
+    SchurF = Schur{Complex}(schur(A))
     U = UpperTriangular(SchurF.T)
     R = triu!(parent(-im * log(U + im * sqrt(I - U^2))))
     return SchurF.Z * R * SchurF.Z'
@@ -1077,7 +1130,7 @@ function asin(A::AbstractMatrix)
         asinHermA = asin(Hermitian(A))
         return isa(asinHermA, Hermitian) ? copytri!(parent(asinHermA), 'U', true) : parent(asinHermA)
     end
-    SchurF = schur(complex(A))
+    SchurF = Schur{Complex}(schur(A))
     U = UpperTriangular(SchurF.T)
     R = triu!(parent(-im * log(im * U + sqrt(I - U^2))))
     return SchurF.Z * R * SchurF.Z'
@@ -1107,7 +1160,7 @@ function atan(A::AbstractMatrix)
     if ishermitian(A)
         return copytri!(parent(atan(Hermitian(A))), 'U', true)
     end
-    SchurF = schur(complex(A))
+    SchurF = Schur{Complex}(schur(A))
     U = im * UpperTriangular(SchurF.T)
     R = triu!(parent(log((I + U) / (I - U)) / 2im))
     return SchurF.Z * R * SchurF.Z'
@@ -1126,7 +1179,7 @@ function acosh(A::AbstractMatrix)
         acoshHermA = acosh(Hermitian(A))
         return isa(acoshHermA, Hermitian) ? copytri!(parent(acoshHermA), 'U', true) : parent(acoshHermA)
     end
-    SchurF = schur(complex(A))
+    SchurF = Schur{Complex}(schur(A))
     U = UpperTriangular(SchurF.T)
     R = triu!(parent(log(U + sqrt(U - I) * sqrt(U + I))))
     return SchurF.Z * R * SchurF.Z'
@@ -1144,7 +1197,7 @@ function asinh(A::AbstractMatrix)
     if ishermitian(A)
         return copytri!(parent(asinh(Hermitian(A))), 'U', true)
     end
-    SchurF = schur(complex(A))
+    SchurF = Schur{Complex}(schur(A))
     U = UpperTriangular(SchurF.T)
     R = triu!(parent(log(U + sqrt(I + U^2))))
     return SchurF.Z * R * SchurF.Z'
@@ -1162,7 +1215,7 @@ function atanh(A::AbstractMatrix)
     if ishermitian(A)
         return copytri!(parent(atanh(Hermitian(A))), 'U', true)
     end
-    SchurF = schur(complex(A))
+    SchurF = Schur{Complex}(schur(A))
     U = UpperTriangular(SchurF.T)
     R = triu!(parent(log((I + U) / (I - U)) / 2))
     return SchurF.Z * R * SchurF.Z'
@@ -1321,7 +1374,7 @@ function factorize(A::StridedMatrix{T}) where T
         end
         return lu(A)
     end
-    qr(A, Val(true))
+    qr(A, ColumnNorm())
 end
 factorize(A::Adjoint)   =   adjoint(factorize(parent(A)))
 factorize(A::Transpose) = transpose(factorize(parent(A)))
@@ -1378,31 +1431,23 @@ function pinv(A::AbstractMatrix{T}; atol::Real = 0.0, rtol::Real = (eps(real(flo
     m, n = size(A)
     Tout = typeof(zero(T)/sqrt(one(T) + one(T)))
     if m == 0 || n == 0
-        return Matrix{Tout}(undef, n, m)
+        return similar(A, Tout, (n, m))
     end
-    if istril(A)
-        if istriu(A)
-            maxabsA = maximum(abs.(diag(A)))
-            tol = max(rtol*maxabsA, atol)
-            B = zeros(Tout, n, m)
-            for i = 1:min(m, n)
-                if abs(A[i,i]) > tol
-                    Aii = inv(A[i,i])
-                    if isfinite(Aii)
-                        B[i,i] = Aii
-                    end
-                end
-            end
-            return B
-        end
+    if isdiag(A) # fast path: (possibly rectangular) diagonal input
+        ind = diagind(A)
+        dA = view(A, ind)
+        maxabsA = maximum(abs, dA)
+        tol = max(rtol * maxabsA, atol)
+        B = fill!(similar(A, Tout, (n, m)), 0)
+        B[diagind(B)] .= (x -> abs(x) > tol ? pinv(x) : zero(x)).(dA) # B is n×m, not m×n
+        return B
+    end
-    SVD         = svd(A, full = false)
+    SVD         = svd(A)
     tol         = max(rtol*maximum(SVD.S), atol)
     Stype       = eltype(SVD.S)
-    Sinv        = zeros(Stype, length(SVD.S))
+    Sinv        = fill!(similar(A, Stype, length(SVD.S)), 0)
     index       = SVD.S .> tol
-    Sinv[index] = one(Stype) ./ SVD.S[index]
-    Sinv[findall(.!isfinite.(Sinv))] .= zero(Stype)
+    Sinv[index] .= pinv.(view(SVD.S, index))
     return SVD.Vt' * (Diagonal(Sinv) * SVD.U')
 end
 function pinv(x::Number)
@@ -1451,17 +1496,15 @@ julia> nullspace(M, atol=0.95)
  1.0
 ```
 """
-function nullspace(A::AbstractMatrix; atol::Real = 0.0, rtol::Real = (min(size(A)...)*eps(real(float(one(eltype(A))))))*iszero(atol))
-    m, n = size(A)
-    (m == 0 || n == 0) && return Matrix{eltype(A)}(I, n, n)
-    SVD = svd(A, full=true)
+function nullspace(A::AbstractVecOrMat; atol::Real = 0.0, rtol::Real = (min(size(A, 1), size(A, 2))*eps(real(float(one(eltype(A))))))*iszero(atol))
+    m, n = size(A, 1), size(A, 2)
+    (m == 0 || n == 0) && return Matrix{eigtype(eltype(A))}(I, n, n)
+    SVD = svd(A; full=true)
     tol = max(atol, SVD.S[1]*rtol)
     indstart = sum(s -> s .> tol, SVD.S) + 1
     return copy(SVD.Vt[indstart:end,:]')
 end
 
-nullspace(A::AbstractVector; atol::Real = 0.0, rtol::Real = (min(size(A)...)*eps(real(float(one(eltype(A))))))*iszero(atol)) = nullspace(reshape(A, length(A), 1), rtol= rtol, atol= atol)
-
 """
     cond(M, p::Real=2)
 
diff --git a/stdlib/LinearAlgebra/src/diagonal.jl b/stdlib/LinearAlgebra/src/diagonal.jl
index 10e8e64e01ae77..b31360e233a518 100644
--- a/stdlib/LinearAlgebra/src/diagonal.jl
+++ b/stdlib/LinearAlgebra/src/diagonal.jl
@@ -14,46 +14,55 @@ Diagonal(v::AbstractVector{T}) where {T} = Diagonal{T,typeof(v)}(v)
 Diagonal{T}(v::AbstractVector) where {T} = Diagonal(convert(AbstractVector{T}, v)::AbstractVector{T})
 
 """
-    Diagonal(A::AbstractMatrix)
+    Diagonal(V::AbstractVector)
 
-Construct a matrix from the diagonal of `A`.
+Construct a matrix with `V` as its diagonal.
+
+See also [`diag`](@ref), [`diagm`](@ref).
 
 # Examples
 ```jldoctest
-julia> A = [1 2 3; 4 5 6; 7 8 9]
-3×3 Matrix{Int64}:
- 1  2  3
- 4  5  6
- 7  8  9
-
-julia> Diagonal(A)
-3×3 Diagonal{Int64, Vector{Int64}}:
- 1  ⋅  ⋅
- ⋅  5  ⋅
- ⋅  ⋅  9
+julia> Diagonal([1, 10, 100])
+3×3 Diagonal{$Int, Vector{$Int}}:
+ 1   ⋅    ⋅
+ ⋅  10    ⋅
+ ⋅   ⋅  100
+
+julia> diagm([7, 13])
+2×2 Matrix{$Int}:
+ 7   0
+ 0  13
 ```
 """
-Diagonal(A::AbstractMatrix) = Diagonal(diag(A))
+Diagonal(V::AbstractVector)
 
 """
-    Diagonal(V::AbstractVector)
+    Diagonal(A::AbstractMatrix)
 
-Construct a matrix with `V` as its diagonal.
+Construct a matrix from the diagonal of `A`.
 
 # Examples
 ```jldoctest
-julia> V = [1, 2]
-2-element Vector{Int64}:
- 1
- 2
-
-julia> Diagonal(V)
-2×2 Diagonal{Int64, Vector{Int64}}:
- 1  ⋅
- ⋅  2
+julia> A = permutedims(reshape(1:15, 5, 3))
+3×5 Matrix{$Int}:
+  1   2   3   4   5
+  6   7   8   9  10
+ 11  12  13  14  15
+
+julia> Diagonal(A)
+3×3 Diagonal{$Int, Vector{$Int}}:
+ 1  ⋅   ⋅
+ ⋅  7   ⋅
+ ⋅  ⋅  13
+
+julia> diag(A, 2)
+3-element Vector{$Int}:
+  3
+  9
+ 15
 ```
 """
-Diagonal(V::AbstractVector)
+Diagonal(A::AbstractMatrix) = Diagonal(diag(A))
 
 Diagonal(D::Diagonal) = D
 Diagonal{T}(D::Diagonal{T}) where {T} = D
@@ -63,6 +72,13 @@ AbstractMatrix{T}(D::Diagonal) where {T} = Diagonal{T}(D)
 Matrix(D::Diagonal) = diagm(0 => D.diag)
 Array(D::Diagonal) = Matrix(D)
 
+"""
+    Diagonal{T}(undef, n)
+
+Construct an uninitialized `Diagonal{T}` of length `n`. See `undef`.
+"""
+Diagonal{T}(::UndefInitializer, n::Integer) where T = Diagonal(Vector{T}(undef, n))
+
 # For D<:Diagonal, similar(D[, neweltype]) should yield a Diagonal matrix.
 # On the other hand, similar(D, [neweltype,] shape...) should yield a sparse matrix.
 # The first method below effects the former, and the second the latter.
@@ -175,6 +191,12 @@ end
 (*)(D::Diagonal, x::Number) = Diagonal(D.diag * x)
 (/)(D::Diagonal, x::Number) = Diagonal(D.diag / x)
 (\)(x::Number, D::Diagonal) = Diagonal(x \ D.diag)
+(^)(D::Diagonal, a::Number) = Diagonal(D.diag .^ a)
+(^)(D::Diagonal, a::Real) = Diagonal(D.diag .^ a) # for disambiguation
+(^)(D::Diagonal, a::Integer) = Diagonal(D.diag .^ a) # for disambiguation
+Base.literal_pow(::typeof(^), D::Diagonal, valp::Val) =
+    Diagonal(Base.literal_pow.(^, D.diag, valp)) # for speed
+Base.literal_pow(::typeof(^), D::Diagonal, ::Val{-1}) = inv(D) # for disambiguation
 
 function (*)(Da::Diagonal, Db::Diagonal)
     nDa, mDb = size(Da, 2), size(Db, 1)
@@ -193,23 +215,31 @@ function (*)(D::Diagonal, V::AbstractVector)
 end
 
 (*)(A::AbstractTriangular, D::Diagonal) =
-    rmul!(copyto!(similar(A, promote_op(*, eltype(A), eltype(D.diag))), A), D)
+    rmul!(copy_oftype(A, promote_op(*, eltype(A), eltype(D.diag))), D)
 (*)(D::Diagonal, B::AbstractTriangular) =
-    lmul!(D, copyto!(similar(B, promote_op(*, eltype(B), eltype(D.diag))), B))
+    lmul!(D, copy_oftype(B, promote_op(*, eltype(B), eltype(D.diag))))
 
 (*)(A::AbstractMatrix, D::Diagonal) =
-    rmul!(copyto!(similar(A, promote_op(*, eltype(A), eltype(D.diag)), size(A)), A), D)
+    rmul!(copy_similar(A, promote_op(*, eltype(A), eltype(D.diag))), D)
 (*)(D::Diagonal, A::AbstractMatrix) =
-    lmul!(D, copyto!(similar(A, promote_op(*, eltype(A), eltype(D.diag)), size(A)), A))
+    lmul!(D, copy_similar(A, promote_op(*, eltype(A), eltype(D.diag))))
 
 function rmul!(A::AbstractMatrix, D::Diagonal)
     require_one_based_indexing(A)
+    nA, nD = size(A, 2), length(D.diag)
+    if nA != nD
+        throw(DimensionMismatch("second dimension of A, $nA, does not match the first of D, $nD"))
+    end
     A .= A .* permutedims(D.diag)
     return A
 end
 
 function lmul!(D::Diagonal, B::AbstractVecOrMat)
     require_one_based_indexing(B)
+    nB, nD = size(B, 1), length(D.diag)
+    if nB != nD
+        throw(DimensionMismatch("second dimension of D, $nD, does not match the first of B, $nB"))
+    end
     B .= D.diag .* B
     return B
 end
@@ -245,9 +275,6 @@ function lmul!(D::Diagonal, B::UnitUpperTriangular)
     UpperTriangular(B.data)
 end
 
-*(D::Adjoint{<:Any,<:Diagonal}, B::Diagonal) = Diagonal(adjoint.(D.parent.diag) .* B.diag)
-*(A::Adjoint{<:Any,<:AbstractTriangular}, D::Diagonal) =
-    rmul!(copyto!(similar(A, promote_op(*, eltype(A), eltype(D.diag))), A), D)
 function *(adjA::Adjoint{<:Any,<:AbstractMatrix}, D::Diagonal)
     A = adjA.parent
     Ac = similar(A, promote_op(*, eltype(A), eltype(D.diag)), (size(A, 2), size(A, 1)))
@@ -255,9 +282,6 @@ function *(adjA::Adjoint{<:Any,<:AbstractMatrix}, D::Diagonal)
     rmul!(Ac, D)
 end
 
-*(D::Transpose{<:Any,<:Diagonal}, B::Diagonal) = Diagonal(transpose.(D.parent.diag) .* B.diag)
-*(A::Transpose{<:Any,<:AbstractTriangular}, D::Diagonal) =
-    rmul!(copyto!(similar(A, promote_op(*, eltype(A), eltype(D.diag))), A), D)
 function *(transA::Transpose{<:Any,<:AbstractMatrix}, D::Diagonal)
     A = transA.parent
     At = similar(A, promote_op(*, eltype(A), eltype(D.diag)), (size(A, 2), size(A, 1)))
@@ -265,10 +289,9 @@ function *(transA::Transpose{<:Any,<:AbstractMatrix}, D::Diagonal)
     rmul!(At, D)
 end
 
-*(D::Diagonal, B::Adjoint{<:Any,<:Diagonal}) = Diagonal(D.diag .* adjoint.(B.parent.diag))
-*(D::Diagonal, B::Adjoint{<:Any,<:AbstractTriangular}) =
-    lmul!(D, copyto!(similar(B, promote_op(*, eltype(B), eltype(D.diag))), B))
-*(D::Diagonal, adjQ::Adjoint{<:Any,<:Union{QRCompactWYQ,QRPackedQ}}) = (Q = adjQ.parent; rmul!(Array(D), adjoint(Q)))
+*(D::Diagonal, adjQ::Adjoint{<:Any,<:Union{QRCompactWYQ,QRPackedQ}}) =
+    rmul!(Array{promote_type(eltype(D), eltype(adjQ))}(D), adjQ)
+
 function *(D::Diagonal, adjA::Adjoint{<:Any,<:AbstractMatrix})
     A = adjA.parent
     Ac = similar(A, promote_op(*, eltype(A), eltype(D.diag)), (size(A, 2), size(A, 1)))
@@ -276,9 +299,6 @@ function *(D::Diagonal, adjA::Adjoint{<:Any,<:AbstractMatrix})
     lmul!(D, Ac)
 end
 
-*(D::Diagonal, B::Transpose{<:Any,<:Diagonal}) = Diagonal(D.diag .* transpose.(B.parent.diag))
-*(D::Diagonal, B::Transpose{<:Any,<:AbstractTriangular}) =
-    lmul!(D, copyto!(similar(B, promote_op(*, eltype(B), eltype(D.diag))), B))
 function *(D::Diagonal, transA::Transpose{<:Any,<:AbstractMatrix})
     A = transA.parent
     At = similar(A, promote_op(*, eltype(A), eltype(D.diag)), (size(A, 2), size(A, 1)))
@@ -286,182 +306,62 @@ function *(D::Diagonal, transA::Transpose{<:Any,<:AbstractMatrix})
     lmul!(D, At)
 end
 
-*(D::Adjoint{<:Any,<:Diagonal}, B::Adjoint{<:Any,<:Diagonal}) =
-    Diagonal(adjoint.(D.parent.diag) .* adjoint.(B.parent.diag))
-*(D::Transpose{<:Any,<:Diagonal}, B::Transpose{<:Any,<:Diagonal}) =
-    Diagonal(transpose.(D.parent.diag) .* transpose.(B.parent.diag))
-
 rmul!(A::Diagonal, B::Diagonal) = Diagonal(A.diag .*= B.diag)
 lmul!(A::Diagonal, B::Diagonal) = Diagonal(B.diag .= A.diag .* B.diag)
 
-function lmul!(adjA::Adjoint{<:Any,<:Diagonal}, B::AbstractMatrix)
-    A = adjA.parent
-    return lmul!(adjoint(A), B)
-end
-function lmul!(transA::Transpose{<:Any,<:Diagonal}, B::AbstractMatrix)
-    A = transA.parent
-    return lmul!(transpose(A), B)
-end
-
-function rmul!(A::AbstractMatrix, adjB::Adjoint{<:Any,<:Diagonal})
-    B = adjB.parent
-    return rmul!(A, adjoint(B))
-end
-function rmul!(A::AbstractMatrix, transB::Transpose{<:Any,<:Diagonal})
-    B = transB.parent
-    return rmul!(A, transpose(B))
-end
-
 # Get ambiguous method if try to unify AbstractVector/AbstractMatrix here using AbstractVecOrMat
-@inline mul!(out::AbstractVector, A::Diagonal, in::AbstractVector,
-             alpha::Number, beta::Number) =
+@inline mul!(out::AbstractVector, A::Diagonal, in::AbstractVector, alpha::Number, beta::Number) =
     out .= (A.diag .* in) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractVector, A::Adjoint{<:Any,<:Diagonal}, in::AbstractVector,
-             alpha::Number, beta::Number) =
-    out .= (adjoint.(A.parent.diag) .* in) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractVector, A::Transpose{<:Any,<:Diagonal}, in::AbstractVector,
-             alpha::Number, beta::Number) =
-    out .= (transpose.(A.parent.diag) .* in) .*ₛ alpha .+ out .*ₛ beta
-
-@inline mul!(out::AbstractMatrix, A::Diagonal, in::StridedMatrix,
-             alpha::Number, beta::Number) =
+@inline mul!(out::AbstractMatrix, A::Diagonal, in::AbstractMatrix, alpha::Number, beta::Number) =
     out .= (A.diag .* in) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractMatrix, A::Adjoint{<:Any,<:Diagonal}, in::StridedMatrix,
-             alpha::Number, beta::Number) =
-    out .= (adjoint.(A.parent.diag) .* in) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractMatrix, A::Transpose{<:Any,<:Diagonal}, in::StridedMatrix,
-             alpha::Number, beta::Number) =
-    out .= (transpose.(A.parent.diag) .* in) .*ₛ alpha .+ out .*ₛ beta
-
-@inline mul!(out::AbstractMatrix, A::Diagonal, in::Adjoint{<:Any,<:StridedMatrix},
+@inline mul!(out::AbstractMatrix, A::Diagonal, in::Adjoint{<:Any,<:AbstractVecOrMat},
              alpha::Number, beta::Number) =
     out .= (A.diag .* in) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractMatrix, A::Adjoint{<:Any,<:Diagonal}, in::Adjoint{<:Any,<:StridedMatrix},
-             alpha::Number, beta::Number) =
-    out .= (adjoint.(A.parent.diag) .* in) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractMatrix, A::Transpose{<:Any,<:Diagonal}, in::Adjoint{<:Any,<:StridedMatrix},
-             alpha::Number, beta::Number) =
-    out .= (transpose.(A.parent.diag) .* in) .*ₛ alpha .+ out .*ₛ beta
-
-@inline mul!(out::AbstractMatrix, A::Diagonal, in::Transpose{<:Any,<:StridedMatrix},
+@inline mul!(out::AbstractMatrix, A::Diagonal, in::Transpose{<:Any,<:AbstractVecOrMat},
              alpha::Number, beta::Number) =
     out .= (A.diag .* in) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractMatrix, A::Adjoint{<:Any,<:Diagonal}, in::Transpose{<:Any,<:StridedMatrix},
-             alpha::Number, beta::Number) =
-    out .= (adjoint.(A.parent.diag) .* in) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractMatrix, A::Transpose{<:Any,<:Diagonal}, in::Transpose{<:Any,<:StridedMatrix},
-             alpha::Number, beta::Number) =
-    out .= (transpose.(A.parent.diag) .* in) .*ₛ alpha .+ out .*ₛ beta
 
-@inline mul!(out::AbstractMatrix, in::StridedMatrix, A::Diagonal,
-             alpha::Number, beta::Number) =
+@inline mul!(out::AbstractMatrix, in::AbstractMatrix, A::Diagonal, alpha::Number, beta::Number) =
     out .= (in .* permutedims(A.diag)) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractMatrix, in::StridedMatrix, A::Adjoint{<:Any,<:Diagonal},
-             alpha::Number, beta::Number) =
-    out .= (in .* adjoint(A.parent.diag)) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractMatrix, in::StridedMatrix, A::Transpose{<:Any,<:Diagonal},
-             alpha::Number, beta::Number) =
-    out .= (in .* transpose(A.parent.diag)) .*ₛ alpha .+ out .*ₛ beta
-
-@inline mul!(out::AbstractMatrix, in::Adjoint{<:Any,<:StridedMatrix}, A::Diagonal,
+@inline mul!(out::AbstractMatrix, in::Adjoint{<:Any,<:AbstractVecOrMat}, A::Diagonal,
              alpha::Number, beta::Number) =
     out .= (in .* permutedims(A.diag)) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractMatrix, in::Adjoint{<:Any,<:StridedMatrix}, A::Adjoint{<:Any,<:Diagonal},
-             alpha::Number, beta::Number) =
-    out .= (in .* adjoint(A.parent.diag)) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractMatrix, in::Adjoint{<:Any,<:StridedMatrix}, A::Transpose{<:Any,<:Diagonal},
-             alpha::Number, beta::Number) =
-    out .= (in .* transpose(A.parent.diag)) .*ₛ alpha .+ out .*ₛ beta
-
-@inline mul!(out::AbstractMatrix, in::Transpose{<:Any,<:StridedMatrix}, A::Diagonal,
+@inline mul!(out::AbstractMatrix, in::Transpose{<:Any,<:AbstractVecOrMat}, A::Diagonal,
              alpha::Number, beta::Number) =
     out .= (in .* permutedims(A.diag)) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractMatrix, in::Transpose{<:Any,<:StridedMatrix}, A::Adjoint{<:Any,<:Diagonal},
-             alpha::Number, beta::Number) =
-    out .= (in .* adjoint(A.parent.diag)) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractMatrix, in::Transpose{<:Any,<:StridedMatrix}, A::Transpose{<:Any,<:Diagonal},
-             alpha::Number, beta::Number) =
-    out .= (in .* transpose(A.parent.diag)) .*ₛ alpha .+ out .*ₛ beta
-
-# ambiguities with Symmetric/Hermitian
-# RealHermSymComplex[Sym]/[Herm] only include Number; invariant to [c]transpose
-*(A::Diagonal, transB::Transpose{<:Any,<:RealHermSymComplexSym}) = A * transB.parent
-*(transA::Transpose{<:Any,<:RealHermSymComplexSym}, B::Diagonal) = transA.parent * B
-*(A::Diagonal, adjB::Adjoint{<:Any,<:RealHermSymComplexHerm}) = A * adjB.parent
-*(adjA::Adjoint{<:Any,<:RealHermSymComplexHerm}, B::Diagonal) = adjA.parent * B
-*(transA::Transpose{<:Any,<:RealHermSymComplexSym}, transD::Transpose{<:Any,<:Diagonal}) = transA.parent * transD
-*(transD::Transpose{<:Any,<:Diagonal}, transA::Transpose{<:Any,<:RealHermSymComplexSym}) = transD * transA.parent
-*(adjA::Adjoint{<:Any,<:RealHermSymComplexHerm}, adjD::Adjoint{<:Any,<:Diagonal}) = adjA.parent * adjD
-*(adjD::Adjoint{<:Any,<:Diagonal}, adjA::Adjoint{<:Any,<:RealHermSymComplexHerm}) = adjD * adjA.parent
-mul!(C::AbstractMatrix, A::Adjoint{<:Any,<:Diagonal}, B::Adjoint{<:Any,<:RealHermSymComplexSym}) = C .= adjoint.(A.parent.diag) .* B
-mul!(C::AbstractMatrix, A::Transpose{<:Any,<:Diagonal}, B::Transpose{<:Any,<:RealHermSymComplexHerm}) = C .= transpose.(A.parent.diag) .* B
-
-@inline mul!(C::AbstractMatrix,
-             A::Adjoint{<:Any,<:Diagonal}, B::Adjoint{<:Any,<:RealHermSym},
-             alpha::Number, beta::Number) = mul!(C, A, B.parent, alpha, beta)
-@inline mul!(C::AbstractMatrix,
-             A::Adjoint{<:Any,<:Diagonal}, B::Adjoint{<:Any,<:RealHermSymComplexHerm},
-             alpha::Number, beta::Number) = mul!(C, A, B.parent, alpha, beta)
-@inline mul!(C::AbstractMatrix,
-             A::Transpose{<:Any,<:Diagonal}, B::Transpose{<:Any,<:RealHermSym},
-             alpha::Number, beta::Number) = mul!(C, A, B.parent, alpha, beta)
-@inline mul!(C::AbstractMatrix,
-             A::Transpose{<:Any,<:Diagonal}, B::Transpose{<:Any,<:RealHermSymComplexSym},
-             alpha::Number, beta::Number) = mul!(C, A, B.parent, alpha, beta)
-
-@inline mul!(C::AbstractMatrix,
-             A::Adjoint{<:Any,<:Diagonal}, B::Adjoint{<:Any,<:RealHermSymComplexSym},
-             alpha::Number, beta::Number) =
-    C .= (adjoint.(A.parent.diag) .* B) .*ₛ alpha .+ C .*ₛ beta
-@inline mul!(C::AbstractMatrix,
-             A::Transpose{<:Any,<:Diagonal}, B::Transpose{<:Any,<:RealHermSymComplexHerm},
-             alpha::Number, beta::Number) =
-    C .= (transpose.(A.parent.diag) .* B) .*ₛ alpha .+ C .*ₛ beta
 
-(/)(Da::Diagonal, Db::Diagonal) = Diagonal(Da.diag ./ Db.diag)
-
-function ldiv!(D::Diagonal{T}, v::AbstractVector{T}) where {T}
-    if length(v) != length(D.diag)
-        throw(DimensionMismatch("diagonal matrix is $(length(D.diag)) by $(length(D.diag)) but right hand side has $(length(v)) rows"))
-    end
-    for i = 1:length(D.diag)
-        d = D.diag[i]
-        if iszero(d)
-            throw(SingularException(i))
-        end
-        v[i] = d\v[i]
-    end
-    v
-end
-function ldiv!(D::Diagonal{T}, V::AbstractMatrix{T}) where {T}
-    require_one_based_indexing(V)
-    if size(V,1) != length(D.diag)
-        throw(DimensionMismatch("diagonal matrix is $(length(D.diag)) by $(length(D.diag)) but right hand side has $(size(V,1)) rows"))
-    end
-    for i = 1:length(D.diag)
-        d = D.diag[i]
-        if iszero(d)
-            throw(SingularException(i))
+function mul!(C::AbstractMatrix, Da::Diagonal, Db::Diagonal, alpha::Number, beta::Number)
+    mA = size(Da, 1)
+    mB = size(Db, 1)
+    mA == mB || throw(DimensionMismatch("A has dimensions ($mA,$mA) but B has dimensions ($mB,$mB)"))
+    mC, nC = size(C)
+    mC == nC == mA || throw(DimensionMismatch("output matrix has size: ($mC,$nC), but should have size ($mA,$mA)"))
+    require_one_based_indexing(C)
+    da = Da.diag
+    db = Db.diag
+    _rmul_or_fill!(C, beta)
+    if iszero(beta)
+        @inbounds @simd for i in 1:mA
+            C[i,i] = Ref(da[i] * db[i]) .*ₛ alpha
         end
-        for j = 1:size(V,2)
-            @inbounds V[i,j] = d\V[i,j]
+    else
+        @inbounds @simd for i in 1:mA
+            C[i,i] += Ref(da[i] * db[i]) .*ₛ alpha
         end
     end
-    V
+    return C
 end
-ldiv!(x::AbstractArray, A::Diagonal, b::AbstractArray) = (x .= A.diag .\ b)
 
-ldiv!(adjD::Adjoint{<:Any,<:Diagonal{T}}, B::AbstractVecOrMat{T}) where {T} =
-    (D = adjD.parent; ldiv!(conj(D), B))
-ldiv!(transD::Transpose{<:Any,<:Diagonal{T}}, B::AbstractVecOrMat{T}) where {T} =
-    (D = transD.parent; ldiv!(D, B))
+(/)(Da::Diagonal, Db::Diagonal) = Diagonal(Da.diag ./ Db.diag)
+
+ldiv!(x::AbstractArray, A::Diagonal, b::AbstractArray) = (x .= A.diag .\ b)
 
 function ldiv!(D::Diagonal, A::Union{LowerTriangular,UpperTriangular})
     broadcast!(\, parent(A), D.diag, parent(A))
     A
 end
 
-function rdiv!(A::AbstractMatrix{T}, D::Diagonal{T}) where {T}
+function rdiv!(A::AbstractMatrix, D::Diagonal)
     require_one_based_indexing(A)
     dd = D.diag
     m, n = size(A)
@@ -485,30 +385,16 @@ function rdiv!(A::Union{LowerTriangular,UpperTriangular}, D::Diagonal)
     A
 end
 
-rdiv!(A::AbstractMatrix{T}, adjD::Adjoint{<:Any,<:Diagonal{T}}) where {T} =
-    (D = adjD.parent; rdiv!(A, conj(D)))
-rdiv!(A::AbstractMatrix{T}, transD::Transpose{<:Any,<:Diagonal{T}}) where {T} =
-    (D = transD.parent; rdiv!(A, D))
-
 (/)(A::Union{StridedMatrix, AbstractTriangular}, D::Diagonal) =
     rdiv!((typeof(oneunit(eltype(D))/oneunit(eltype(A)))).(A), D)
 
-(\)(F::Factorization, D::Diagonal) =
-    ldiv!(F, Matrix{typeof(oneunit(eltype(D))/oneunit(eltype(F)))}(D))
-\(adjF::Adjoint{<:Any,<:Factorization}, D::Diagonal) =
-    (F = adjF.parent; ldiv!(adjoint(F), Matrix{typeof(oneunit(eltype(D))/oneunit(eltype(F)))}(D)))
-(\)(A::Union{QR,QRCompactWY,QRPivoted}, B::Diagonal) =
-    invoke(\, Tuple{Union{QR,QRCompactWY,QRPivoted}, AbstractVecOrMat}, A, B)
-
-
-@inline function kron!(C::AbstractMatrix{T}, A::Diagonal, B::Diagonal) where T
-    fill!(C, zero(T))
+@inline function kron!(C::AbstractMatrix, A::Diagonal, B::Diagonal)
     valA = A.diag; nA = length(valA)
     valB = B.diag; nB = length(valB)
     nC = checksquare(C)
     @boundscheck nC == nA*nB ||
         throw(DimensionMismatch("expect C to be a $(nA*nB)x$(nA*nB) matrix, got size $(nC)x$(nC)"))
-
+    isempty(A) || isempty(B) || fill!(C, zero(A[1,1] * B[1,1]))
     @inbounds for i = 1:nA, j = 1:nB
         idx = (i-1)*nB+j
         C[idx, idx] = valA[i] * valB[j]
@@ -516,19 +402,16 @@ rdiv!(A::AbstractMatrix{T}, transD::Transpose{<:Any,<:Diagonal{T}}) where {T} =
     return C
 end
 
-function kron(A::Diagonal{T1}, B::Diagonal{T2}) where {T1<:Number, T2<:Number}
-    valA = A.diag; nA = length(valA)
-    valB = B.diag; nB = length(valB)
-    valC = Vector{typeof(zero(T1)*zero(T2))}(undef,nA*nB)
-    C = Diagonal(valC)
-    return @inbounds kron!(C, A, B)
-end
+kron(A::Diagonal{<:Number}, B::Diagonal{<:Number}) = Diagonal(kron(A.diag, B.diag))
 
 @inline function kron!(C::AbstractMatrix, A::Diagonal, B::AbstractMatrix)
     Base.require_one_based_indexing(B)
-    (mA, nA) = size(A); (mB, nB) = size(B); (mC, nC) = size(C);
+    (mA, nA) = size(A)
+    (mB, nB) = size(B)
+    (mC, nC) = size(C)
     @boundscheck (mC, nC) == (mA * mB, nA * nB) ||
         throw(DimensionMismatch("expect C to be a $(mA * mB)x$(nA * nB) matrix, got size $(mC)x$(nC)"))
+    isempty(A) || isempty(B) || fill!(C, zero(A[1,1] * B[1,1]))
     m = 1
     @inbounds for j = 1:nA
         A_jj = A[j,j]
@@ -546,9 +429,12 @@ end
 
 @inline function kron!(C::AbstractMatrix, A::AbstractMatrix, B::Diagonal)
     require_one_based_indexing(A)
-    (mA, nA) = size(A); (mB, nB) = size(B); (mC, nC) = size(C);
+    (mA, nA) = size(A)
+    (mB, nB) = size(B)
+    (mC, nC) = size(C)
     @boundscheck (mC, nC) == (mA * mB, nA * nB) ||
         throw(DimensionMismatch("expect C to be a $(mA * mB)x$(nA * nB) matrix, got size $(mC)x$(nC)"))
+    isempty(A) || isempty(B) || fill!(C, zero(A[1,1] * B[1,1]))
     m = 1
     @inbounds for j = 1:nA
         for l = 1:mB
@@ -564,23 +450,13 @@ end
     return C
 end
 
-function kron(A::Diagonal{T}, B::AbstractMatrix{S}) where {T<:Number, S<:Number}
-    (mA, nA) = size(A); (mB, nB) = size(B)
-    R = zeros(Base.promote_op(*, T, S), mA * mB, nA * nB)
-    return @inbounds kron!(R, A, B)
-end
-
-function kron(A::AbstractMatrix{T}, B::Diagonal{S}) where {T<:Number, S<:Number}
-    (mA, nA) = size(A); (mB, nB) = size(B)
-    R = zeros(promote_op(*, T, S), mA * mB, nA * nB)
-    return @inbounds kron!(R, A, B)
-end
-
 conj(D::Diagonal) = Diagonal(conj(D.diag))
 transpose(D::Diagonal{<:Number}) = D
 transpose(D::Diagonal) = Diagonal(transpose.(D.diag))
 adjoint(D::Diagonal{<:Number}) = conj(D)
 adjoint(D::Diagonal) = Diagonal(adjoint.(D.diag))
+Base.permutedims(D::Diagonal) = D
+Base.permutedims(D::Diagonal, perm) = (Base.checkdims_perm(D, D, perm); D)
 
 function diag(D::Diagonal, k::Integer=0)
     # every branch call similar(..., ::Int) to make sure the
@@ -602,7 +478,7 @@ function logdet(D::Diagonal{<:Complex}) # make sure branch cut is correct
 end
 
 # Matrix functions
-for f in (:exp, :log, :sqrt,
+for f in (:exp, :cis, :log, :sqrt,
           :cos, :sin, :tan, :csc, :sec, :cot,
           :cosh, :sinh, :tanh, :csch, :sech, :coth,
           :acos, :asin, :atan, :acsc, :asec, :acot,
@@ -610,8 +486,13 @@ for f in (:exp, :log, :sqrt,
     @eval $f(D::Diagonal) = Diagonal($f.(D.diag))
 end
 
-#Linear solver
-function ldiv!(D::Diagonal, B::StridedVecOrMat)
+(\)(D::Diagonal, A::AbstractMatrix) =
+    ldiv!(D, (typeof(oneunit(eltype(D))/oneunit(eltype(A)))).(A))
+
+(\)(D::Diagonal, b::AbstractVector) = D.diag .\ b
+(\)(Da::Diagonal, Db::Diagonal) = Diagonal(Da.diag .\ Db.diag)
+
+function ldiv!(D::Diagonal, B::AbstractVecOrMat)
     m, n = size(B, 1), size(B, 2)
     if m != length(D.diag)
         throw(DimensionMismatch("diagonal matrix is $(length(D.diag)) by $(length(D.diag)) but right hand side has $m rows"))
@@ -628,11 +509,6 @@ function ldiv!(D::Diagonal, B::StridedVecOrMat)
     end
     return B
 end
-(\)(D::Diagonal, A::AbstractMatrix) =
-    ldiv!(D, (typeof(oneunit(eltype(D))/oneunit(eltype(A)))).(A))
-
-(\)(D::Diagonal, b::AbstractVector) = D.diag .\ b
-(\)(Da::Diagonal, Db::Diagonal) = Diagonal(Da.diag .\ Db.diag)
 
 function inv(D::Diagonal{T}) where T
     Di = similar(D.diag, typeof(inv(zero(T))))
diff --git a/stdlib/LinearAlgebra/src/eigen.jl b/stdlib/LinearAlgebra/src/eigen.jl
index 3a423a87295f35..e9b9f2b2493a3a 100644
--- a/stdlib/LinearAlgebra/src/eigen.jl
+++ b/stdlib/LinearAlgebra/src/eigen.jl
@@ -236,6 +236,14 @@ function eigen(A::AbstractMatrix{T}; permute::Bool=true, scale::Bool=true, sortb
     isdiag(AA) && return eigen(Diagonal(AA); permute=permute, scale=scale, sortby=sortby)
     return eigen!(AA; permute=permute, scale=scale, sortby=sortby)
 end
+function eigen(A::AbstractMatrix{T}; permute::Bool=true, scale::Bool=true, sortby::Union{Function,Nothing}=eigsortby) where {T <: Union{Float16,Complex{Float16}}}
+    wide = copy_oftype(A, eigtype(T))
+    isdiag(wide) && return eigen(Diagonal(wide); permute=permute, scale=scale, sortby=sortby)
+    F = eigen!(wide; permute=permute, scale=scale, sortby=sortby)
+    # convert the factors back to Float16 element types (complex only if the factor is not real)
+    narrow = X -> isreal(X) ? Float16 : Complex{Float16}
+    return Eigen(convert(AbstractVector{narrow(F.values)}, F.values), convert(AbstractMatrix{narrow(F.vectors)}, F.vectors))
+end
 eigen(x::Number) = Eigen([x], fill(one(x), 1, 1))
 
 """
@@ -614,6 +622,16 @@ function show(io::IO, mime::MIME{Symbol("text/plain")}, F::Union{Eigen,Generaliz
     show(io, mime, F.vectors)
 end
 
+# Hashing and equality of `Eigen` are defined through its `values` and `vectors` fields.
+Base.hash(F::Eigen, h::UInt) = hash(F.values, hash(F.vectors, hash(Eigen, h)))
+function Base.:(==)(A::Eigen, B::Eigen)
+    A.values == B.values || return false
+    return A.vectors == B.vectors
+end
+function Base.isequal(A::Eigen, B::Eigen)
+    return isequal(A.values, B.values) && isequal(A.vectors, B.vectors)
+end
+
 # Conversion methods
 
 ## Can we determine the source/result is Real?  This is not stored in the type Eigen
diff --git a/stdlib/LinearAlgebra/src/factorization.jl b/stdlib/LinearAlgebra/src/factorization.jl
index 3e335ed391ad69..626a1ae7b1a748 100644
--- a/stdlib/LinearAlgebra/src/factorization.jl
+++ b/stdlib/LinearAlgebra/src/factorization.jl
@@ -16,9 +16,9 @@ size(F::Adjoint{<:Any,<:Factorization}) = reverse(size(parent(F)))
 size(F::Transpose{<:Any,<:Factorization}) = reverse(size(parent(F)))
 
 checkpositivedefinite(info) = info == 0 || throw(PosDefException(info))
-checknonsingular(info, pivoted::Val{true}) = info == 0 || throw(SingularException(info))
-checknonsingular(info, pivoted::Val{false}) = info == 0 || throw(ZeroPivotException(info))
-checknonsingular(info) = checknonsingular(info, Val{true}())
+checknonsingular(info, ::RowMaximum) = info == 0 || throw(SingularException(info))
+checknonsingular(info, ::NoPivot) = info == 0 || throw(ZeroPivotException(info))
+checknonsingular(info) = checknonsingular(info, RowMaximum())
 
 """
     issuccess(F::Factorization)
@@ -59,6 +59,9 @@ convert(::Type{T}, f::Factorization) where {T<:AbstractArray} = T(f)
 
 ### General promotion rules
 Factorization{T}(F::Factorization{T}) where {T} = F
+# This is a bit odd since the return is not a Factorization but it works well in generic code
+Factorization{T}(A::Adjoint{<:Any,<:Factorization}) where {T} =
+    adjoint(Factorization{T}(parent(A)))
 inv(F::Factorization{T}) where {T} = (n = size(F, 1); ldiv!(F, Matrix{T}(I, n, n)))
 
 Base.hash(F::Factorization, h::UInt) = mapreduce(f -> hash(getfield(F, f)), hash, 1:nfields(F); init=h)
@@ -96,40 +99,21 @@ function (/)(B::VecOrMat{Complex{T}}, F::Factorization{T}) where T<:BlasReal
     return copy(reinterpret(Complex{T}, x))
 end
 
-function \(F::Factorization, B::AbstractVecOrMat)
+function \(F::Union{Factorization, Adjoint{<:Any,<:Factorization}}, B::AbstractVecOrMat)
     require_one_based_indexing(B)
     TFB = typeof(oneunit(eltype(B)) / oneunit(eltype(F)))
-    BB = similar(B, TFB, size(B))
-    copyto!(BB, B)
-    ldiv!(F, BB)
-end
-function \(adjF::Adjoint{<:Any,<:Factorization}, B::AbstractVecOrMat)
-    require_one_based_indexing(B)
-    F = adjF.parent
-    TFB = typeof(oneunit(eltype(B)) / oneunit(eltype(F)))
-    BB = similar(B, TFB, size(B))
-    copyto!(BB, B)
-    ldiv!(adjoint(F), BB)
+    ldiv!(F, copy_similar(B, TFB))
 end
 
-function /(B::AbstractMatrix, F::Factorization)
+function /(B::AbstractMatrix, F::Union{Factorization, Adjoint{<:Any,<:Factorization}})
     require_one_based_indexing(B)
     TFB = typeof(oneunit(eltype(B)) / oneunit(eltype(F)))
-    BB = similar(B, TFB, size(B))
-    copyto!(BB, B)
-    rdiv!(BB, F)
-end
-function /(B::AbstractMatrix, adjF::Adjoint{<:Any,<:Factorization})
-    require_one_based_indexing(B)
-    F = adjF.parent
-    TFB = typeof(oneunit(eltype(B)) / oneunit(eltype(F)))
-    BB = similar(B, TFB, size(B))
-    copyto!(BB, B)
-    rdiv!(BB, adjoint(F))
+    rdiv!(copy_similar(B, TFB), F)
 end
 /(adjB::AdjointAbsVec, adjF::Adjoint{<:Any,<:Factorization}) = adjoint(adjF.parent \ adjB.parent)
 /(B::TransposeAbsVec, adjF::Adjoint{<:Any,<:Factorization}) = adjoint(adjF.parent \ adjoint(B))
 
+
 # support the same 3-arg idiom as in our other in-place A_*_B functions:
 function ldiv!(Y::AbstractVecOrMat, A::Factorization, B::AbstractVecOrMat)
     require_one_based_indexing(Y, B)
diff --git a/stdlib/LinearAlgebra/src/generic.jl b/stdlib/LinearAlgebra/src/generic.jl
index 8e0ca4fb72ad53..cf7e4744687852 100644
--- a/stdlib/LinearAlgebra/src/generic.jl
+++ b/stdlib/LinearAlgebra/src/generic.jl
@@ -702,9 +702,10 @@ end
 function opnorm2(A::AbstractMatrix{T}) where T
     require_one_based_indexing(A)
     m,n = size(A)
-    if m == 1 || n == 1 return norm2(A) end
     Tnorm = typeof(float(real(zero(T))))
-    (m == 0 || n == 0) ? zero(Tnorm) : convert(Tnorm, svdvals(A)[1])
+    if m == 0 || n == 0 return zero(Tnorm) end
+    if m == 1 || n == 1 return norm2(A) end
+    return svdvals(A)[1]
 end
 
 function opnormInf(A::AbstractMatrix{T}) where T
@@ -1109,6 +1110,8 @@ When `A` is sparse, a similar polyalgorithm is used. For indefinite matrices, th
 factorization does not use pivoting during the numerical factorization and therefore the
 procedure can fail even for invertible matrices.
 
+See also: [`factorize`](@ref), [`pinv`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = [1 0; 1 -2]; B = [32; -4];
@@ -1138,7 +1141,7 @@ function (\)(A::AbstractMatrix, B::AbstractVecOrMat)
         end
         return lu(A) \ B
     end
-    return qr(A,Val(true)) \ B
+    return qr(A, ColumnNorm()) \ B
 end
 
 (\)(a::AbstractVector, b::AbstractArray) = pinv(a) * b
@@ -1289,15 +1292,17 @@ false
 """
 function istriu(A::AbstractMatrix, k::Integer = 0)
     require_one_based_indexing(A)
+    return _istriu(A, k)
+end
+istriu(x::Number) = true
+
+@inline function _istriu(A::AbstractMatrix, k)
     m, n = size(A)
     for j in 1:min(n, m + k - 1)
-        for i in max(1, j - k + 1):m
-            iszero(A[i, j]) || return false
-        end
+        all(iszero, view(A, max(1, j - k + 1):m, j)) || return false
     end
     return true
 end
-istriu(x::Number) = true
 
 """
     istril(A::AbstractMatrix, k::Integer = 0) -> Bool
@@ -1331,15 +1336,17 @@ false
 """
 function istril(A::AbstractMatrix, k::Integer = 0)
     require_one_based_indexing(A)
+    return _istril(A, k)
+end
+istril(x::Number) = true
+
+@inline function _istril(A::AbstractMatrix, k)
     m, n = size(A)
     for j in max(1, k + 2):n
-        for i in 1:min(j - k - 1, m)
-            iszero(A[i, j]) || return false
-        end
+        all(iszero, view(A, 1:min(j - k - 1, m), j)) || return false
     end
     return true
 end
-istril(x::Number) = true
 
 """
     isbanded(A::AbstractMatrix, kl::Integer, ku::Integer) -> Bool
@@ -1487,21 +1494,17 @@ end
 
 # Elementary reflection similar to LAPACK. The reflector is not Hermitian but
 # ensures that tridiagonalization of Hermitian matrices become real. See lawn72
-@inline function reflector!(x::AbstractVector)
+@inline function reflector!(x::AbstractVector{T}) where {T}
     require_one_based_indexing(x)
     n = length(x)
     n == 0 && return zero(eltype(x))
     @inbounds begin
         ξ1 = x[1]
-        normu = abs2(ξ1)
-        for i = 2:n
-            normu += abs2(x[i])
-        end
+        normu = norm(x)
         if iszero(normu)
             return zero(ξ1/normu)
         end
-        normu = sqrt(normu)
-        ν = copysign(normu, real(ξ1))
+        ν = T(copysign(normu, real(ξ1)))
         ξ1 += ν
         x[1] = -ν
         for i = 2:n
@@ -1512,29 +1515,18 @@ end
 end
 
 # apply reflector from left
-@inline function reflectorApply!(x::AbstractVector, τ::Number, A::StridedMatrix)
+@inline function reflectorApply!(x::AbstractVector, τ::Number, A::AbstractMatrix)
     require_one_based_indexing(x)
     m, n = size(A)
     if length(x) != m
         throw(DimensionMismatch("reflector has length $(length(x)), which must match the first dimension of matrix A, $m"))
     end
     m == 0 && return A
-    @inbounds begin
-        for j = 1:n
-            # dot
-            vAj = A[1, j]
-            for i = 2:m
-                vAj += x[i]'*A[i, j]
-            end
-
-            vAj = conj(τ)*vAj
-
-            # ger
-            A[1, j] -= vAj
-            for i = 2:m
-                A[i, j] -= x[i]*vAj
-            end
-        end
+    @inbounds for j = 1:n
+        Aj, xj = view(A, 2:m, j), view(x, 2:m)
+        vAj = conj(τ)*(A[1, j] + dot(xj, Aj))
+        A[1, j] -= vAj
+        axpy!(-vAj, xj, Aj)
     end
     return A
 end
@@ -1544,6 +1536,8 @@ end
 
 Matrix determinant.
 
+See also: [`logdet`](@ref) and [`logabsdet`](@ref).
+
 # Examples
 ```jldoctest
 julia> M = [1 0; 2 2]
@@ -1564,6 +1558,9 @@ function det(A::AbstractMatrix{T}) where T
 end
 det(x::Number) = x
 
+# Resolve Issue #40128
+det(A::AbstractMatrix{BigInt}) = det_bareiss(A)
+
 """
     logabsdet(M)
 
@@ -1626,6 +1623,55 @@ logdet(A) = log(det(A))
 
 const NumberArray{T<:Number} = AbstractArray{T}
 
+exactdiv(a, b) = a/b
+exactdiv(a::Integer, b::Integer) = div(a, b)
+
+"""
+    det_bareiss!(M)
+
+Calculates the determinant of a matrix using the
+[Bareiss Algorithm](https://en.wikipedia.org/wiki/Bareiss_algorithm) using
+in-place operations (`M` is overwritten). An empty matrix has determinant one.
+
+# Examples
+```jldoctest
+julia> M = [1 0; 2 2]
+2×2 Matrix{Int64}:
+ 1  0
+ 2  2
+
+julia> LinearAlgebra.det_bareiss!(M)
+2
+```
+"""
+function det_bareiss!(M)
+    n = checksquare(M)
+    sign, prev = Int8(1), one(eltype(M))
+    for i in 1:n-1
+        if iszero(M[i,i]) # swap with another col to make nonzero
+            swapto = findfirst(!iszero, @view M[i,i+1:end])
+            isnothing(swapto) && return zero(prev)
+            sign = -sign
+            Base.swapcols!(M, i, i + swapto)
+        end
+        for k in i+1:n, j in i+1:n
+            M[j,k] = exactdiv(M[j,k]*M[i,i] - M[j,i]*M[i,k], prev)
+        end
+        prev = M[i,i]
+    end
+    return n == 0 ? sign * one(prev) : sign * M[end,end] # 0×0: empty product, no M[end,end]
+end
+"""
+    LinearAlgebra.det_bareiss(M)
+
+Calculates the determinant of a matrix using the
+[Bareiss Algorithm](https://en.wikipedia.org/wiki/Bareiss_algorithm).
+Also refer to [`det_bareiss!`](@ref).
+"""
+det_bareiss(M) = det_bareiss!(copy(M))
+
+
+
 """
     promote_leaf_eltypes(itr)
 
@@ -1678,7 +1724,7 @@ function normalize!(a::AbstractArray, p::Real=2)
     __normalize!(a, nrm)
 end
 
-@inline function __normalize!(a::AbstractArray, nrm::AbstractFloat)
+@inline function __normalize!(a::AbstractArray, nrm::Real)
     # The largest positive floating point number whose inverse is less than infinity
     δ = inv(prevfloat(typemax(nrm)))
 
diff --git a/stdlib/LinearAlgebra/src/givens.jl b/stdlib/LinearAlgebra/src/givens.jl
index 42709940188112..1a71b0604b5a2f 100644
--- a/stdlib/LinearAlgebra/src/givens.jl
+++ b/stdlib/LinearAlgebra/src/givens.jl
@@ -8,7 +8,7 @@ transpose(R::AbstractRotation) = error("transpose not implemented for $(typeof(R
 
 function (*)(R::AbstractRotation{T}, A::AbstractVecOrMat{S}) where {T,S}
     TS = typeof(zero(T)*zero(S) + zero(T)*zero(S))
-    lmul!(convert(AbstractRotation{TS}, R), TS == S ? copy(A) : convert(AbstractArray{TS}, A))
+    lmul!(convert(AbstractRotation{TS}, R), copy_oftype(A, TS))
 end
 (*)(A::AbstractVector, adjR::Adjoint{<:Any,<:AbstractRotation}) = _absvecormat_mul_adjrot(A, adjR)
 (*)(A::AbstractMatrix, adjR::Adjoint{<:Any,<:AbstractRotation}) = _absvecormat_mul_adjrot(A, adjR)
@@ -31,7 +31,7 @@ conjugated transpose right multiplication `A*G'`. The type doesn't have a `size`
 therefore be multiplied with matrices of arbitrary size as long as `i2<=size(A,2)` for
 `G*A` or `i2<=size(A,1)` for `A*G'`.
 
-See also: [`givens`](@ref)
+See also [`givens`](@ref).
 """
 struct Givens{T} <: AbstractRotation{T}
     i1::Int
@@ -282,7 +282,7 @@ y[i1] = r
 y[i2] = 0
 ```
 
-See also: [`LinearAlgebra.Givens`](@ref)
+See also [`LinearAlgebra.Givens`](@ref).
 """
 function givens(f::T, g::T, i1::Integer, i2::Integer) where T
     if i1 == i2
@@ -308,7 +308,7 @@ B[i1,j] = r
 B[i2,j] = 0
 ```
 
-See also: [`LinearAlgebra.Givens`](@ref)
+See also [`LinearAlgebra.Givens`](@ref).
 """
 givens(A::AbstractMatrix, i1::Integer, i2::Integer, j::Integer) =
     givens(A[i1,j], A[i2,j],i1,i2)
@@ -327,7 +327,7 @@ B[i1] = r
 B[i2] = 0
 ```
 
-See also: [`LinearAlgebra.Givens`](@ref)
+See also [`LinearAlgebra.Givens`](@ref).
 """
 givens(x::AbstractVector, i1::Integer, i2::Integer) =
     givens(x[i1], x[i2], i1, i2)
@@ -403,11 +403,6 @@ end
 *(A::Adjoint{<:Any,<:AbstractMatrix}, B::Adjoint{<:Any,<:AbstractRotation}) = copy(A) * B
 *(A::Transpose{<:Any,<:AbstractVector}, B::Adjoint{<:Any,<:AbstractRotation}) = copy(A) * B
 *(A::Transpose{<:Any,<:AbstractMatrix}, B::Adjoint{<:Any,<:AbstractRotation}) = copy(A) * B
-# disambiguation methods: *(Adj/Trans of AbsTri or RealHermSymComplex{Herm|Sym}, Adj of AbstractRotation)
-*(A::Adjoint{<:Any,<:AbstractTriangular}, B::Adjoint{<:Any,<:AbstractRotation}) = copy(A) * B
-*(A::Transpose{<:Any,<:AbstractTriangular}, B::Adjoint{<:Any,<:AbstractRotation}) = copy(A) * B
-*(A::Adjoint{<:Any,<:RealHermSymComplexHerm}, B::Adjoint{<:Any,<:AbstractRotation}) = copy(A) * B
-*(A::Transpose{<:Any,<:RealHermSymComplexSym}, B::Adjoint{<:Any,<:AbstractRotation}) = copy(A) * B
 # disambiguation methods: *(Diag/AbsTri, Adj of AbstractRotation)
 *(A::Diagonal, B::Adjoint{<:Any,<:AbstractRotation}) = A * copy(B)
 *(A::AbstractTriangular, B::Adjoint{<:Any,<:AbstractRotation}) = A * copy(B)
diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl
index b284b4f940085e..e79786da925aa9 100644
--- a/stdlib/LinearAlgebra/src/hessenberg.jl
+++ b/stdlib/LinearAlgebra/src/hessenberg.jl
@@ -60,6 +60,8 @@ parent(H::UpperHessenberg) = H.data
 similar(H::UpperHessenberg, ::Type{T}) where {T} = UpperHessenberg(similar(H.data, T))
 similar(H::UpperHessenberg, ::Type{T}, dims::Dims{N}) where {T,N} = similar(H.data, T, dims)
 
+AbstractMatrix{T}(H::UpperHessenberg) where {T} = UpperHessenberg(AbstractMatrix{T}(H.data))
+
 copy(H::UpperHessenberg) = UpperHessenberg(copy(H.data))
 real(H::UpperHessenberg{<:Real}) = H
 real(H::UpperHessenberg{<:Complex}) = UpperHessenberg(triu!(real(H.data),-1))
@@ -94,17 +96,96 @@ Base.copy(A::Transpose{<:Any,<:UpperHessenberg}) = tril!(transpose!(similar(A.pa
 rmul!(H::UpperHessenberg, x::Number) = (rmul!(H.data, x); H)
 lmul!(x::Number, H::UpperHessenberg) = (lmul!(x, H.data); H)
 
-# (future: we could also have specialized routines for UpperHessenberg * UpperTriangular)
-
 fillstored!(H::UpperHessenberg, x) = (fillband!(H.data, x, -1, size(H,2)-1); H)
 
 +(A::UpperHessenberg, B::UpperHessenberg) = UpperHessenberg(A.data+B.data)
 -(A::UpperHessenberg, B::UpperHessenberg) = UpperHessenberg(A.data-B.data)
-# (future: we could also have specialized routines for UpperHessenberg ± UpperTriangular)
 
-# shift Hessenberg by λI
-+(H::UpperHessenberg, J::UniformScaling) = UpperHessenberg(H.data + J)
--(J::UniformScaling, H::UpperHessenberg) = UpperHessenberg(J - H.data)
+for T = (:UniformScaling, :Diagonal, :Bidiagonal, :Tridiagonal, :SymTridiagonal,
+         :UpperTriangular, :UnitUpperTriangular)
+    for op = (:+, :-)
+        @eval begin
+            $op(H::UpperHessenberg, x::$T) = UpperHessenberg($op(H.data, x))
+            $op(x::$T, H::UpperHessenberg) = UpperHessenberg($op(x, H.data))
+        end
+    end
+end
+
+for T = (:Number, :UniformScaling, :Diagonal)
+    @eval begin
+        *(H::UpperHessenberg, x::$T) = UpperHessenberg(H.data * x)
+        *(x::$T, H::UpperHessenberg) = UpperHessenberg(x * H.data)
+        /(H::UpperHessenberg, x::$T) = UpperHessenberg(H.data / x)
+        \(x::$T, H::UpperHessenberg) = UpperHessenberg(x \ H.data)
+    end
+end
+
+function *(H::UpperHessenberg, U::UpperOrUnitUpperTriangular)
+    # right-multiply in place on a copy of H whose eltype can hold the products
+    Tprod = typeof(oneunit(eltype(H)) * oneunit(eltype(U)))
+    out = copyto!(similar(H.data, Tprod, size(H)), H)
+    rmul!(out, U)
+    return UpperHessenberg(out)
+end
+function *(U::UpperOrUnitUpperTriangular, H::UpperHessenberg)
+    # left-multiply in place on a copy of H whose eltype can hold the products
+    Tprod = typeof(oneunit(eltype(H)) * oneunit(eltype(U)))
+    out = copyto!(similar(H.data, Tprod, size(H)), H)
+    lmul!(U, out)
+    return UpperHessenberg(out)
+end
+
+function /(H::UpperHessenberg, U::UpperTriangular)
+    # right-divide in place on a copy of H whose eltype can hold the quotients
+    Tquot = typeof(oneunit(eltype(H)) / oneunit(eltype(U)))
+    out = copyto!(similar(H.data, Tquot, size(H)), H)
+    rdiv!(out, U)
+    return UpperHessenberg(out)
+end
+function /(H::UpperHessenberg, U::UnitUpperTriangular)
+    # right-divide in place on a copy of H whose eltype can hold the quotients
+    Tquot = typeof(oneunit(eltype(H)) / oneunit(eltype(U)))
+    out = copyto!(similar(H.data, Tquot, size(H)), H)
+    rdiv!(out, U)
+    return UpperHessenberg(out)
+end
+
+function \(U::UpperTriangular, H::UpperHessenberg)
+    # left-divide in place on a copy of H whose eltype can hold the quotients
+    Tquot = typeof(oneunit(eltype(U)) \ oneunit(eltype(H)))
+    out = copyto!(similar(H.data, Tquot, size(H)), H)
+    ldiv!(U, out)
+    return UpperHessenberg(out)
+end
+function \(U::UnitUpperTriangular, H::UpperHessenberg)
+    # left-divide in place on a copy of H whose eltype can hold the quotients
+    Tquot = typeof(oneunit(eltype(U)) \ oneunit(eltype(H)))
+    out = copyto!(similar(H.data, Tquot, size(H)), H)
+    ldiv!(U, out)
+    return UpperHessenberg(out)
+end
+
+function *(H::UpperHessenberg, B::Bidiagonal)
+    TS = promote_op(matprod, eltype(H), eltype(B))
+    if B.uplo == 'U'
+        A_mul_B_td!(UpperHessenberg(zeros(TS, size(H)...)), H, B)
+    else
+        A_mul_B_td!(zeros(TS, size(H)...), H, B)
+    end
+end
+function *(B::Bidiagonal, H::UpperHessenberg)
+    TS = promote_op(matprod, eltype(B), eltype(H))
+    if B.uplo == 'U'
+        A_mul_B_td!(UpperHessenberg(zeros(TS, size(B)...)), B, H)
+    else
+        A_mul_B_td!(zeros(TS, size(B)...), B, H)
+    end
+end
+
+function /(H::UpperHessenberg, B::Bidiagonal)
+    A = Base.@invoke /(H::AbstractMatrix, B::Bidiagonal)
+    B.uplo == 'U' ? UpperHessenberg(A) : A
+end
 
 # Solving (H+µI)x = b: we can do this in O(m²) time and O(m) memory
 # (in-place in x) by the RQ algorithm from:
@@ -402,9 +483,9 @@ Q factor:
  0.0  -0.707107   0.707107
 H factor:
 3×3 UpperHessenberg{Float64, Matrix{Float64}}:
-  4.0      -11.3137      -1.41421
- -5.65685    5.0          2.0
-   ⋅        -1.0444e-15   1.0
+  4.0      -11.3137       -1.41421
+ -5.65685    5.0           2.0
+   ⋅        -8.88178e-16   1.0
 
 julia> F.Q * F.H * F.Q'
 3×3 Matrix{Float64}:
@@ -485,28 +566,30 @@ function AbstractMatrix(F::Hessenberg)
     end
 end
 
+# adjoint(Q::HessenbergQ{<:Real})
+
 lmul!(Q::BlasHessenbergQ{T,false}, X::StridedVecOrMat{T}) where {T<:BlasFloat} =
     LAPACK.ormhr!('L', 'N', 1, size(Q.factors, 1), Q.factors, Q.τ, X)
-rmul!(X::StridedMatrix{T}, Q::BlasHessenbergQ{T,false}) where {T<:BlasFloat} =
+rmul!(X::StridedVecOrMat{T}, Q::BlasHessenbergQ{T,false}) where {T<:BlasFloat} =
     LAPACK.ormhr!('R', 'N', 1, size(Q.factors, 1), Q.factors, Q.τ, X)
 lmul!(adjQ::Adjoint{<:Any,<:BlasHessenbergQ{T,false}}, X::StridedVecOrMat{T}) where {T<:BlasFloat} =
     (Q = adjQ.parent; LAPACK.ormhr!('L', ifelse(T<:Real, 'T', 'C'), 1, size(Q.factors, 1), Q.factors, Q.τ, X))
-rmul!(X::StridedMatrix{T}, adjQ::Adjoint{<:Any,<:BlasHessenbergQ{T,false}}) where {T<:BlasFloat} =
+rmul!(X::StridedVecOrMat{T}, adjQ::Adjoint{<:Any,<:BlasHessenbergQ{T,false}}) where {T<:BlasFloat} =
     (Q = adjQ.parent; LAPACK.ormhr!('R', ifelse(T<:Real, 'T', 'C'), 1, size(Q.factors, 1), Q.factors, Q.τ, X))
 
 lmul!(Q::BlasHessenbergQ{T,true}, X::StridedVecOrMat{T}) where {T<:BlasFloat} =
     LAPACK.ormtr!('L', Q.uplo, 'N', Q.factors, Q.τ, X)
-rmul!(X::StridedMatrix{T}, Q::BlasHessenbergQ{T,true}) where {T<:BlasFloat} =
+rmul!(X::StridedVecOrMat{T}, Q::BlasHessenbergQ{T,true}) where {T<:BlasFloat} =
     LAPACK.ormtr!('R', Q.uplo, 'N', Q.factors, Q.τ, X)
 lmul!(adjQ::Adjoint{<:Any,<:BlasHessenbergQ{T,true}}, X::StridedVecOrMat{T}) where {T<:BlasFloat} =
     (Q = adjQ.parent; LAPACK.ormtr!('L', Q.uplo, ifelse(T<:Real, 'T', 'C'), Q.factors, Q.τ, X))
-rmul!(X::StridedMatrix{T}, adjQ::Adjoint{<:Any,<:BlasHessenbergQ{T,true}}) where {T<:BlasFloat} =
+rmul!(X::StridedVecOrMat{T}, adjQ::Adjoint{<:Any,<:BlasHessenbergQ{T,true}}) where {T<:BlasFloat} =
     (Q = adjQ.parent; LAPACK.ormtr!('R', Q.uplo, ifelse(T<:Real, 'T', 'C'), Q.factors, Q.τ, X))
 
 lmul!(Q::HessenbergQ{T}, X::Adjoint{T,<:StridedVecOrMat{T}}) where {T} = rmul!(X', Q')'
-rmul!(X::Adjoint{T,<:StridedMatrix{T}}, Q::HessenbergQ{T}) where {T} = lmul!(Q', X')'
+rmul!(X::Adjoint{T,<:StridedVecOrMat{T}}, Q::HessenbergQ{T}) where {T} = lmul!(Q', X')'
 lmul!(adjQ::Adjoint{<:Any,<:HessenbergQ{T}}, X::Adjoint{T,<:StridedVecOrMat{T}}) where {T}  = rmul!(X', adjQ')'
-rmul!(X::Adjoint{T,<:StridedMatrix{T}}, adjQ::Adjoint{<:Any,<:HessenbergQ{T}}) where {T} = lmul!(adjQ', X')'
+rmul!(X::Adjoint{T,<:StridedVecOrMat{T}}, adjQ::Adjoint{<:Any,<:HessenbergQ{T}}) where {T} = lmul!(adjQ', X')'
 
 # multiply x by the entries of M in the upper-k triangle, which contains
 # the entries of the upper-Hessenberg matrix H for k=-1
diff --git a/stdlib/LinearAlgebra/src/lapack.jl b/stdlib/LinearAlgebra/src/lapack.jl
index e0d86c67de31e9..05e080e00450d9 100644
--- a/stdlib/LinearAlgebra/src/lapack.jl
+++ b/stdlib/LinearAlgebra/src/lapack.jl
@@ -3775,8 +3775,8 @@ for (stev, stebz, stegr, stein, elty) in
             require_one_based_indexing(dv, ev)
             chkstride1(dv, ev)
             n = length(dv)
-            if length(ev) != n - 1
-                throw(DimensionMismatch("ev has length $(length(ev)) but needs one less than dv's length, $n)"))
+            if length(ev) != n - 1 && length(ev) != n
+                throw(DimensionMismatch("ev has length $(length(ev)) but needs one less than or equal to dv's length, $n)"))
             end
             Zmat = similar(dv, $elty, (n, job != 'N' ? n : 0))
             work = Vector{$elty}(undef, max(1, 2n-2))
@@ -3835,7 +3835,7 @@ for (stev, stebz, stegr, stein, elty) in
                 eev = copy(ev)
                 eev[n] = zero($elty)
             else
-                throw(DimensionMismatch("ev has length $(length(ev)) but should be either $(length(dv) - 1) or $(length(dv))"))
+                throw(DimensionMismatch("ev has length $ne but needs one less than or equal to dv's length, $n)"))
             end
 
             abstol = Vector{$elty}(undef, 1)
@@ -3878,10 +3878,15 @@ for (stev, stebz, stegr, stein, elty) in
             require_one_based_indexing(dv, ev_in, w_in, iblock_in, isplit_in)
             chkstride1(dv, ev_in, w_in, iblock_in, isplit_in)
             n = length(dv)
-            if length(ev_in) != n - 1
-                throw(DimensionMismatch("ev_in has length $(length(ev_in)) but needs one less than dv's length, $n)"))
+            ne = length(ev_in)
+            if ne == n - 1
+                ev = [ev_in; zero($elty)]
+            elseif ne == n
+                ev = copy(ev_in)
+                ev[n] = zero($elty)
+            else
+                throw(DimensionMismatch("ev_in has length $ne but needs one less than or equal to dv's length, $n)"))
             end
-            ev = [ev_in; zeros($elty,1)]
             ldz = n #Leading dimension
             #Number of eigenvalues to find
             if !(1 <= length(w_in) <= n)
@@ -5958,9 +5963,9 @@ for (gees, gges, elty) in
             alphar = similar(A, $elty, n)
             alphai = similar(A, $elty, n)
             beta = similar(A, $elty, n)
-            ldvsl = jobvsl == 'V' ? n : 1
+            ldvsl = jobvsl == 'V' ? max(1, n) : 1
             vsl = similar(A, $elty, ldvsl, n)
-            ldvsr = jobvsr == 'V' ? n : 1
+            ldvsr = jobvsr == 'V' ? max(1, n) : 1
             vsr = similar(A, $elty, ldvsr, n)
             work = Vector{$elty}(undef, 1)
             lwork = BlasInt(-1)
@@ -6053,9 +6058,9 @@ for (gees, gges, elty, relty) in
             sdim = BlasInt(0)
             alpha = similar(A, $elty, n)
             beta = similar(A, $elty, n)
-            ldvsl = jobvsl == 'V' ? n : 1
+            ldvsl = jobvsl == 'V' ? max(1, n) : 1
             vsl = similar(A, $elty, ldvsl, n)
-            ldvsr = jobvsr == 'V' ? n : 1
+            ldvsr = jobvsr == 'V' ? max(1, n) : 1
             vsr = similar(A, $elty, ldvsr, n)
             work = Vector{$elty}(undef, 1)
             lwork = BlasInt(-1)
diff --git a/stdlib/LinearAlgebra/src/lbt.jl b/stdlib/LinearAlgebra/src/lbt.jl
index 4ed93188d1f615..b1a2dc24b34492 100644
--- a/stdlib/LinearAlgebra/src/lbt.jl
+++ b/stdlib/LinearAlgebra/src/lbt.jl
@@ -17,6 +17,7 @@ const LBT_INTERFACE_MAP = Dict(
     LBT_INTERFACE_ILP64   => :ilp64,
     LBT_INTERFACE_UNKNOWN => :unknown,
 )
+const LBT_INV_INTERFACE_MAP = Dict(v => k for (k, v) in LBT_INTERFACE_MAP)
 
 const LBT_F2C_PLAIN         =  0
 const LBT_F2C_REQUIRED      =  1
@@ -26,6 +27,7 @@ const LBT_F2C_MAP = Dict(
     LBT_F2C_REQUIRED => :required,
     LBT_F2C_UNKNOWN  => :unknown,
 )
+const LBT_INV_F2C_MAP = Dict(v => k for (k, v) in LBT_F2C_MAP)
 
 struct LBTLibraryInfo
     libname::String
@@ -103,6 +105,42 @@ struct LBTConfig
     end
 end
 
+Base.show(io::IO, lbt::LBTLibraryInfo) = print(io, "LBTLibraryInfo(", basename(lbt.libname), ", ", lbt.interface, ")")
+function Base.show(io::IO, mime::MIME{Symbol("text/plain")}, lbt::LBTLibraryInfo)
+    summary(io, lbt); println(io)
+    println(io, "├ Library: ", basename(lbt.libname))
+    println(io, "├ Interface: ", lbt.interface)
+      print(io, "└ F2C: ", lbt.f2c)
+end
+
+function Base.show(io::IO, lbt::LBTConfig)
+    if length(lbt.loaded_libs) <= 3
+        print(io, "LBTConfig(")
+        gen = (string("[", uppercase(string(l.interface)), "] ",
+            basename(l.libname)) for l in lbt.loaded_libs)
+        print(io, join(gen, ", "))
+        print(io, ")")
+    else
+        print(io, "LBTConfig(...)")
+    end
+end
+function Base.show(io::IO, mime::MIME{Symbol("text/plain")}, lbt::LBTConfig)
+    summary(io, lbt); println(io)
+    println(io, "Libraries: ")
+    for (i,l) in enumerate(lbt.loaded_libs)
+        char = i == length(lbt.loaded_libs) ? "└" : "├" # the last entry closes the tree
+        interface_str = if l.interface == :ilp64
+            "ILP64"
+        elseif l.interface == :lp64
+            " LP64"
+        else
+            "UNKWN"
+        end
+        print(io, char, " [", interface_str,"] ", basename(l.libname))
+        i !== length(lbt.loaded_libs) && println(io) # separator must go to `io`, not stdout
+    end
+end
+
 function lbt_get_config()
     config_ptr = ccall((:lbt_get_config, libblastrampoline), Ptr{lbt_config_t}, ())
     return LBTConfig(unsafe_load(config_ptr))
@@ -128,14 +166,74 @@ function lbt_get_default_func()
     return ccall((:lbt_get_default_func, libblastrampoline), Ptr{Cvoid}, ())
 end
 
-#=
-Don't define footgun API (yet)
+"""
+    lbt_find_backing_library(symbol_name, interface; config::LBTConfig = lbt_get_config())
 
-function lbt_get_forward(symbol_name, interface, f2c = LBT_F2C_PLAIN)
-    return ccall((:lbt_get_forward, libblastrampoline), Ptr{Cvoid}, (Cstring, Int32, Int32), symbol_name, interface, f2c)
+Return the `LBTLibraryInfo` that represents the backing library for the given symbol
+exported from libblastrampoline.  This allows us to discover which library will service
+a particular BLAS call from Julia code.  This method returns `nothing` if either of the
+following conditions are met:
+
+ * No loaded library exports the desired symbol (the default function will be called)
+ * The symbol was set via `lbt_set_forward()`, which does not track library provenance.
+
+If the given `symbol_name` is not contained within the list of exported symbols, an
+`ArgumentError` will be thrown.
+"""
+function lbt_find_backing_library(symbol_name, interface::Symbol;
+                                  config::LBTConfig = lbt_get_config())
+    if interface ∉ (:ilp64, :lp64)
+        throw(ArgumentError("Invalid interface specification: '$(interface)'"))
+    end
+    symbol_idx = findfirst(s -> s == symbol_name, config.exported_symbols)
+    if symbol_idx === nothing
+        throw(ArgumentError("Invalid exported symbol name '$(symbol_name)'"))
+    end
+    # Convert to zero-indexed
+    symbol_idx -= 1
+    # `active_forwards` is read as a bitfield: one bit per exported symbol, eight per byte
+    forward_byte_offset = div(symbol_idx, 8)
+    forward_byte_mask = 1 << mod(symbol_idx, 8)
+    for lib in filter(l -> l.interface == interface, config.loaded_libs)
+        if lib.active_forwards[forward_byte_offset+1] & forward_byte_mask != 0x00
+            return lib
+        end
+    end
+
+    # No backing library was found
+    return nothing
 end
 
+
+## NOTE: Manually setting forwards is referred to as the 'footgun API'.  It allows truly
+## bizarre and complex setups to be created.  If you run into strange errors while using
+## it, the first thing you should ask yourself is whether you've set things up properly.
 function lbt_set_forward(symbol_name, addr, interface, f2c = LBT_F2C_PLAIN; verbose::Bool = false)
-    return ccall((:lbt_set_forward, libblastrampoline), Int32, (Cstring, Ptr{Cvoid}, Int32, Int32, Int32), symbol_name, addr, interface, f2c, verbose ? 1 : 0)
+    return ccall(
+        (:lbt_set_forward, libblastrampoline),
+        Int32,
+        (Cstring, Ptr{Cvoid}, Int32, Int32, Int32),
+        string(symbol_name),
+        addr,
+        Int32(interface),
+        Int32(f2c),
+        verbose ? Int32(1) : Int32(0),
+    )
+end
+function lbt_set_forward(symbol_name, addr, interface::Symbol, f2c::Symbol = :plain; kwargs...)
+    return lbt_set_forward(symbol_name, addr, LBT_INV_INTERFACE_MAP[interface], LBT_INV_F2C_MAP[f2c]; kwargs...)
+end
+
+function lbt_get_forward(symbol_name, interface, f2c = LBT_F2C_PLAIN)
+    # convert the arguments to the declared C argument types before the call
+    name = string(symbol_name)
+    iface, convention = Int32(interface), Int32(f2c)
+    return ccall(
+        (:lbt_get_forward, libblastrampoline),
+        Ptr{Cvoid}, (Cstring, Int32, Int32),
+        name, iface, convention,
+    )
+end
+function lbt_get_forward(symbol_name, interface::Symbol, f2c::Symbol = :plain)
+    return lbt_get_forward(symbol_name, LBT_INV_INTERFACE_MAP[interface], LBT_INV_F2C_MAP[f2c])
 end
-=#
\ No newline at end of file
diff --git a/stdlib/LinearAlgebra/src/ldlt.jl b/stdlib/LinearAlgebra/src/ldlt.jl
index d0f59ebb9ff1b8..f1ea10aa0f6149 100644
--- a/stdlib/LinearAlgebra/src/ldlt.jl
+++ b/stdlib/LinearAlgebra/src/ldlt.jl
@@ -77,6 +77,9 @@ function getproperty(F::LDLt, d::Symbol)
     end
 end
 
+adjoint(F::LDLt{<:Real,<:SymTridiagonal}) = F  # real SymTridiagonal-backed factorization: returned as-is, no copy (presumably because the factored matrix is self-adjoint)
+adjoint(F::LDLt) = LDLt(copy(adjoint(F.data)))  # any other LDLt: wrap a copy of adjoint(F.data) in a new LDLt
+
 function show(io::IO, mime::MIME{Symbol("text/plain")}, F::LDLt)
     summary(io, F); println(io)
     println(io, "L factor:")
diff --git a/stdlib/LinearAlgebra/src/lq.jl b/stdlib/LinearAlgebra/src/lq.jl
index 21ba6a27ac5805..301aae12cc84d7 100644
--- a/stdlib/LinearAlgebra/src/lq.jl
+++ b/stdlib/LinearAlgebra/src/lq.jl
@@ -22,9 +22,15 @@ julia> A = [5. 7.; -2. -4.]
  -2.0  -4.0
 
 julia> S = lq(A)
-LQ{Float64, Matrix{Float64}} with factors L and Q:
-[-8.60233 0.0; 4.41741 -0.697486]
-[-0.581238 -0.813733; -0.813733 0.581238]
+LQ{Float64, Matrix{Float64}}
+L factor:
+2×2 Matrix{Float64}:
+ -8.60233   0.0
+  4.41741  -0.697486
+Q factor:
+2×2 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}}:
+ -0.581238  -0.813733
+ -0.813733   0.581238
 
 julia> S.L * S.Q
 2×2 Matrix{Float64}:
@@ -56,12 +62,10 @@ Base.iterate(S::LQ) = (S.L, Val(:Q))
 Base.iterate(S::LQ, ::Val{:Q}) = (S.Q, Val(:done))
 Base.iterate(S::LQ, ::Val{:done}) = nothing
 
-struct LQPackedQ{T,S<:AbstractMatrix} <: AbstractMatrix{T}
-    factors::Matrix{T}
+struct LQPackedQ{T,S<:AbstractMatrix{T}} <: AbstractMatrix{T}
+    factors::S
     τ::Vector{T}
-    LQPackedQ{T,S}(factors::AbstractMatrix{T}, τ::Vector{T}) where {T,S<:AbstractMatrix} = new(factors, τ)
 end
-LQPackedQ(factors::AbstractMatrix{T}, τ::Vector{T}) where {T} = LQPackedQ{T,typeof(factors)}(factors, τ)
 
 
 """
@@ -92,9 +96,15 @@ julia> A = [5. 7.; -2. -4.]
  -2.0  -4.0
 
 julia> S = lq(A)
-LQ{Float64, Matrix{Float64}} with factors L and Q:
-[-8.60233 0.0; 4.41741 -0.697486]
-[-0.581238 -0.813733; -0.813733 0.581238]
+LQ{Float64, Matrix{Float64}}
+L factor:
+2×2 Matrix{Float64}:
+ -8.60233   0.0
+  4.41741  -0.697486
+Q factor:
+2×2 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}}:
+ -0.581238  -0.813733
+ -0.813733   0.581238
 
 julia> S.L * S.Q
 2×2 Matrix{Float64}:
@@ -107,14 +117,16 @@ julia> l == S.L &&  q == S.Q
 true
 ```
 """
-lq(A::StridedMatrix{<:BlasFloat})  = lq!(copy(A))
-lq(x::Number) = lq(fill(x,1,1))
+lq(A::AbstractMatrix{T}) where {T}  = lq!(copy_oftype(A, lq_eltype(T)))  # lq! receives the result of copy_oftype(A, lq_eltype(T)), not A itself
+lq(x::Number) = lq!(fill(convert(lq_eltype(typeof(x)), x), 1, 1))  # scalar input: factorize the 1×1 matrix holding x converted to lq_eltype
+
+lq_eltype(::Type{T}) where {T} = typeof(zero(T) / sqrt(abs2(one(T))))  # element type used by lq: the type of a T divided by the sqrt of a squared magnitude
 
 copy(A::LQ) = LQ(copy(A.factors), copy(A.τ))
 
 LQ{T}(A::LQ) where {T} = LQ(convert(AbstractMatrix{T}, A.factors), convert(Vector{T}, A.τ))
-Factorization{T}(A::LQ{T}) where {T} = A
 Factorization{T}(A::LQ) where {T} = LQ{T}(A)
+
 AbstractMatrix(A::LQ) = A.L*A.Q
 AbstractArray(A::LQ) = AbstractMatrix(A)
 Matrix(A::LQ) = Array(AbstractArray(A))
@@ -141,12 +153,12 @@ Base.propertynames(F::LQ, private::Bool=false) =
 getindex(A::LQPackedQ, i::Integer, j::Integer) =
     lmul!(A, setindex!(zeros(eltype(A), size(A, 2)), 1, j))[i]
 
-function show(io::IO, ::MIME"text/plain", C::LQ)
-    println(io, typeof(C), " with factors L and Q:")
-    io = IOContext(io, :compact => true)
-    show(io, C.L)
-    println(io)
-    show(io, C.Q)
+function show(io::IO, mime::MIME{Symbol("text/plain")}, F::LQ)
+    summary(io, F); println(io)
+    println(io, "L factor:")
+    show(io, mime, F.L)
+    println(io, "\nQ factor:")
+    show(io, mime, F.Q)
 end
 
 LQPackedQ{T}(Q::LQPackedQ) where {T} = LQPackedQ(convert(AbstractMatrix{T}, Q.factors), convert(Vector{T}, Q.τ))
@@ -182,7 +194,7 @@ function lmul!(A::LQ, B::StridedVecOrMat)
 end
 function *(A::LQ{TA}, B::StridedVecOrMat{TB}) where {TA,TB}
     TAB = promote_type(TA, TB)
-    _cut_B(lmul!(Factorization{TAB}(A), copy_oftype(B, TAB)), 1:size(A,1))
+    _cut_B(lmul!(convert(Factorization{TAB}, A), copy_oftype(B, TAB)), 1:size(A,1))
 end
 
 ## Multiplication by Q
@@ -195,9 +207,9 @@ end
 
 ### QcB
 lmul!(adjA::Adjoint{<:Any,<:LQPackedQ{T}}, B::StridedVecOrMat{T}) where {T<:BlasReal} =
-    (A = adjA.parent; LAPACK.ormlq!('L','T',A.factors,A.τ,B))
+    (A = adjA.parent; LAPACK.ormlq!('L', 'T', A.factors, A.τ, B))
 lmul!(adjA::Adjoint{<:Any,<:LQPackedQ{T}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} =
-    (A = adjA.parent; LAPACK.ormlq!('L','C',A.factors,A.τ,B))
+    (A = adjA.parent; LAPACK.ormlq!('L', 'C', A.factors, A.τ, B))
 
 function *(adjA::Adjoint{<:Any,<:LQPackedQ}, B::StridedVecOrMat)
     A = adjA.parent
@@ -220,11 +232,11 @@ function *(A::LQPackedQ, adjB::Adjoint{<:Any,<:StridedVecOrMat})
     return lmul!(A, BB)
 end
 function *(adjA::Adjoint{<:Any,<:LQPackedQ}, adjB::Adjoint{<:Any,<:StridedVecOrMat})
-    A, B = adjA.parent, adjB.parent
-    TAB = promote_type(eltype(A), eltype(B))
+    B = adjB.parent
+    TAB = promote_type(eltype(adjA.parent), eltype(B))
     BB = similar(B, TAB, (size(B, 2), size(B, 1)))
     adjoint!(BB, B)
-    return lmul!(adjoint(A), BB)
+    return lmul!(adjA, BB)
 end
 
 # in-place right-application of LQPackedQs
@@ -306,17 +318,6 @@ _rightappdimmismatch(rowsorcols) =
         "or (2) the number of rows of that (LQPackedQ) matrix's internal representation ",
         "(the factorization's originating matrix's number of rows)")))
 
-
-function (\)(A::LQ{TA},B::StridedVecOrMat{TB}) where {TA,TB}
-    S = promote_type(TA,TB)
-    m, n = size(A)
-    m ≤ n || throw(DimensionMismatch("LQ solver does not support overdetermined systems (more rows than columns)"))
-    m == size(B,1) || throw(DimensionMismatch("Both inputs should have the same number of rows"))
-    AA = Factorization{S}(A)
-    X = _zeros(S, B, n)
-    X[1:size(B, 1), :] = B
-    return ldiv!(AA, X)
-end
 # With a real lhs and complex rhs with the same precision, we can reinterpret
 # the complex rhs as a real rhs with twice the number of columns
 function (\)(F::LQ{T}, B::VecOrMat{Complex{T}}) where T<:BlasReal
@@ -330,12 +331,25 @@ function (\)(F::LQ{T}, B::VecOrMat{Complex{T}}) where T<:BlasReal
 end
 
 
-function ldiv!(A::LQ{T}, B::StridedVecOrMat{T}) where T
+function ldiv!(A::LQ, B::StridedVecOrMat)
     require_one_based_indexing(B)
+    m, n = size(A)
+    m ≤ n || throw(DimensionMismatch("LQ solver does not support overdetermined systems (more rows than columns)"))
+
     ldiv!(LowerTriangular(A.L), view(B, 1:size(A,1), axes(B,2)))
     return lmul!(adjoint(A.Q), B)
 end
 
+function ldiv!(Fadj::Adjoint{<:Any,<:LQ}, B::StridedVecOrMat)  # in-place solve with the adjoint of an LQ factorization; B is overwritten and returned
+    require_one_based_indexing(B)
+    m, n = size(Fadj)  # dimensions of the adjoint wrapper, not of the parent factorization
+    m >= n || throw(DimensionMismatch("solver does not support underdetermined systems (more columns than rows)"))
+
+    F = parent(Fadj)
+    lmul!(F.Q, B)  # step 1: apply Q to B in place
+    ldiv!(UpperTriangular(adjoint(F.L)), view(B, 1:size(F,1), axes(B,2)))  # step 2: triangular solve with L' on the leading size(F,1) rows only
+    return B  # the whole of B is returned; rows beyond size(F,1) are not touched by the triangular solve
+end
 
 # In LQ factorization, `Q` is expressed as the product of the adjoint of the
 # reflectors.  Thus, `det` has to be conjugated.
diff --git a/stdlib/LinearAlgebra/src/lu.jl b/stdlib/LinearAlgebra/src/lu.jl
index 2d915680d5381d..22ed1fc5a49dc6 100644
--- a/stdlib/LinearAlgebra/src/lu.jl
+++ b/stdlib/LinearAlgebra/src/lu.jl
@@ -76,22 +76,33 @@ adjoint(F::LU) = Adjoint(F)
 transpose(F::LU) = Transpose(F)
 
 # StridedMatrix
-function lu!(A::StridedMatrix{T}, pivot::Union{Val{false}, Val{true}} = Val(true);
-             check::Bool = true) where T<:BlasFloat
-    if pivot === Val(false)
-        return generic_lufact!(A, pivot; check = check)
-    end
+lu(A::StridedMatrix, pivot::Union{RowMaximum,NoPivot} = RowMaximum(); check::Bool = true) =
+    lu!(copy_oftype(A, lutype(eltype(A))), pivot; check=check)
+
+lu!(A::StridedMatrix{<:BlasFloat}; check::Bool = true) = lu!(A, RowMaximum(); check=check)
+function lu!(A::StridedMatrix{T}, ::RowMaximum; check::Bool = true) where {T<:BlasFloat}
     lpt = LAPACK.getrf!(A)
     check && checknonsingular(lpt[3])
     return LU{T,typeof(A)}(lpt[1], lpt[2], lpt[3])
 end
-function lu!(A::HermOrSym, pivot::Union{Val{false}, Val{true}} = Val(true); check::Bool = true)
+function lu!(A::StridedMatrix{<:BlasFloat}, pivot::NoPivot; check::Bool = true)  # BLAS element types with pivoting disabled
+    return generic_lufact!(A, pivot; check = check)  # uses the generic Julia routine; only the RowMaximum method calls LAPACK.getrf!
+end
+
+lu(A::HermOrSym, pivot::Union{RowMaximum,NoPivot} = RowMaximum(); check::Bool = true) =
+    lu!(copy_oftype(A, lutype(eltype(A))), pivot; check=check)
+
+function lu!(A::HermOrSym, pivot::Union{RowMaximum,NoPivot} = RowMaximum(); check::Bool = true)
     copytri!(A.data, A.uplo, isa(A, Hermitian))
     lu!(A.data, pivot; check = check)
 end
+# for backward compatibility
+# TODO: remove in Julia v2.0
+@deprecate lu!(A::Union{StridedMatrix,HermOrSym,Tridiagonal}, ::Val{true}; check::Bool = true) lu!(A, RowMaximum(); check=check)
+@deprecate lu!(A::Union{StridedMatrix,HermOrSym,Tridiagonal}, ::Val{false}; check::Bool = true) lu!(A, NoPivot(); check=check)
 
 """
-    lu!(A, pivot=Val(true); check = true) -> LU
+    lu!(A, pivot = RowMaximum(); check = true) -> LU
 
 `lu!` is the same as [`lu`](@ref), but saves space by overwriting the
 input `A`, instead of creating a copy. An [`InexactError`](@ref)
@@ -127,19 +138,22 @@ Stacktrace:
 [...]
 ```
 """
-lu!(A::StridedMatrix, pivot::Union{Val{false}, Val{true}} = Val(true); check::Bool = true) =
+lu!(A::StridedMatrix, pivot::Union{RowMaximum,NoPivot} = RowMaximum(); check::Bool = true) =
     generic_lufact!(A, pivot; check = check)
-function generic_lufact!(A::StridedMatrix{T}, ::Val{Pivot} = Val(true);
-                         check::Bool = true) where {T,Pivot}
+function generic_lufact!(A::StridedMatrix{T}, pivot::Union{RowMaximum,NoPivot} = RowMaximum();
+                         check::Bool = true) where {T}
+    # Extract values
     m, n = size(A)
     minmn = min(m,n)
+
+    # Initialize variables
     info = 0
     ipiv = Vector{BlasInt}(undef, minmn)
     @inbounds begin
         for k = 1:minmn
             # find index max
             kp = k
-            if Pivot && k < m
+            if pivot === RowMaximum() && k < m
                 amax = abs(A[k, k])
                 for i = k+1:m
                     absi = abs(A[i,k])
@@ -175,7 +189,7 @@ function generic_lufact!(A::StridedMatrix{T}, ::Val{Pivot} = Val(true);
             end
         end
     end
-    check && checknonsingular(info, Val{Pivot}())
+    check && checknonsingular(info, pivot)
     return LU{T,typeof(A)}(A, ipiv, convert(BlasInt, info))
 end
 
@@ -200,7 +214,7 @@ end
 
 # for all other types we must promote to a type which is stable under division
 """
-    lu(A, pivot=Val(true); check = true) -> F::LU
+    lu(A, pivot = RowMaximum(); check = true) -> F::LU
 
 Compute the LU factorization of `A`.
 
@@ -211,7 +225,7 @@ validity (via [`issuccess`](@ref)) lies with the user.
 In most cases, if `A` is a subtype `S` of `AbstractMatrix{T}` with an element
 type `T` supporting `+`, `-`, `*` and `/`, the return type is `LU{T,S{T}}`. If
 pivoting is chosen (default) the element type should also support [`abs`](@ref) and
-[`<`](@ref).
+[`<`](@ref). Pivoting can be turned off by passing `pivot = NoPivot()`.
 
 The individual components of the factorization `F` can be accessed via [`getproperty`](@ref):
 
@@ -267,11 +281,14 @@ julia> l == F.L && u == F.U && p == F.p
 true
 ```
 """
-function lu(A::AbstractMatrix{T}, pivot::Union{Val{false}, Val{true}}=Val(true);
-            check::Bool = true) where T
+function lu(A::AbstractMatrix{T}, pivot::Union{RowMaximum,NoPivot} = RowMaximum(); check::Bool = true) where {T}
     S = lutype(T)
-    lu!(copy_oftype(A, S), pivot; check = check)
+    lu!(copy_to_array(A, S), pivot; check = check)
 end
+# TODO: remove for Julia v2.0
+@deprecate lu(A::AbstractMatrix, ::Val{true}; check::Bool = true) lu(A, RowMaximum(); check=check)
+@deprecate lu(A::AbstractMatrix, ::Val{false}; check::Bool = true) lu(A, NoPivot(); check=check)
+
 
 lu(S::LU) = S
 function lu(x::Number; check::Bool=true)
@@ -480,10 +497,15 @@ inv(A::LU{<:BlasFloat,<:StridedMatrix}) = inv!(copy(A))
 
 # Tridiagonal
 
+lu(A::Tridiagonal{T}, pivot::Union{RowMaximum,NoPivot} = RowMaximum(); check::Bool = true) where T =
+    lu!(copy_oftype(A, lutype(T)), pivot; check = check)
+
 # See dgttrf.f
-function lu!(A::Tridiagonal{T,V}, pivot::Union{Val{false}, Val{true}} = Val(true);
-             check::Bool = true) where {T,V}
+function lu!(A::Tridiagonal{T,V}, pivot::Union{RowMaximum,NoPivot} = RowMaximum(); check::Bool = true) where {T,V}
+    # Extract values
     n = size(A, 1)
+
+    # Initialize variables
     info = 0
     ipiv = Vector{BlasInt}(undef, n)
     dl = A.dl
@@ -500,7 +522,7 @@ function lu!(A::Tridiagonal{T,V}, pivot::Union{Val{false}, Val{true}} = Val(true
         end
         for i = 1:n-2
             # pivot or not?
-            if pivot === Val(false) || abs(d[i]) >= abs(dl[i])
+            if pivot === NoPivot() || abs(d[i]) >= abs(dl[i])
                 # No interchange
                 if d[i] != 0
                     fact = dl[i]/d[i]
@@ -523,7 +545,7 @@ function lu!(A::Tridiagonal{T,V}, pivot::Union{Val{false}, Val{true}} = Val(true
         end
         if n > 1
             i = n-1
-            if pivot === Val(false) || abs(d[i]) >= abs(dl[i])
+            if pivot === NoPivot() || abs(d[i]) >= abs(dl[i])
                 if d[i] != 0
                     fact = dl[i]/d[i]
                     dl[i] = fact
diff --git a/stdlib/LinearAlgebra/src/matmul.jl b/stdlib/LinearAlgebra/src/matmul.jl
index 27bd9c2f23b153..2b315b0cf6080a 100644
--- a/stdlib/LinearAlgebra/src/matmul.jl
+++ b/stdlib/LinearAlgebra/src/matmul.jl
@@ -9,7 +9,7 @@ matprod(x, y) = x*y + x*y
 dot(x::Union{DenseArray{T},StridedVector{T}}, y::Union{DenseArray{T},StridedVector{T}}) where {T<:BlasReal} = BLAS.dot(x, y)
 dot(x::Union{DenseArray{T},StridedVector{T}}, y::Union{DenseArray{T},StridedVector{T}}) where {T<:BlasComplex} = BLAS.dotc(x, y)
 
-function dot(x::Vector{T}, rx::Union{UnitRange{TI},AbstractRange{TI}}, y::Vector{T}, ry::Union{UnitRange{TI},AbstractRange{TI}}) where {T<:BlasReal,TI<:Integer}
+function dot(x::Vector{T}, rx::AbstractRange{TI}, y::Vector{T}, ry::AbstractRange{TI}) where {T<:BlasReal,TI<:Integer}
     if length(rx) != length(ry)
         throw(DimensionMismatch("length of rx, $(length(rx)), does not equal length of ry, $(length(ry))"))
     end
@@ -22,7 +22,7 @@ function dot(x::Vector{T}, rx::Union{UnitRange{TI},AbstractRange{TI}}, y::Vector
     GC.@preserve x y BLAS.dot(length(rx), pointer(x)+(first(rx)-1)*sizeof(T), step(rx), pointer(y)+(first(ry)-1)*sizeof(T), step(ry))
 end
 
-function dot(x::Vector{T}, rx::Union{UnitRange{TI},AbstractRange{TI}}, y::Vector{T}, ry::Union{UnitRange{TI},AbstractRange{TI}}) where {T<:BlasComplex,TI<:Integer}
+function dot(x::Vector{T}, rx::AbstractRange{TI}, y::Vector{T}, ry::AbstractRange{TI}) where {T<:BlasComplex,TI<:Integer}
     if length(rx) != length(ry)
         throw(DimensionMismatch("length of rx, $(length(rx)), does not equal length of ry, $(length(ry))"))
     end
@@ -52,15 +52,9 @@ function (*)(A::AbstractMatrix{T}, x::AbstractVector{S}) where {T,S}
 end
 
 # these will throw a DimensionMismatch unless B has 1 row (or 1 col for transposed case):
-function *(a::AbstractVector, transB::Transpose{<:Any,<:AbstractMatrix})
-    B = transB.parent
-    reshape(a,length(a),1)*transpose(B)
-end
-function *(a::AbstractVector, adjB::Adjoint{<:Any,<:AbstractMatrix})
-    B = adjB.parent
-    reshape(a,length(a),1)*adjoint(B)
-end
-(*)(a::AbstractVector, B::AbstractMatrix) = reshape(a,length(a),1)*B
+(*)(a::AbstractVector, tB::Transpose{<:Any,<:AbstractMatrix}) = reshape(a, length(a), 1) * tB
+(*)(a::AbstractVector, adjB::Adjoint{<:Any,<:AbstractMatrix}) = reshape(a, length(a), 1) * adjB
+(*)(a::AbstractVector, B::AbstractMatrix) = reshape(a, length(a), 1) * B
 
 @inline mul!(y::StridedVector{T}, A::StridedVecOrMat{T}, x::StridedVector{T},
              alpha::Number, beta::Number) where {T<:BlasFloat} =
@@ -81,53 +75,39 @@ end
              alpha::Number, beta::Number) =
     generic_matvecmul!(y, 'N', A, x, MulAddMul(alpha, beta))
 
-function *(transA::Transpose{<:Any,<:StridedMatrix{T}}, x::StridedVector{S}) where {T<:BlasFloat,S}
-    A = transA.parent
+function *(tA::Transpose{<:Any,<:StridedMatrix{T}}, x::StridedVector{S}) where {T<:BlasFloat,S}
     TS = promote_op(matprod, T, S)
-    mul!(similar(x,TS,size(A,2)), transpose(A), convert(AbstractVector{TS}, x))
+    mul!(similar(x, TS, size(tA, 1)), tA, convert(AbstractVector{TS}, x))
 end
-function *(transA::Transpose{<:Any,<:AbstractMatrix{T}}, x::AbstractVector{S}) where {T,S}
-    A = transA.parent
+function *(tA::Transpose{<:Any,<:AbstractMatrix{T}}, x::AbstractVector{S}) where {T,S}
     TS = promote_op(matprod, T, S)
-    mul!(similar(x,TS,size(A,2)), transpose(A), x)
-end
-@inline function mul!(y::StridedVector{T}, transA::Transpose{<:Any,<:StridedVecOrMat{T}}, x::StridedVector{T},
-                      alpha::Number, beta::Number) where {T<:BlasFloat}
-    A = transA.parent
-    return gemv!(y, 'T', A, x, alpha, beta)
-end
-@inline function mul!(y::AbstractVector, transA::Transpose{<:Any,<:AbstractVecOrMat}, x::AbstractVector,
-                      alpha::Number, beta::Number)
-    A = transA.parent
-    return generic_matvecmul!(y, 'T', A, x, MulAddMul(alpha, beta))
+    mul!(similar(x, TS, size(tA, 1)), tA, x)
 end
+@inline mul!(y::StridedVector{T}, tA::Transpose{<:Any,<:StridedVecOrMat{T}}, x::StridedVector{T},
+                      alpha::Number, beta::Number) where {T<:BlasFloat} =
+    gemv!(y, 'T', tA.parent, x, alpha, beta)
+@inline mul!(y::AbstractVector, tA::Transpose{<:Any,<:AbstractVecOrMat}, x::AbstractVector,
+                      alpha::Number, beta::Number) =
+    generic_matvecmul!(y, 'T', tA.parent, x, MulAddMul(alpha, beta))
 
 function *(adjA::Adjoint{<:Any,<:StridedMatrix{T}}, x::StridedVector{S}) where {T<:BlasFloat,S}
-    A = adjA.parent
     TS = promote_op(matprod, T, S)
-    mul!(similar(x,TS,size(A,2)), adjoint(A) ,convert(AbstractVector{TS},x))
+    mul!(similar(x, TS, size(adjA, 1)), adjA, convert(AbstractVector{TS}, x))
 end
 function *(adjA::Adjoint{<:Any,<:AbstractMatrix{T}}, x::AbstractVector{S}) where {T,S}
-    A = adjA.parent
     TS = promote_op(matprod, T, S)
-    mul!(similar(x,TS,size(A,2)), adjoint(A), x)
+    mul!(similar(x, TS, size(adjA, 1)), adjA, x)
 end
 
-@inline function mul!(y::StridedVector{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, x::StridedVector{T},
-                      alpha::Number, beta::Number) where {T<:BlasReal}
-    A = adjA.parent
-    return mul!(y, transpose(A), x, alpha, beta)
-end
-@inline function mul!(y::StridedVector{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, x::StridedVector{T},
-                      alpha::Number, beta::Number) where {T<:BlasComplex}
-    A = adjA.parent
-    return gemv!(y, 'C', A, x, alpha, beta)
-end
-@inline function mul!(y::AbstractVector, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, x::AbstractVector,
-                      alpha::Number, beta::Number)
-    A = adjA.parent
-    return generic_matvecmul!(y, 'C', A, x, MulAddMul(alpha, beta))
-end
+@inline mul!(y::StridedVector{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, x::StridedVector{T},
+                      alpha::Number, beta::Number) where {T<:BlasReal} =
+    mul!(y, transpose(adjA.parent), x, alpha, beta)
+@inline mul!(y::StridedVector{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, x::StridedVector{T},
+                      alpha::Number, beta::Number) where {T<:BlasComplex} =
+    gemv!(y, 'C', adjA.parent, x, alpha, beta)
+@inline mul!(y::AbstractVector, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, x::AbstractVector,
+                      alpha::Number, beta::Number) =
+    generic_matvecmul!(y, 'C', adjA.parent, x, MulAddMul(alpha, beta))
 
 # Vector-Matrix multiplication
 (*)(x::AdjointAbsVec,   A::AbstractMatrix) = (A'*x')'
@@ -368,25 +348,23 @@ julia> lmul!(F.Q, B)
 """
 lmul!(A, B)
 
-@inline function mul!(C::StridedMatrix{T}, transA::Transpose{<:Any,<:StridedVecOrMat{T}}, B::StridedVecOrMat{T},
+@inline function mul!(C::StridedMatrix{T}, tA::Transpose{<:Any,<:StridedVecOrMat{T}}, B::StridedVecOrMat{T},
                  alpha::Number, beta::Number) where {T<:BlasFloat}
-    A = transA.parent
-    if A===B
+    A = tA.parent
+    if A === B
         return syrk_wrapper!(C, 'T', A, MulAddMul(alpha, beta))
     else
         return gemm_wrapper!(C, 'T', 'N', A, B, MulAddMul(alpha, beta))
     end
 end
-@inline function mul!(C::AbstractMatrix, transA::Transpose{<:Any,<:AbstractVecOrMat}, B::AbstractVecOrMat,
-                 alpha::Number, beta::Number)
-    A = transA.parent
-    return generic_matmatmul!(C, 'T', 'N', A, B, MulAddMul(alpha, beta))
-end
+@inline mul!(C::AbstractMatrix, tA::Transpose{<:Any,<:AbstractVecOrMat}, B::AbstractVecOrMat,
+                 alpha::Number, beta::Number) =
+    generic_matmatmul!(C, 'T', 'N', tA.parent, B, MulAddMul(alpha, beta))
 
-@inline function mul!(C::StridedMatrix{T}, A::StridedVecOrMat{T}, transB::Transpose{<:Any,<:StridedVecOrMat{T}},
+@inline function mul!(C::StridedMatrix{T}, A::StridedVecOrMat{T}, tB::Transpose{<:Any,<:StridedVecOrMat{T}},
                  alpha::Number, beta::Number) where {T<:BlasFloat}
-    B = transB.parent
-    if A===B
+    B = tB.parent
+    if A === B
         return syrk_wrapper!(C, 'N', A, MulAddMul(alpha, beta))
     else
         return gemm_wrapper!(C, 'N', 'T', A, B, MulAddMul(alpha, beta))
@@ -395,74 +373,56 @@ end
 # Complex matrix times transposed real matrix. Reinterpret the first matrix to real for efficiency.
 for elty in (Float32,Float64)
     @eval begin
-        @inline function mul!(C::StridedMatrix{Complex{$elty}}, A::StridedVecOrMat{Complex{$elty}}, transB::Transpose{<:Any,<:StridedVecOrMat{$elty}},
+        @inline function mul!(C::StridedMatrix{Complex{$elty}}, A::StridedVecOrMat{Complex{$elty}}, tB::Transpose{<:Any,<:StridedVecOrMat{$elty}},
                          alpha::Real, beta::Real)
             Afl = reinterpret($elty, A)
             Cfl = reinterpret($elty, C)
-            mul!(Cfl, Afl, transB, alpha, beta)
+            mul!(Cfl, Afl, tB, alpha, beta)
             return C
         end
     end
 end
 # collapsing the following two defs with C::AbstractVecOrMat yields ambiguities
-@inline mul!(C::AbstractVector, A::AbstractVecOrMat, transB::Transpose{<:Any,<:AbstractVecOrMat},
+@inline mul!(C::AbstractVector, A::AbstractVecOrMat, tB::Transpose{<:Any,<:AbstractVecOrMat},
              alpha::Number, beta::Number) =
-    generic_matmatmul!(C, 'N', 'T', A, transB.parent, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix, A::AbstractVecOrMat, transB::Transpose{<:Any,<:AbstractVecOrMat},
+    generic_matmatmul!(C, 'N', 'T', A, tB.parent, MulAddMul(alpha, beta))
+@inline mul!(C::AbstractMatrix, A::AbstractVecOrMat, tB::Transpose{<:Any,<:AbstractVecOrMat},
              alpha::Number, beta::Number) =
-    generic_matmatmul!(C, 'N', 'T', A, transB.parent, MulAddMul(alpha, beta))
-
-@inline function mul!(C::StridedMatrix{T}, transA::Transpose{<:Any,<:StridedVecOrMat{T}}, transB::Transpose{<:Any,<:StridedVecOrMat{T}},
-                 alpha::Number, beta::Number) where {T<:BlasFloat}
-    A = transA.parent
-    B = transB.parent
-    return gemm_wrapper!(C, 'T', 'T', A, B, MulAddMul(alpha, beta))
-end
-@inline function mul!(C::AbstractMatrix, transA::Transpose{<:Any,<:AbstractVecOrMat}, transB::Transpose{<:Any,<:AbstractVecOrMat},
-                 alpha::Number, beta::Number)
-    A = transA.parent
-    B = transB.parent
-    return generic_matmatmul!(C, 'T', 'T', A, B, MulAddMul(alpha, beta))
-end
-
-@inline function mul!(C::StridedMatrix{T}, transA::Transpose{<:Any,<:StridedVecOrMat{T}}, transB::Adjoint{<:Any,<:StridedVecOrMat{T}},
-                 alpha::Number, beta::Number) where {T<:BlasFloat}
-    A = transA.parent
-    B = transB.parent
-    return gemm_wrapper!(C, 'T', 'C', A, B, MulAddMul(alpha, beta))
-end
-@inline function mul!(C::AbstractMatrix, transA::Transpose{<:Any,<:AbstractVecOrMat}, transB::Adjoint{<:Any,<:AbstractVecOrMat},
-                 alpha::Number, beta::Number)
-    A = transA.parent
-    B = transB.parent
-    return generic_matmatmul!(C, 'T', 'C', A, B, MulAddMul(alpha, beta))
-end
-
-@inline function mul!(C::StridedMatrix{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, B::StridedVecOrMat{T},
-                 alpha::Real, beta::Real) where {T<:BlasReal}
-    A = adjA.parent
-    return mul!(C, transpose(A), B, alpha, beta)
-end
+    generic_matmatmul!(C, 'N', 'T', A, tB.parent, MulAddMul(alpha, beta))
+
+@inline mul!(C::StridedMatrix{T}, tA::Transpose{<:Any,<:StridedVecOrMat{T}}, tB::Transpose{<:Any,<:StridedVecOrMat{T}},
+                 alpha::Number, beta::Number) where {T<:BlasFloat} =
+    gemm_wrapper!(C, 'T', 'T', tA.parent, tB.parent, MulAddMul(alpha, beta))
+@inline mul!(C::AbstractMatrix, tA::Transpose{<:Any,<:AbstractVecOrMat}, tB::Transpose{<:Any,<:AbstractVecOrMat},
+                 alpha::Number, beta::Number) =
+    generic_matmatmul!(C, 'T', 'T', tA.parent, tB.parent, MulAddMul(alpha, beta))
+
+@inline mul!(C::StridedMatrix{T}, tA::Transpose{<:Any,<:StridedVecOrMat{T}}, adjB::Adjoint{<:Any,<:StridedVecOrMat{T}},
+                 alpha::Number, beta::Number) where {T<:BlasFloat} =
+    gemm_wrapper!(C, 'T', 'C', tA.parent, adjB.parent, MulAddMul(alpha, beta))
+@inline mul!(C::AbstractMatrix, tA::Transpose{<:Any,<:AbstractVecOrMat}, adjB::Adjoint{<:Any,<:AbstractVecOrMat},
+                 alpha::Number, beta::Number) =  # generic fallback for a Transpose-wrapped A times an Adjoint-wrapped B
+    generic_matmatmul!(C, 'T', 'C', tA.parent, adjB.parent, MulAddMul(alpha, beta))  # pass the unwrapped parents; the 'T'/'C' flags record which wrapper each had
+
+@inline mul!(C::StridedMatrix{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, B::StridedVecOrMat{T},
+                 alpha::Real, beta::Real) where {T<:BlasReal} =
+    mul!(C, transpose(adjA.parent), B, alpha, beta)
 @inline function mul!(C::StridedMatrix{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, B::StridedVecOrMat{T},
                  alpha::Number, beta::Number) where {T<:BlasComplex}
     A = adjA.parent
-    if A===B
+    if A === B
         return herk_wrapper!(C, 'C', A, MulAddMul(alpha, beta))
     else
         return gemm_wrapper!(C, 'C', 'N', A, B, MulAddMul(alpha, beta))
     end
 end
-@inline function mul!(C::AbstractMatrix, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, B::AbstractVecOrMat,
-                 alpha::Number, beta::Number)
-    A = adjA.parent
-    return generic_matmatmul!(C, 'C', 'N', A, B, MulAddMul(alpha, beta))
-end
+@inline mul!(C::AbstractMatrix, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, B::AbstractVecOrMat,
+                 alpha::Number, beta::Number) =
+    generic_matmatmul!(C, 'C', 'N', adjA.parent, B, MulAddMul(alpha, beta))
 
-@inline function mul!(C::StridedMatrix{T}, A::StridedVecOrMat{T}, adjB::Adjoint{<:Any,<:StridedVecOrMat{<:BlasReal}},
-                 alpha::Number, beta::Number) where {T<:BlasFloat}
-    B = adjB.parent
-    return mul!(C, A, transpose(B), alpha, beta)
-end
+@inline mul!(C::StridedMatrix{T}, A::StridedVecOrMat{T}, adjB::Adjoint{<:Any,<:StridedVecOrMat{<:BlasReal}},
+                 alpha::Number, beta::Number) where {T<:BlasFloat} =
+    mul!(C, A, transpose(adjB.parent), alpha, beta)
 @inline function mul!(C::StridedMatrix{T}, A::StridedVecOrMat{T}, adjB::Adjoint{<:Any,<:StridedVecOrMat{T}},
                  alpha::Number, beta::Number) where {T<:BlasComplex}
     B = adjB.parent
@@ -472,37 +432,24 @@ end
         return gemm_wrapper!(C, 'N', 'C', A, B, MulAddMul(alpha, beta))
     end
 end
-@inline function mul!(C::AbstractMatrix, A::AbstractVecOrMat, adjB::Adjoint{<:Any,<:AbstractVecOrMat},
-                 alpha::Number, beta::Number)
-    B = adjB.parent
-    return generic_matmatmul!(C, 'N', 'C', A, B, MulAddMul(alpha, beta))
-end
+@inline mul!(C::AbstractMatrix, A::AbstractVecOrMat, adjB::Adjoint{<:Any,<:AbstractVecOrMat},
+                 alpha::Number, beta::Number) =
+    generic_matmatmul!(C, 'N', 'C', A, adjB.parent, MulAddMul(alpha, beta))
+
+@inline mul!(C::StridedMatrix{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, adjB::Adjoint{<:Any,<:StridedVecOrMat{T}},
+                 alpha::Number, beta::Number) where {T<:BlasFloat} =
+    gemm_wrapper!(C, 'C', 'C', adjA.parent, adjB.parent, MulAddMul(alpha, beta))
+@inline mul!(C::AbstractMatrix, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, adjB::Adjoint{<:Any,<:AbstractVecOrMat},
+                 alpha::Number, beta::Number) =
+    generic_matmatmul!(C, 'C', 'C', adjA.parent, adjB.parent, MulAddMul(alpha, beta))
+
+@inline mul!(C::StridedMatrix{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, tB::Transpose{<:Any,<:StridedVecOrMat{T}},
+                 alpha::Number, beta::Number) where {T<:BlasFloat} =
+    gemm_wrapper!(C, 'C', 'T', adjA.parent, tB.parent, MulAddMul(alpha, beta))
+@inline mul!(C::AbstractMatrix, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, tB::Transpose{<:Any,<:AbstractVecOrMat},
+                 alpha::Number, beta::Number) =
+    generic_matmatmul!(C, 'C', 'T', adjA.parent, tB.parent, MulAddMul(alpha, beta))
 
-@inline function mul!(C::StridedMatrix{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, adjB::Adjoint{<:Any,<:StridedVecOrMat{T}},
-                 alpha::Number, beta::Number) where {T<:BlasFloat}
-    A = adjA.parent
-    B = adjB.parent
-    return gemm_wrapper!(C, 'C', 'C', A, B, MulAddMul(alpha, beta))
-end
-@inline function mul!(C::AbstractMatrix, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, adjB::Adjoint{<:Any,<:AbstractVecOrMat},
-                 alpha::Number, beta::Number)
-    A = adjA.parent
-    B = adjB.parent
-    return generic_matmatmul!(C, 'C', 'C', A, B, MulAddMul(alpha, beta))
-end
-
-@inline function mul!(C::StridedMatrix{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, transB::Transpose{<:Any,<:StridedVecOrMat{T}},
-                 alpha::Number, beta::Number) where {T<:BlasFloat}
-    A = adjA.parent
-    B = transB.parent
-    return gemm_wrapper!(C, 'C', 'T', A, B, MulAddMul(alpha, beta))
-end
-@inline function mul!(C::AbstractMatrix, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, transB::Transpose{<:Any,<:AbstractVecOrMat},
-                 alpha::Number, beta::Number)
-    A = adjA.parent
-    B = transB.parent
-    return generic_matmatmul!(C, 'C', 'T', A, B, MulAddMul(alpha, beta))
-end
 # Supporting functions for matrix multiplication
 
 # copy transposed(adjoint) of upper(lower) side-digonals. Optionally include diagonal.
@@ -678,22 +625,22 @@ end
 
 lapack_size(t::AbstractChar, M::AbstractVecOrMat) = (size(M, t=='N' ? 1 : 2), size(M, t=='N' ? 2 : 1))
 
-function copyto!(B::AbstractVecOrMat, ir_dest::UnitRange{Int}, jr_dest::UnitRange{Int}, tM::AbstractChar, M::AbstractVecOrMat, ir_src::UnitRange{Int}, jr_src::UnitRange{Int})
+function copyto!(B::AbstractVecOrMat, ir_dest::AbstractUnitRange{Int}, jr_dest::AbstractUnitRange{Int}, tM::AbstractChar, M::AbstractVecOrMat, ir_src::AbstractUnitRange{Int}, jr_src::AbstractUnitRange{Int})
     if tM == 'N'
         copyto!(B, ir_dest, jr_dest, M, ir_src, jr_src)
     else
         LinearAlgebra.copy_transpose!(B, ir_dest, jr_dest, M, jr_src, ir_src)
-        tM == 'C' && conj!(B)
+        tM == 'C' && conj!(@view B[ir_dest, jr_dest])
     end
     B
 end
 
-function copy_transpose!(B::AbstractMatrix, ir_dest::UnitRange{Int}, jr_dest::UnitRange{Int}, tM::AbstractChar, M::AbstractVecOrMat, ir_src::UnitRange{Int}, jr_src::UnitRange{Int})
+function copy_transpose!(B::AbstractMatrix, ir_dest::AbstractUnitRange{Int}, jr_dest::AbstractUnitRange{Int}, tM::AbstractChar, M::AbstractVecOrMat, ir_src::AbstractUnitRange{Int}, jr_src::AbstractUnitRange{Int})
     if tM == 'N'
         LinearAlgebra.copy_transpose!(B, ir_dest, jr_dest, M, ir_src, jr_src)
     else
         copyto!(B, ir_dest, jr_dest, M, jr_src, ir_src)
-        tM == 'C' && conj!(B)
+        tM == 'C' && conj!(@view B[ir_dest, jr_dest])
     end
     B
 end
@@ -1081,3 +1028,141 @@ function matmul3x3!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMat
     end # inbounds
     C
 end
+
+const RealOrComplex = Union{Real,Complex}
+
+# Three-argument *
+"""
+    *(A, B::AbstractMatrix, C)
+    A * B * C * D
+
+Chained multiplication of 3 or 4 matrices is done in the most efficient sequence,
+based on the sizes of the arrays. That is, the number of scalar multiplications needed
+for `(A * B) * C` (with 3 dense matrices) is compared to that for `A * (B * C)`
+to choose which of these to execute.
+
+If the last factor is a vector, or the first a transposed vector, then it is efficient
+to deal with these first. In particular `x' * B * y` means `(x' * B) * y`
+for an ordinary column-major `B::Matrix`. Unlike `dot(x, B, y)`, this
+allocates an intermediate array.
+
+If the first or last factor is a number, this will be fused with the matrix
+multiplication, using 5-arg [`mul!`](@ref).
+
+See also [`muladd`](@ref), [`dot`](@ref).
+
+!!! compat "Julia 1.7"
+    These optimisations require at least Julia 1.7.
+"""
+*(A::AbstractMatrix, B::AbstractMatrix, x::AbstractVector) = A * (B*x)
+
+*(tu::AdjOrTransAbsVec, B::AbstractMatrix, v::AbstractVector) = (tu*B) * v  # row vector times B first
+*(tu::AdjOrTransAbsVec, B::AdjOrTransAbsMat, v::AbstractVector) = tu * (B*v)  # Adjoint/Transpose-wrapped B: B*v first
+
+*(A::AbstractMatrix, x::AbstractVector, γ::Number) = mat_vec_scalar(A,x,γ)  # scalar is handled by the helper
+*(A::AbstractMatrix, B::AbstractMatrix, γ::Number) = mat_mat_scalar(A,B,γ)
+*(α::RealOrComplex, B::AbstractMatrix{<:RealOrComplex}, C::AbstractVector{<:RealOrComplex}) =
+    mat_vec_scalar(B,C,α)  # the leading scalar is passed on as the trailing factor
+*(α::RealOrComplex, B::AbstractMatrix{<:RealOrComplex}, C::AbstractMatrix{<:RealOrComplex}) =
+    mat_mat_scalar(B,C,α)
+
+*(α::Number, u::AbstractVector, tv::AdjOrTransAbsVec) = broadcast(*, α, u, tv)  # scaled outer product in one broadcast
+*(u::AbstractVector, tv::AdjOrTransAbsVec, γ::Number) = broadcast(*, u, tv, γ)
+*(u::AbstractVector, tv::AdjOrTransAbsVec, C::AbstractMatrix) = u * (tv*C)  # tv*C first, the outer product last
+
+*(A::AbstractMatrix, B::AbstractMatrix, C::AbstractMatrix) = _tri_matmul(A,B,C)  # order is chosen from the sizes
+*(tv::AdjOrTransAbsVec, B::AbstractMatrix, C::AbstractMatrix) = (tv*B) * C  # row vector times B first
+
+function _tri_matmul(A,B,C,δ=nothing)
+    # Multiply in whichever order, (A*B)*C or A*(B*C), needs fewer scalar
+    # multiplications; a scalar δ, if given, is folded into one of the products.
+    p,q = size(A)
+    r,s = size(C)  # so that size(B) == (q,r)
+    left_first  = p*q*r + p*r*s  # cost of (A*B)*C; allocates p*r + p*s
+    right_first = q*r*s + p*q*s  # cost of A*(B*C); allocates q*s + p*s
+    if right_first < left_first
+        return isnothing(δ) ? A * (B*C) : A * mat_mat_scalar(B,C,δ)
+    end
+    return isnothing(δ) ? (A*B) * C : mat_mat_scalar(A*B, C, δ)
+end
+
+# Fast path for two arrays * one scalar is opt-in, via mat_vec_scalar and mat_mat_scalar.
+
+mat_vec_scalar(A, x, γ) = A * (x * γ)  # fallback
+mat_vec_scalar(A::StridedMaybeAdjOrTransMat, x::StridedVector, γ) = _mat_vec_scalar(A, x, γ)
+mat_vec_scalar(A::AdjOrTransAbsVec, x::StridedVector, γ) = (A * x) * γ
+
+function _mat_vec_scalar(A, x, γ)
+    # γ is applied inside the 5-arg mul!; β = false means the fresh buffer is never read
+    Tout = promote_type(eltype(A), eltype(x), typeof(γ))
+    return mul!(similar(A, Tout, axes(A,1)), A, x, γ, false)
+end
+
+mat_mat_scalar(A, B, γ) = (A*B) * γ # fallback
+mat_mat_scalar(A::StridedMaybeAdjOrTransMat, B::StridedMaybeAdjOrTransMat, γ) =
+    _mat_mat_scalar(A, B, γ)
+
+function _mat_mat_scalar(A, B, γ)
+    # γ is applied inside the 5-arg mul!; β = false means the fresh buffer is never read
+    Tout = promote_type(eltype(A), eltype(B), typeof(γ))
+    return mul!(similar(A, Tout, axes(A,1), axes(B,2)), A, B, γ, false)
+end
+
+mat_mat_scalar(A::AdjointAbsVec, B, γ) = (γ' * (A * B)')' # preserving order, adjoint reverses
+mat_mat_scalar(A::AdjointAbsVec{<:RealOrComplex}, B::StridedMaybeAdjOrTransMat{<:RealOrComplex}, γ::RealOrComplex) =
+    mat_vec_scalar(B', A', γ')'
+
+mat_mat_scalar(A::TransposeAbsVec, B, γ) = transpose(γ * transpose(A * B))
+mat_mat_scalar(A::TransposeAbsVec{<:RealOrComplex}, B::StridedMaybeAdjOrTransMat{<:RealOrComplex}, γ::RealOrComplex) =
+    transpose(mat_vec_scalar(transpose(B), transpose(A), γ))
+
+
+# Four-argument *, by type
+*(α::Number, β::Number, C::AbstractMatrix, x::AbstractVector) = (α*β) * C * x
+*(α::Number, β::Number, C::AbstractMatrix, D::AbstractMatrix) = (α*β) * C * D
+*(α::Number, B::AbstractMatrix, C::AbstractMatrix, x::AbstractVector) = α * B * (C*x)
+*(α::Number, vt::AdjOrTransAbsVec, C::AbstractMatrix, x::AbstractVector) = α * (vt*C*x)
+*(α::RealOrComplex, vt::AdjOrTransAbsVec{<:RealOrComplex}, C::AbstractMatrix{<:RealOrComplex}, D::AbstractMatrix{<:RealOrComplex}) =
+    (α*vt*C) * D # solves an ambiguity
+
+*(A::AbstractMatrix, x::AbstractVector, γ::Number, δ::Number) = A * x * (γ*δ)
+*(A::AbstractMatrix, B::AbstractMatrix, γ::Number, δ::Number) = A * B * (γ*δ)
+*(A::AbstractMatrix, B::AbstractMatrix, x::AbstractVector, δ::Number, ) = A * (B*x*δ)
+*(vt::AdjOrTransAbsVec, B::AbstractMatrix, x::AbstractVector, δ::Number) = (vt*B*x) * δ
+*(vt::AdjOrTransAbsVec, B::AbstractMatrix, C::AbstractMatrix, δ::Number) = (vt*B) * C * δ
+
+*(A::AbstractMatrix, B::AbstractMatrix, C::AbstractMatrix, x::AbstractVector) = A * B * (C*x)
+*(vt::AdjOrTransAbsVec, B::AbstractMatrix, C::AbstractMatrix, D::AbstractMatrix) = (vt*B) * C * D
+*(vt::AdjOrTransAbsVec, B::AbstractMatrix, C::AbstractMatrix, x::AbstractVector) = vt * B * (C*x)
+
+# Four-argument *, by size
+*(A::AbstractMatrix, B::AbstractMatrix, C::AbstractMatrix, δ::Number) = _tri_matmul(A,B,C,δ)
+*(α::RealOrComplex, B::AbstractMatrix{<:RealOrComplex}, C::AbstractMatrix{<:RealOrComplex}, D::AbstractMatrix{<:RealOrComplex}) =
+    _tri_matmul(B,C,D,α)
+*(A::AbstractMatrix, B::AbstractMatrix, C::AbstractMatrix, D::AbstractMatrix) =
+    _quad_matmul(A,B,C,D)
+
+# Evaluate A*B*C*D using whichever of the five parenthesizations is cheapest according
+# to _mul_cost (scalar multiplications); on a tie the earliest candidate below wins.
+function _quad_matmul(A,B,C,D)
+    pairwise  = _mul_cost((A,B),(C,D))   # (A*B) * (C*D)
+    leftward  = _mul_cost(((A,B),C),D)   # ((A*B) * C) * D
+    rightward = _mul_cost(A,(B,(C,D)))   # A * (B * (C*D))
+    mid_left  = _mul_cost((A,(B,C)),D)   # (A * (B*C)) * D
+    mid_right = _mul_cost(A,((B,C),D))   # A * ((B*C) * D)
+    cheapest = min(pairwise, leftward, rightward, mid_left, mid_right)
+    pairwise  == cheapest && return (A*B) * (C*D)
+    leftward  == cheapest && return ((A*B) * C) * D
+    rightward == cheapest && return A * (B * (C*D))
+    mid_left  == cheapest && return (A * (B*C)) * D
+    return A * ((B*C) * D)
+end
+# Helpers: multiplication count and resulting size of a product tree given as nested tuples.
+@inline _mul_cost(::AbstractMatrix) = 0
+@inline _mul_cost(pair::Tuple) = _mul_cost(pair[1], pair[2])
+@inline function _mul_cost(A,B)
+    rows, inner = _mul_sizes(A)
+    return _mul_cost(A) + _mul_cost(B) + rows * inner * last(_mul_sizes(B))
+end
+@inline _mul_sizes(A::AbstractMatrix) = size(A)
+@inline _mul_sizes(pair::Tuple) = (first(_mul_sizes(pair[1])), last(_mul_sizes(pair[2])))
diff --git a/stdlib/LinearAlgebra/src/qr.jl b/stdlib/LinearAlgebra/src/qr.jl
index c9ba49d2cd1ada..15bc61e1b1774e 100644
--- a/stdlib/LinearAlgebra/src/qr.jl
+++ b/stdlib/LinearAlgebra/src/qr.jl
@@ -127,6 +127,40 @@ Base.iterate(S::QRCompactWY) = (S.Q, Val(:R))
 Base.iterate(S::QRCompactWY, ::Val{:R}) = (S.R, Val(:done))
 Base.iterate(S::QRCompactWY, ::Val{:done}) = nothing
 
+# returns upper triangular views of all non-undef values of `qr(A).T`:
+#
+# julia> sparse(qr(A).T .== qr(A).T)
+# 36×100 SparseMatrixCSC{Bool, Int64} with 1767 stored entries:
+# ⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿
+# ⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿
+# ⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿
+# ⠀⠀⠀⠀⠀⠂⠛⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿
+# ⠀⠀⠀⠀⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⢀⠐⠙⢿⣿⣿⣿⣿
+# ⠀⠀⠐⠀⠀⠀⠀⠀⠀⢀⢙⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠁⠀⡀⠀⠙⢿⣿⣿
+# ⠀⠀⠐⠀⠀⠀⠀⠀⠀⠀⠄⠀⠙⢿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⡀⠀⠀⢀⠀⠀⠙⢿
+# ⠀⡀⠀⠀⠀⠀⠀⠀⠂⠒⠒⠀⠀⠀⠙⢿⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠙⢿⣿⣿⠀⠀⠀⠀⠀⠀⠀⢀⠀⠀⠀⡀⠀⠀
+# ⠀⠀⠀⠀⠀⠀⠀⠀⣈⡀⠀⠀⠀⠀⠀⠀⠙⢿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠙⢿⠀⠀⠀⠀⠀⠀⠀⠀⠀⡀⠂⠀⢀⠀
+function _triuppers_qr(T)
+    blocksize, cols = size(T)
+    # a rowless `T` (empty input matrix) has no blocks; unguarded, `div` throws DivideError
+    return Iterators.map(0:(iszero(blocksize) ? -1 : div(cols - 1, blocksize))) do i
+        n = min(blocksize, cols - i * blocksize)
+        return UpperTriangular(view(T, 1:n, (1:n) .+ i * blocksize))
+    end
+end
+
+function Base.hash(F::QRCompactWY, h::UInt)
+    seed = hash(QRCompactWY, h)  # type-specific starting value
+    return hash(F.factors, foldr(hash, _triuppers_qr(F.T); init=seed))
+end
+function Base.:(==)(A::QRCompactWY, B::QRCompactWY)
+    return A.factors == B.factors && all(splat(==), zip(_triuppers_qr(A.T), _triuppers_qr(B.T)))
+end
+function Base.isequal(A::QRCompactWY, B::QRCompactWY)
+    return isequal(A.factors, B.factors) &&  # `T` is compared block by block
+        all(((a, b),) -> isequal(a, b)::Bool, zip(_triuppers_qr(A.T), _triuppers_qr(B.T)))
+end
+
 """
     QRPivoted <: Factorization
 
@@ -198,7 +232,7 @@ function qrfactUnblocked!(A::AbstractMatrix{T}) where {T}
 end
 
 # Find index for columns with largest two norm
-function indmaxcolumn(A::StridedMatrix)
+function indmaxcolumn(A::AbstractMatrix)
     mm = norm(view(A, :, 1))
     ii = 1
     for i = 2:size(A, 2)
@@ -211,7 +245,7 @@ function indmaxcolumn(A::StridedMatrix)
     return ii
 end
 
-function qrfactPivotedUnblocked!(A::StridedMatrix)
+function qrfactPivotedUnblocked!(A::AbstractMatrix)
     m, n = size(A)
     piv = Vector(UnitRange{BlasInt}(1,n))
     τ = Vector{eltype(A)}(undef, min(m,n))
@@ -246,17 +280,17 @@ function qrfactPivotedUnblocked!(A::StridedMatrix)
 end
 
 # LAPACK version
-qr!(A::StridedMatrix{<:BlasFloat}, ::Val{false} = Val(false); blocksize=36) =
+qr!(A::StridedMatrix{<:BlasFloat}, ::NoPivot; blocksize=36) =
     QRCompactWY(LAPACK.geqrt!(A, min(min(size(A)...), blocksize))...)
-qr!(A::StridedMatrix{<:BlasFloat}, ::Val{true}) = QRPivoted(LAPACK.geqp3!(A)...)
+qr!(A::StridedMatrix{<:BlasFloat}, ::ColumnNorm) = QRPivoted(LAPACK.geqp3!(A)...)
 
 # Generic fallbacks
 
 """
-    qr!(A, pivot=Val(false); blocksize)
+    qr!(A, pivot = NoPivot(); blocksize)
 
-`qr!` is the same as [`qr`](@ref) when `A` is a subtype of
-[`StridedMatrix`](@ref), but saves space by overwriting the input `A`, instead of creating a copy.
+`qr!` is the same as [`qr`](@ref) when `A` is a subtype of [`StridedMatrix`](@ref),
+but saves space by overwriting the input `A`, instead of creating a copy.
 An [`InexactError`](@ref) exception is thrown if the factorization produces a number not
 representable by the element type of `A`, e.g. for integer types.
 
@@ -287,19 +321,22 @@ julia> a = [1 2; 3 4]
  3  4
 
 julia> qr!(a)
-ERROR: InexactError: Int64(-3.1622776601683795)
+ERROR: InexactError: Int64(3.1622776601683795)
 Stacktrace:
 [...]
 ```
 """
-qr!(A::StridedMatrix, ::Val{false}) = qrfactUnblocked!(A)
-qr!(A::StridedMatrix, ::Val{true}) = qrfactPivotedUnblocked!(A)
-qr!(A::StridedMatrix) = qr!(A, Val(false))
+qr!(A::AbstractMatrix, ::NoPivot) = qrfactUnblocked!(A)
+qr!(A::AbstractMatrix, ::ColumnNorm) = qrfactPivotedUnblocked!(A)
+qr!(A::AbstractMatrix) = qr!(A, NoPivot())
+# TODO: Remove in Julia v2.0
+@deprecate qr!(A::AbstractMatrix, ::Val{true})  qr!(A, ColumnNorm())
+@deprecate qr!(A::AbstractMatrix, ::Val{false}) qr!(A, NoPivot())
 
 _qreltype(::Type{T}) where T = typeof(zero(T)/sqrt(abs2(one(T))))
 
 """
-    qr(A, pivot=Val(false); blocksize) -> F
+    qr(A, pivot = NoPivot(); blocksize) -> F
 
 Compute the QR factorization of the matrix `A`: an orthogonal (or unitary if `A` is
 complex-valued) matrix `Q`, and an upper triangular matrix `R` such that
@@ -310,7 +347,7 @@ A = Q R
 
 The returned object `F` stores the factorization in a packed format:
 
- - if `pivot == Val(true)` then `F` is a [`QRPivoted`](@ref) object,
+ - if `pivot == ColumnNorm()` then `F` is a [`QRPivoted`](@ref) object,
 
  - otherwise if the element type of `A` is a BLAS type ([`Float32`](@ref), [`Float64`](@ref),
    `ComplexF32` or `ComplexF64`), then `F` is a [`QRCompactWY`](@ref) object,
@@ -340,7 +377,7 @@ and `F.Q*A` are supported. A `Q` matrix can be converted into a regular matrix w
 orthogonal matrix.
 
 The block size for QR decomposition can be specified by keyword argument
-`blocksize :: Integer` when `pivot == Val(false)` and `A isa StridedMatrix{<:BlasFloat}`.
+`blocksize :: Integer` when `pivot == NoPivot()` and `A isa StridedMatrix{<:BlasFloat}`.
 It is ignored when `blocksize > minimum(size(A))`.  See [`QRCompactWY`](@ref).
 
 !!! compat "Julia 1.4"
@@ -378,10 +415,13 @@ true
 """
 function qr(A::AbstractMatrix{T}, arg...; kwargs...) where T
     require_one_based_indexing(A)
-    AA = similar(A, _qreltype(T), size(A))
-    copyto!(AA, A)
+    AA = copy_similar(A, _qreltype(T))
     return qr!(AA, arg...; kwargs...)
 end
+# TODO: remove in Julia v2.0
+@deprecate qr(A::AbstractMatrix, ::Val{false}; kwargs...) qr(A, NoPivot(); kwargs...)
+@deprecate qr(A::AbstractMatrix, ::Val{true}; kwargs...)  qr(A, ColumnNorm(); kwargs...)
+
 qr(x::Number) = qr(fill(x,1,1))
 function qr(v::AbstractVector)
     require_one_based_indexing(v)
@@ -465,6 +505,8 @@ end
 Base.propertynames(F::QRPivoted, private::Bool=false) =
     (:R, :Q, :p, :P, (private ? fieldnames(typeof(F)) : ())...)
 
+adjoint(F::Union{QR,QRPivoted,QRCompactWY}) = Adjoint(F)
+
 abstract type AbstractQ{T} <: AbstractMatrix{T} end
 
 inv(Q::AbstractQ) = Q'
@@ -561,9 +603,9 @@ end
 ## Multiplication by Q
 ### QB
 lmul!(A::QRCompactWYQ{T,S}, B::StridedVecOrMat{T}) where {T<:BlasFloat, S<:StridedMatrix} =
-    LAPACK.gemqrt!('L','N',A.factors,A.T,B)
+    LAPACK.gemqrt!('L', 'N', A.factors, A.T, B)
 lmul!(A::QRPackedQ{T,S}, B::StridedVecOrMat{T}) where {T<:BlasFloat, S<:StridedMatrix} =
-    LAPACK.ormqr!('L','N',A.factors,A.τ,B)
+    LAPACK.ormqr!('L', 'N', A.factors, A.τ, B)
 function lmul!(A::QRPackedQ, B::AbstractVecOrMat)
     require_one_based_indexing(B)
     mA, nA = size(A.factors)
@@ -624,13 +666,13 @@ end
 
 ### QcB
 lmul!(adjA::Adjoint{<:Any,<:QRCompactWYQ{T,S}}, B::StridedVecOrMat{T}) where {T<:BlasReal,S<:StridedMatrix} =
-    (A = adjA.parent; LAPACK.gemqrt!('L','T',A.factors,A.T,B))
+    (A = adjA.parent; LAPACK.gemqrt!('L', 'T', A.factors, A.T, B))
 lmul!(adjA::Adjoint{<:Any,<:QRCompactWYQ{T,S}}, B::StridedVecOrMat{T}) where {T<:BlasComplex,S<:StridedMatrix} =
-    (A = adjA.parent; LAPACK.gemqrt!('L','C',A.factors,A.T,B))
+    (A = adjA.parent; LAPACK.gemqrt!('L', 'C', A.factors, A.T, B))
 lmul!(adjA::Adjoint{<:Any,<:QRPackedQ{T,S}}, B::StridedVecOrMat{T}) where {T<:BlasReal,S<:StridedMatrix} =
-    (A = adjA.parent; LAPACK.ormqr!('L','T',A.factors,A.τ,B))
+    (A = adjA.parent; LAPACK.ormqr!('L', 'T', A.factors, A.τ, B))
 lmul!(adjA::Adjoint{<:Any,<:QRPackedQ{T,S}}, B::StridedVecOrMat{T}) where {T<:BlasComplex,S<:StridedMatrix} =
-    (A = adjA.parent; LAPACK.ormqr!('L','C',A.factors,A.τ,B))
+    (A = adjA.parent; LAPACK.ormqr!('L', 'C', A.factors, A.τ, B))
 function lmul!(adjA::Adjoint{<:Any,<:QRPackedQ}, B::AbstractVecOrMat)
     require_one_based_indexing(B)
     A = adjA.parent
@@ -681,7 +723,7 @@ end
 
 ### AQ
 rmul!(A::StridedVecOrMat{T}, B::QRCompactWYQ{T,S}) where {T<:BlasFloat,S<:StridedMatrix} =
-    LAPACK.gemqrt!('R','N', B.factors, B.T, A)
+    LAPACK.gemqrt!('R', 'N', B.factors, B.T, A)
 rmul!(A::StridedVecOrMat{T}, B::QRPackedQ{T,S}) where {T<:BlasFloat,S<:StridedMatrix} =
     LAPACK.ormqr!('R', 'N', B.factors, B.τ, A)
 function rmul!(A::StridedMatrix,Q::QRPackedQ)
@@ -724,13 +766,13 @@ end
 
 ### AQc
 rmul!(A::StridedVecOrMat{T}, adjB::Adjoint{<:Any,<:QRCompactWYQ{T}}) where {T<:BlasReal} =
-    (B = adjB.parent; LAPACK.gemqrt!('R','T',B.factors,B.T,A))
+    (B = adjB.parent; LAPACK.gemqrt!('R', 'T', B.factors, B.T, A))
 rmul!(A::StridedVecOrMat{T}, adjB::Adjoint{<:Any,<:QRCompactWYQ{T}}) where {T<:BlasComplex} =
-    (B = adjB.parent; LAPACK.gemqrt!('R','C',B.factors,B.T,A))
+    (B = adjB.parent; LAPACK.gemqrt!('R', 'C', B.factors, B.T, A))
 rmul!(A::StridedVecOrMat{T}, adjB::Adjoint{<:Any,<:QRPackedQ{T}}) where {T<:BlasReal} =
-    (B = adjB.parent; LAPACK.ormqr!('R','T',B.factors,B.τ,A))
+    (B = adjB.parent; LAPACK.ormqr!('R', 'T', B.factors, B.τ, A))
 rmul!(A::StridedVecOrMat{T}, adjB::Adjoint{<:Any,<:QRPackedQ{T}}) where {T<:BlasComplex} =
-    (B = adjB.parent; LAPACK.ormqr!('R','C',B.factors,B.τ,A))
+    (B = adjB.parent; LAPACK.ormqr!('R', 'C', B.factors, B.τ, A))
 function rmul!(A::StridedMatrix, adjQ::Adjoint{<:Any,<:QRPackedQ})
     Q = adjQ.parent
     mQ, nQ = size(Q.factors)
@@ -761,8 +803,7 @@ function *(A::StridedMatrix, adjB::Adjoint{<:Any,<:AbstractQ})
     TAB = promote_type(eltype(A),eltype(B))
     BB = convert(AbstractMatrix{TAB}, B)
     if size(A,2) == size(B.factors, 1)
-        AA = similar(A, TAB, size(A))
-        copyto!(AA, A)
+        AA = copy_similar(A, TAB)
         return rmul!(AA, adjoint(BB))
     elseif size(A,2) == size(B.factors,2)
         return rmul!([A zeros(TAB, size(A, 1), size(B.factors, 1) - size(B.factors, 2))], adjoint(BB))
@@ -910,7 +951,6 @@ function ldiv!(A::QR{T}, B::StridedMatrix{T}) where T
     m, n = size(A)
     m < n && return _wide_qr_ldiv!(A, B)
 
-    mB, nB = size(B)
     lmul!(adjoint(A.Q), view(B, 1:m, :))
     R = A.factors
     ldiv!(UpperTriangular(view(R,1:n,:)), view(B, 1:n, :))
@@ -932,28 +972,35 @@ function ldiv!(A::QRPivoted, B::StridedMatrix)
     B
 end
 
-# convenience methods
-## return only the solution of a least squares problem while avoiding promoting
-## vectors to matrices.
-_cut_B(x::AbstractVector, r::UnitRange) = length(x)  > length(r) ? x[r]   : x
-_cut_B(X::AbstractMatrix, r::UnitRange) = size(X, 1) > length(r) ? X[r,:] : X
-
-## append right hand side with zeros if necessary
-_zeros(::Type{T}, b::AbstractVector, n::Integer) where {T} = zeros(T, max(length(b), n))
-_zeros(::Type{T}, B::AbstractMatrix, n::Integer) where {T} = zeros(T, max(size(B, 1), n), size(B, 2))
+function _apply_permutation!(F::QRPivoted, B::AbstractVecOrMat)
+    # only the leading rows are permuted; the rest is zero padding (underdetermined case)
+    perm = F.p
+    B[1:length(perm), :] = B[perm, :]
+    return B
+end
+_apply_permutation!(::Factorization, B::AbstractVecOrMat) = B  # other factorizations: B is returned as is
 
-function (\)(A::Union{QR{TA},QRCompactWY{TA},QRPivoted{TA}}, B::AbstractVecOrMat{TB}) where {TA,TB}
+function ldiv!(Fadj::Adjoint{<:Any,<:Union{QR,QRCompactWY,QRPivoted}}, B::AbstractVecOrMat)
     require_one_based_indexing(B)
-    S = promote_type(TA,TB)
-    m, n = size(A)
-    m == size(B,1) || throw(DimensionMismatch("Both inputs should have the same number of rows"))
+    m, n = size(Fadj)
+
+    # We don't allow solutions of overdetermined systems
+    if m > n
+        throw(DimensionMismatch("overdetermined systems are not supported"))
+    end
+    if n != size(B, 1)
+        throw(DimensionMismatch("inputs should have the same number of rows"))
+    end
+    F = parent(Fadj)
 
-    AA = Factorization{S}(A)
+    B = _apply_permutation!(F, B)
 
-    X = _zeros(S, B, n)
-    X[1:size(B, 1), :] = B
-    ldiv!(AA, X)
-    return _cut_B(X, 1:n)
+    # For underdetermined system, the triangular solve should only be applied to the top
+    # part of B that contains the rhs. For square problems, the view corresponds to B itself
+    ldiv!(LowerTriangular(adjoint(F.R)), view(B, 1:size(F.R, 2), :))
+    lmul!(F.Q, B)
+
+    return B
 end
 
 # With a real lhs and complex rhs with the same precision, we can reinterpret the complex
diff --git a/stdlib/LinearAlgebra/src/schur.jl b/stdlib/LinearAlgebra/src/schur.jl
index 403339d8b1debc..610067fe514529 100644
--- a/stdlib/LinearAlgebra/src/schur.jl
+++ b/stdlib/LinearAlgebra/src/schur.jl
@@ -104,6 +104,13 @@ be obtained from the `Schur` object `F` with either `F.Schur` or `F.T` and the
 orthogonal/unitary Schur vectors can be obtained with `F.vectors` or `F.Z` such that
 `A = F.vectors * F.Schur * F.vectors'`. The eigenvalues of `A` can be obtained with `F.values`.
 
+For real `A`, the Schur factorization is "quasitriangular", which means that it
+is upper-triangular except with 2×2 diagonal blocks for any conjugate pair
+of complex eigenvalues; this allows the factorization to be purely real even
+when there are complex eigenvalues.  To obtain the (complex) purely upper-triangular
+Schur factorization from a real quasitriangular factorization, you can use
+`Schur{Complex}(schur(A))`.
+
 Iterating the decomposition produces the components `F.T`, `F.Z`, and `F.values`.
 
 # Examples
@@ -142,7 +149,7 @@ true
 schur(A::StridedMatrix{<:BlasFloat}) = schur!(copy(A))
 schur(A::StridedMatrix{T}) where T = schur!(copy_oftype(A, eigtype(T)))
 
-schur(A::AbstractMatrix{T}) where {T} = schur!(copyto!(Matrix{eigtype(T)}(undef, size(A)...), A))
+schur(A::AbstractMatrix{T}) where {T} = schur!(copy_to_array(A, eigtype(T)))
 function schur(A::RealHermSymComplexHerm)
     F = eigen(A; sortby=nothing)
     return Schur(typeof(F.vectors)(Diagonal(F.values)), F.vectors, F.values)
@@ -207,6 +214,48 @@ function show(io::IO, mime::MIME{Symbol("text/plain")}, F::Schur)
     show(io, mime, F.values)
 end
 
+# convert a (standard-form) quasi-triangular real Schur factorization into a
+# triangular complex Schur factorization.
+#
+# Based on the "triangularize" function from GenericSchur.jl,
+# released under the MIT "Expat" license by @RalphAS
+function Schur{CT}(S::Schur{<:Real}) where {CT<:Complex}
+    Tr = S.T  # real quasi-triangular factor; only read below, never modified
+    T = CT.(Tr)  # complex copy that the rotations update in place
+    Z = CT.(S.Z)
+    n = size(T,1)
+    for j=n:-1:2
+        if !iszero(Tr[j,j-1])  # nonzero subdiagonal entry: rows/columns j-1, j form a 2×2 block
+            # We want a unitary similarity transform from
+            # ┌   ┐      ┌     ┐
+            # │a b│      │w₁  x│
+            # │c a│ into │0  w₂│ where bc < 0 (a,b,c real)
+            # └   ┘      └     ┘
+            # If we write it as
+            # ┌     ┐
+            # │u  v'│
+            # │-v u'│
+            # └     ┘
+            # and make the Ansatz that u is real (so v is imaginary),
+            # we arrive at a Givens rotation:
+            # θ = atan(sqrt(-Tr[j,j-1]/Tr[j-1,j]))
+            # s,c = sin(θ), cos(θ)
+            s = sqrt(abs(Tr[j,j-1]))
+            c = sqrt(abs(Tr[j-1,j]))
+            r = hypot(s,c)  # s/r and c/r are sin(θ) and cos(θ), obtained without evaluating θ
+            G = Givens(j-1,j,complex(c/r),im*(-s/r))
+            lmul!(G,T)  # together with the next line: T ← G*T*G'
+            rmul!(T,G')
+            rmul!(Z,G')  # apply the same rotation to the Schur vectors: Z ← Z*G'
+        end
+    end
+    return Schur(triu!(T),Z,diag(T))  # triu! clears what is left below the diagonal
+end
+
+Schur{Complex}(S::Schur{<:Complex}) = S
+Schur{T}(S::Schur{T}) where {T} = S
+Schur{T}(S::Schur) where {T} = Schur(T.(S.T), T.(S.Z), T <: Real && !(eltype(S.values) <: Real) ? complex(T).(S.values) : T.(S.values))
+
 """
     ordschur!(F::Schur, select::Union{Vector{Bool},BitVector}) -> F::Schur
 
@@ -306,6 +355,10 @@ function schur(A::StridedMatrix{TA}, B::StridedMatrix{TB}) where {TA,TB}
     S = promote_type(eigtype(TA), TB)
     return schur!(copy_oftype(A, S), copy_oftype(B, S))
 end
+function schur(A::AbstractMatrix{TA}, B::AbstractMatrix{TB}) where {TA,TB}
+    S = promote_type(eigtype(TA), TB)
+    return schur!(copy_oftype(A, S), copy_oftype(B, S))
+end
 
 """
     ordschur!(F::GeneralizedSchur, select::Union{Vector{Bool},BitVector}) -> F::GeneralizedSchur
diff --git a/stdlib/LinearAlgebra/src/special.jl b/stdlib/LinearAlgebra/src/special.jl
index 636c19159de739..5c25c0993e9cc6 100644
--- a/stdlib/LinearAlgebra/src/special.jl
+++ b/stdlib/LinearAlgebra/src/special.jl
@@ -281,9 +281,15 @@ function (-)(A::UniformScaling, B::Diagonal{<:Number})
 end
 
 rmul!(A::AbstractTriangular, adjB::Adjoint{<:Any,<:Union{QRCompactWYQ,QRPackedQ}}) =
-    (B = adjB.parent; rmul!(full!(A), adjoint(B)))
+    rmul!(full!(A), adjB)
 *(A::AbstractTriangular, adjB::Adjoint{<:Any,<:Union{QRCompactWYQ,QRPackedQ}}) =
-    (B = adjB.parent; *(copyto!(similar(parent(A)), A), adjoint(B)))
+    *(copyto!(similar(parent(A)), A), adjB)
+*(A::BiTriSym, adjB::Adjoint{<:Any,<:Union{QRCompactWYQ, QRPackedQ}}) =
+    rmul!(copyto!(Array{promote_type(eltype(A), eltype(adjB))}(undef, size(A)...), A), adjB)
+*(adjA::Adjoint{<:Any,<:Union{QRCompactWYQ, QRPackedQ}}, B::Diagonal) =
+    lmul!(adjA, copyto!(Array{promote_type(eltype(adjA), eltype(B))}(undef, size(B)...), B))
+*(adjA::Adjoint{<:Any,<:Union{QRCompactWYQ, QRPackedQ}}, B::BiTriSym) =
+    lmul!(adjA, copyto!(Array{promote_type(eltype(adjA), eltype(B))}(undef, size(B)...), B))
 
 # fill[stored]! methods
 fillstored!(A::Diagonal, x) = (fill!(A.diag, x); A)
diff --git a/stdlib/LinearAlgebra/src/structuredbroadcast.jl b/stdlib/LinearAlgebra/src/structuredbroadcast.jl
index f9416b4cf3557f..95a18427022913 100644
--- a/stdlib/LinearAlgebra/src/structuredbroadcast.jl
+++ b/stdlib/LinearAlgebra/src/structuredbroadcast.jl
@@ -105,9 +105,29 @@ function isstructurepreserving(::typeof(Base.literal_pow), ::Ref{typeof(^)}, ::S
 end
 isstructurepreserving(f, args...) = false
 
-_iszero(n::Number) = iszero(n)
-_iszero(x) = x == 0
-fzeropreserving(bc) = (v = fzero(bc); !ismissing(v) && _iszero(v))
+"""
+    iszerodefined(T::Type)
+
+Return a `Bool` indicating whether `iszero` is well-defined for objects of type
+`T`. By default, this function returns `false` unless `T <: Number`. Note that
+this function may return `true` even if `zero(::T)` is not defined as long as
+`iszero(::T)` has a method that does not require `zero(::T)`.
+
+This function is used to determine if mapping the elements of an array with
+a specific structure of nonzero elements preserves this structure.
+For instance, it is used to determine whether the output of
+`tuple.(Diagonal([1, 2]))` is `Diagonal([(1,), (2,)])` or
+`[(1,) (0,); (0,) (2,)]`. For this, we need to determine whether `(0,)` is
+considered to be zero. `iszero((0,))` falls back to `(0,) == zero((0,))` which
+fails as `zero(::Tuple{Int})` is not defined. However,
+`iszerodefined(::Tuple{Int})` is `false`, hence we fall back to the comparison
+`(0,) == 0` which returns `false` and decides that the correct output is
+`[(1,) (0,); (0,) (2,)]`.
+"""
+iszerodefined(::Type) = false
+iszerodefined(::Type{<:Number}) = true
+
+fzeropreserving(bc) = (v = fzero(bc); !ismissing(v) && (iszerodefined(typeof(v)) ? iszero(v) : v == 0))
 # Like sparse matrices, we assume that the zero-preservation property of a broadcasted
 # expression is stable.  We can test the zero-preservability by applying the function
 # in cases where all other arguments are known scalars against a zero from the structured
diff --git a/stdlib/LinearAlgebra/src/svd.jl b/stdlib/LinearAlgebra/src/svd.jl
index 68bce4793661ff..bee4c8451c0a60 100644
--- a/stdlib/LinearAlgebra/src/svd.jl
+++ b/stdlib/LinearAlgebra/src/svd.jl
@@ -72,6 +72,11 @@ function SVD{T}(U::AbstractArray, S::AbstractVector{Tr}, Vt::AbstractArray) wher
         convert(AbstractArray{T}, Vt))
 end
 
+SVD{T}(F::SVD) where {T} = SVD(
+    convert(AbstractMatrix{T}, F.U),
+    convert(AbstractVector{real(T)}, F.S),
+    convert(AbstractMatrix{T}, F.Vt))
+Factorization{T}(F::SVD) where {T} = SVD{T}(F)
 
 # iteration for destructuring into components
 Base.iterate(S::SVD) = (S.U, Val(:S))
@@ -170,6 +175,10 @@ true
 function svd(A::StridedVecOrMat{T}; full::Bool = false, alg::Algorithm = default_svd_alg(A)) where {T}
     svd!(copy_oftype(A, eigtype(T)), full = full, alg = alg)
 end
+function svd(A::StridedVecOrMat{T}; full::Bool = false, alg::Algorithm = default_svd_alg(A)) where {T <: Union{Float16,Complex{Float16}}}
+    # factorize a copy converted to `eigtype(T)`, then convert the factors back to `T`
+    return SVD{T}(svd!(copy_oftype(A, eigtype(T)), full = full, alg = alg))
+end
 function svd(x::Number; full::Bool = false, alg::Algorithm = default_svd_alg(x))
     SVD(x == 0 ? fill(one(x), 1, 1) : fill(x/abs(x), 1, 1), [abs(x)], fill(one(x), 1, 1))
 end
@@ -235,10 +244,12 @@ svdvals(A::AbstractVector{<:BlasFloat}) = [norm(A)]
 svdvals(x::Number) = abs(x)
 svdvals(S::SVD{<:Any,T}) where {T} = (S.S)::Vector{T}
 
-# SVD least squares
+### SVD least squares ###
 function ldiv!(A::SVD{T}, B::StridedVecOrMat) where T
+    m, n = size(A)
     k = searchsortedlast(A.S, eps(real(T))*A.S[1], rev=true)
-    view(A.Vt,1:k,:)' * (view(A.S,1:k) .\ (view(A.U,:,1:k)' * B))
+    mul!(view(B, 1:n, :), view(A.Vt, 1:k, :)', view(A.S, 1:k) .\ (view(A.U, :, 1:k)' * _cut_B(B, 1:m)))
+    return B
 end
 
 function inv(F::SVD{T}) where T
@@ -252,6 +263,10 @@ end
 size(A::SVD, dim::Integer) = dim == 1 ? size(A.U, dim) : size(A.Vt, dim)
 size(A::SVD) = (size(A, 1), size(A, 2))
 
+function adjoint(F::SVD)
+    return SVD(F.Vt', F.S, F.U')
+end
+
 function show(io::IO, mime::MIME{Symbol("text/plain")}, F::SVD{<:Any,<:Any,<:AbstractArray})
     summary(io, F); println(io)
     println(io, "U factor:")
diff --git a/stdlib/LinearAlgebra/src/symmetric.jl b/stdlib/LinearAlgebra/src/symmetric.jl
index ab031b4d30c43b..d8e7b6609c0556 100644
--- a/stdlib/LinearAlgebra/src/symmetric.jl
+++ b/stdlib/LinearAlgebra/src/symmetric.jl
@@ -356,8 +356,6 @@ real(A::Symmetric) = Symmetric(real(A.data), sym_uplo(A.uplo))
 real(A::Hermitian) = Hermitian(real(A.data), sym_uplo(A.uplo))
 imag(A::Symmetric) = Symmetric(imag(A.data), sym_uplo(A.uplo))
 
-Base.copy(A::Adjoint{<:Any,<:Hermitian}) = copy(A.parent)
-Base.copy(A::Transpose{<:Any,<:Symmetric}) = copy(A.parent)
 Base.copy(A::Adjoint{<:Any,<:Symmetric}) =
     Symmetric(copy(adjoint(A.parent.data)), ifelse(A.parent.uplo == 'U', :L, :U))
 Base.copy(A::Transpose{<:Any,<:Hermitian}) =
@@ -600,36 +598,6 @@ function dot(x::AbstractVector, A::RealHermSymComplexHerm, y::AbstractVector)
     return r
 end
 
-# Fallbacks to avoid generic_matvecmul!/generic_matmatmul!
-## Symmetric{<:Number} and Hermitian{<:Real} are invariant to transpose; peel off the t
-*(transA::Transpose{<:Any,<:RealHermSymComplexSym}, B::AbstractVector) = transA.parent * B
-*(transA::Transpose{<:Any,<:RealHermSymComplexSym}, B::AbstractMatrix) = transA.parent * B
-*(A::AbstractMatrix, transB::Transpose{<:Any,<:RealHermSymComplexSym}) = A * transB.parent
-## Hermitian{<:Number} and Symmetric{<:Real} are invariant to adjoint; peel off the c
-*(adjA::Adjoint{<:Any,<:RealHermSymComplexHerm}, B::AbstractVector) = adjA.parent * B
-*(adjA::Adjoint{<:Any,<:RealHermSymComplexHerm}, B::AbstractMatrix) = adjA.parent * B
-*(A::AbstractMatrix, adjB::Adjoint{<:Any,<:RealHermSymComplexHerm}) = A * adjB.parent
-
-# ambiguities with transposed AbstractMatrix methods in linalg/matmul.jl
-*(transA::Transpose{<:Any,<:RealHermSym}, transB::Transpose{<:Any,<:RealHermSym}) = transA * transB.parent
-*(transA::Transpose{<:Any,<:RealHermSym}, transB::Transpose{<:Any,<:RealHermSymComplexSym}) = transA * transB.parent
-*(transA::Transpose{<:Any,<:RealHermSymComplexSym}, transB::Transpose{<:Any,<:RealHermSymComplexSym}) = transA.parent * transB.parent
-*(transA::Transpose{<:Any,<:RealHermSymComplexSym}, transB::Transpose{<:Any,<:RealHermSym}) = transA.parent * transB
-*(transA::Transpose{<:Any,<:RealHermSymComplexSym}, transB::Transpose{<:Any,<:RealHermSymComplexHerm}) = transA.parent * transB
-*(transA::Transpose{<:Any,<:RealHermSymComplexHerm}, transB::Transpose{<:Any,<:RealHermSymComplexSym}) = transA * transB.parent
-*(adjA::Adjoint{<:Any,<:RealHermSym}, adjB::Adjoint{<:Any,<:RealHermSym}) = adjA * adjB.parent
-*(adjA::Adjoint{<:Any,<:RealHermSymComplexHerm}, adjB::Adjoint{<:Any,<:RealHermSymComplexHerm}) = adjA.parent * adjB.parent
-*(adjA::Adjoint{<:Any,<:RealHermSym}, adjB::Adjoint{<:Any,<:RealHermSymComplexHerm}) = adjA * adjB.parent
-*(adjA::Adjoint{<:Any,<:RealHermSymComplexSym}, adjB::Adjoint{<:Any,<:RealHermSymComplexHerm}) = adjA * adjB.parent
-*(adjA::Adjoint{<:Any,<:RealHermSymComplexHerm}, adjB::Adjoint{<:Any,<:RealHermSym}) = adjA.parent * adjB
-*(adjA::Adjoint{<:Any,<:RealHermSymComplexHerm}, adjB::Adjoint{<:Any,<:RealHermSymComplexSym}) = adjA.parent * adjB
-
-# ambiguities with AbstractTriangular
-*(transA::Transpose{<:Any,<:RealHermSymComplexSym}, B::AbstractTriangular) = transA.parent * B
-*(A::AbstractTriangular, transB::Transpose{<:Any,<:RealHermSymComplexSym}) = A * transB.parent
-*(adjA::Adjoint{<:Any,<:RealHermSymComplexHerm}, B::AbstractTriangular) = adjA.parent * B
-*(A::AbstractTriangular, adjB::Adjoint{<:Any,<:RealHermSymComplexHerm}) = A * adjB.parent
-
 # Scaling with Number
 *(A::Symmetric, x::Number) = Symmetric(A.data*x, sym_uplo(A.uplo))
 *(x::Number, A::Symmetric) = Symmetric(x*A.data, sym_uplo(A.uplo))
@@ -676,7 +644,7 @@ end
 inv(A::Hermitian{<:Any,<:StridedMatrix}) = Hermitian(_inv(A), sym_uplo(A.uplo))
 inv(A::Symmetric{<:Any,<:StridedMatrix}) = Symmetric(_inv(A), sym_uplo(A.uplo))
 
-function svd(A::RealHermSymComplexHerm, full::Bool=false)
+function svd(A::RealHermSymComplexHerm; full::Bool=false)
     vals, vecs = eigen(A)
     I = sortperm(vals; by=abs, rev=true)
     permute!(vals, I)
@@ -770,6 +738,12 @@ for func in (:exp, :cos, :sin, :tan, :cosh, :sinh, :tanh, :atan, :asinh, :atanh)
     end
 end
 
+function cis(A::Union{RealHermSymComplexHerm,SymTridiagonal{<:Real}})
+    vals, vecs = eigen(A)
+    # The result is unitary, and is complex-symmetric for real A
+    return (vecs .* cis.(vals')) * vecs'
+end
+
 for func in (:acos, :asin)
     @eval begin
         function ($func)(A::HermOrSym{<:Real})
@@ -878,35 +852,3 @@ for func in (:log, :sqrt)
         end
     end
 end
-
-# disambiguation methods: *(Adj of RealHermSymComplexHerm, Trans of RealHermSymComplexSym) and symmetric partner
-*(A::Adjoint{<:Any,<:RealHermSymComplexHerm}, B::Transpose{<:Any,<:RealHermSymComplexSym}) = A.parent * B.parent
-*(A::Transpose{<:Any,<:RealHermSymComplexSym}, B::Adjoint{<:Any,<:RealHermSymComplexHerm}) = A.parent * B.parent
-# disambiguation methods: *(Adj/Trans of AbsVec/AbsMat, Adj/Trans of RealHermSymComplex{Herm|Sym})
-*(A::Adjoint{<:Any,<:AbstractVector}, B::Adjoint{<:Any,<:RealHermSymComplexHerm}) = A * B.parent
-*(A::Adjoint{<:Any,<:AbstractMatrix}, B::Adjoint{<:Any,<:RealHermSymComplexHerm}) = A * B.parent
-*(A::Adjoint{<:Any,<:AbstractVector}, B::Transpose{<:Any,<:RealHermSymComplexSym}) = A * B.parent
-*(A::Adjoint{<:Any,<:AbstractMatrix}, B::Transpose{<:Any,<:RealHermSymComplexSym}) = A * B.parent
-*(A::Transpose{<:Any,<:AbstractVector}, B::Adjoint{<:Any,<:RealHermSymComplexHerm}) = A * B.parent
-*(A::Transpose{<:Any,<:AbstractMatrix}, B::Adjoint{<:Any,<:RealHermSymComplexHerm}) = A * B.parent
-*(A::Transpose{<:Any,<:AbstractVector}, B::Transpose{<:Any,<:RealHermSymComplexSym}) = A * B.parent
-*(A::Transpose{<:Any,<:AbstractMatrix}, B::Transpose{<:Any,<:RealHermSymComplexSym}) = A * B.parent
-# disambiguation methods: *(Adj/Trans of RealHermSymComplex{Herm|Sym}, Adj/Trans of AbsVec/AbsMat)
-*(A::Adjoint{<:Any,<:RealHermSymComplexHerm}, B::Adjoint{<:Any,<:AbstractVector}) = A.parent * B
-*(A::Adjoint{<:Any,<:RealHermSymComplexHerm}, B::Adjoint{<:Any,<:AbstractMatrix}) = A.parent * B
-*(A::Adjoint{<:Any,<:RealHermSymComplexHerm}, B::Transpose{<:Any,<:AbstractVector}) = A.parent * B
-*(A::Adjoint{<:Any,<:RealHermSymComplexHerm}, B::Transpose{<:Any,<:AbstractMatrix}) = A.parent * B
-*(A::Transpose{<:Any,<:RealHermSymComplexSym}, B::Adjoint{<:Any,<:AbstractVector}) = A.parent * B
-*(A::Transpose{<:Any,<:RealHermSymComplexSym}, B::Adjoint{<:Any,<:AbstractMatrix}) = A.parent * B
-*(A::Transpose{<:Any,<:RealHermSymComplexSym}, B::Transpose{<:Any,<:AbstractVector}) = A.parent * B
-*(A::Transpose{<:Any,<:RealHermSymComplexSym}, B::Transpose{<:Any,<:AbstractMatrix}) = A.parent * B
-
-# disambiguation methods: *(Adj/Trans of AbsTri or RealHermSymComplex{Herm|Sym}, Adj/Trans of other)
-*(A::Adjoint{<:Any,<:AbstractTriangular}, B::Adjoint{<:Any,<:RealHermSymComplexHerm}) = A * B.parent
-*(A::Adjoint{<:Any,<:AbstractTriangular}, B::Transpose{<:Any,<:RealHermSymComplexSym}) = A * B.parent
-*(A::Transpose{<:Any,<:AbstractTriangular}, B::Adjoint{<:Any,<:RealHermSymComplexHerm}) = A * B.parent
-*(A::Transpose{<:Any,<:AbstractTriangular}, B::Transpose{<:Any,<:RealHermSymComplexSym}) = A * B.parent
-*(A::Adjoint{<:Any,<:RealHermSymComplexHerm}, B::Adjoint{<:Any,<:AbstractTriangular}) = A.parent * B
-*(A::Adjoint{<:Any,<:RealHermSymComplexHerm}, B::Transpose{<:Any,<:AbstractTriangular}) = A.parent * B
-*(A::Transpose{<:Any,<:RealHermSymComplexSym}, B::Adjoint{<:Any,<:AbstractTriangular}) = A.parent * B
-*(A::Transpose{<:Any,<:RealHermSymComplexSym}, B::Transpose{<:Any,<:AbstractTriangular}) = A.parent * B
diff --git a/stdlib/LinearAlgebra/src/triangular.jl b/stdlib/LinearAlgebra/src/triangular.jl
index cfc4e948d8d3d3..bd0566a11b3f2b 100644
--- a/stdlib/LinearAlgebra/src/triangular.jl
+++ b/stdlib/LinearAlgebra/src/triangular.jl
@@ -33,6 +33,8 @@ for t in (:LowerTriangular, :UnitLowerTriangular, :UpperTriangular,
         end
         Matrix(A::$t{T}) where {T} = Matrix{T}(A)
 
+        AbstractMatrix{T}(A::$t) where {T} = $t{T}(A)
+
         size(A::$t, d) = size(A.data, d)
         size(A::$t) = size(A.data)
 
@@ -285,23 +287,11 @@ end
 
 function istril(A::Union{LowerTriangular,UnitLowerTriangular}, k::Integer=0)
     k >= 0 && return true
-    m, n = size(A)
-    for j in max(1, k + 2):n
-        for i in 1:min(j - k - 1, m)
-            iszero(A[i, j]) || return false
-        end
-    end
-    return true
+    return _istril(A, k)
 end
 function istriu(A::Union{UpperTriangular,UnitUpperTriangular}, k::Integer=0)
     k <= 0 && return true
-    m, n = size(A)
-    for j in 1:min(n, m + k - 1)
-        for i in max(1, j - k + 1):m
-            iszero(A[i, j]) || return false
-        end
-    end
-    return true
+    return _istriu(A, k)
 end
 istril(A::Adjoint) = istriu(A.parent)
 istril(A::Transpose) = istriu(A.parent)
@@ -1192,148 +1182,157 @@ for (t, tfun) in ((:Adjoint, :adjoint), (:Transpose, :transpose))
 end
 
 #Generic solver using naive substitution
-# manually hoisting x[j] significantly improves performance as of Dec 2015
+# manually hoisting b[j] significantly improves performance as of Dec 2015
 # manually eliding bounds checking significantly improves performance as of Dec 2015
 # directly indexing A.data rather than A significantly improves performance as of Dec 2015
 # replacing repeated references to A.data with [Adata = A.data and references to Adata]
 # does not significantly impact performance as of Dec 2015
 # replacing repeated references to A.data[j,j] with [Ajj = A.data[j,j] and references to Ajj]
 # does not significantly impact performance as of Dec 2015
-function naivesub!(A::UpperTriangular, b::AbstractVector, x::AbstractVector = b)
-    require_one_based_indexing(A, b, x)
+function ldiv!(A::UpperTriangular, b::AbstractVector)
+    require_one_based_indexing(A, b)
     n = size(A, 2)
-    if !(n == length(b) == length(x))
-        throw(DimensionMismatch("second dimension of left hand side A, $n, length of output x, $(length(x)), and length of right hand side b, $(length(b)), must be equal"))
+    if !(n == length(b))
+        throw(DimensionMismatch("second dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal"))
     end
     @inbounds for j in n:-1:1
         iszero(A.data[j,j]) && throw(SingularException(j))
-        xj = x[j] = A.data[j,j] \ b[j]
+        bj = b[j] = A.data[j,j] \ b[j]
         for i in j-1:-1:1 # counterintuitively 1:j-1 performs slightly better
-            b[i] -= A.data[i,j] * xj
+            b[i] -= A.data[i,j] * bj
         end
     end
-    x
+    return b
 end
-function naivesub!(A::UnitUpperTriangular, b::AbstractVector, x::AbstractVector = b)
-    require_one_based_indexing(A, b, x)
+function ldiv!(A::UnitUpperTriangular, b::AbstractVector)
+    require_one_based_indexing(A, b)
     n = size(A, 2)
-    if !(n == length(b) == length(x))
-        throw(DimensionMismatch("second dimension of left hand side A, $n, length of output x, $(length(x)), and length of right hand side b, $(length(b)), must be equal"))
+    if !(n == length(b))
+        throw(DimensionMismatch("second dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal"))
     end
     @inbounds for j in n:-1:1
-        xj = x[j] = b[j]
+        bj = b[j]
         for i in j-1:-1:1 # counterintuitively 1:j-1 performs slightly better
-            b[i] -= A.data[i,j] * xj
+            b[i] -= A.data[i,j] * bj
         end
     end
-    x
+    return b
 end
-function naivesub!(A::LowerTriangular, b::AbstractVector, x::AbstractVector = b)
-    require_one_based_indexing(A, b, x)
+function ldiv!(A::LowerTriangular, b::AbstractVector)
+    require_one_based_indexing(A, b)
     n = size(A, 2)
-    if !(n == length(b) == length(x))
-        throw(DimensionMismatch("second dimension of left hand side A, $n, length of output x, $(length(x)), and length of right hand side b, $(length(b)), must be equal"))
+    if !(n == length(b))
+        throw(DimensionMismatch("second dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal"))
     end
     @inbounds for j in 1:n
         iszero(A.data[j,j]) && throw(SingularException(j))
-        xj = x[j] = A.data[j,j] \ b[j]
+        bj = b[j] = A.data[j,j] \ b[j]
         for i in j+1:n
-            b[i] -= A.data[i,j] * xj
+            b[i] -= A.data[i,j] * bj
         end
     end
-    x
+    return b
 end
-function naivesub!(A::UnitLowerTriangular, b::AbstractVector, x::AbstractVector = b)
-    require_one_based_indexing(A, b, x)
+function ldiv!(A::UnitLowerTriangular, b::AbstractVector)
+    require_one_based_indexing(A, b)
     n = size(A, 2)
-    if !(n == length(b) == length(x))
-        throw(DimensionMismatch("second dimension of left hand side A, $n, length of output x, $(length(x)), and length of right hand side b, $(length(b)), must be equal"))
+    if !(n == length(b))
+        throw(DimensionMismatch("second dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal"))
     end
     @inbounds for j in 1:n
-        xj = x[j] = b[j]
+        bj = b[j]
         for i in j+1:n
-            b[i] -= A.data[i,j] * xj
+            b[i] -= A.data[i,j] * bj
         end
     end
-    x
+    return b
 end
+function ldiv!(A::AbstractTriangular, B::AbstractMatrix)
+    require_one_based_indexing(A, B)
+    mA = size(A, 2)  # test the dimension the message reports, as the vector methods do
+    n = size(B, 1)
+    if mA != n
+        throw(DimensionMismatch("second dimension of left hand side A, $mA, and first dimension of right hand side B, $n, must be equal"))
+    end
+    for b in eachcol(B)  # solve column by column, in place, with the vector method
+        ldiv!(A, b)
+    end
+    return B
+end
+
 # in the following transpose and conjugate transpose naive substitution variants,
-# accumulating in z rather than b[j] significantly improves performance as of Dec 2015
+# accumulating in z rather than b[j] significantly improves performance as of Dec 2015
 for (t, tfun) in ((:Adjoint, :adjoint), (:Transpose, :transpose))
     @eval begin
-        function ldiv!(xA::UpperTriangular{<:Any,<:$t}, b::AbstractVector, x::AbstractVector)
-            require_one_based_indexing(xA, b, x)
+        function ldiv!(xA::UpperTriangular{<:Any,<:$t}, b::AbstractVector)
+            require_one_based_indexing(xA, b)
             A = parent(parent(xA))
             n = size(A, 1)
-            if !(n == length(b) == length(x))
-                throw(DimensionMismatch("first dimension of left hand side A, $n, length of output x, $(length(x)), and length of right hand side b, $(length(b)), must be equal"))
+            if !(n == length(b))
+                throw(DimensionMismatch("first dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal"))
             end
             @inbounds for j in n:-1:1
                 z = b[j]
                 for i in n:-1:j+1
-                    z -= $tfun(A[i,j]) * x[i]
+                    z -= $tfun(A[i,j]) * b[i]
                 end
                 iszero(A[j,j]) && throw(SingularException(j))
-                x[j] = $tfun(A[j,j]) \ z
+                b[j] = $tfun(A[j,j]) \ z
             end
-            x
+            return b
         end
-        ldiv!(xA::UpperTriangular{<:Any,<:$t}, b::AbstractVector) = ldiv!(xA, b, b)
 
-        function ldiv!(xA::UnitUpperTriangular{<:Any,<:$t}, b::AbstractVector, x::AbstractVector)
-            require_one_based_indexing(xA, b, x)
+        function ldiv!(xA::UnitUpperTriangular{<:Any,<:$t}, b::AbstractVector)
+            require_one_based_indexing(xA, b)
             A = parent(parent(xA))
             n = size(A, 1)
-            if !(n == length(b) == length(x))
-                throw(DimensionMismatch("first dimension of left hand side A, $n, length of output x, $(length(x)), and length of right hand side b, $(length(b)), must be equal"))
+            if !(n == length(b))
+                throw(DimensionMismatch("first dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal"))
             end
             @inbounds for j in n:-1:1
                 z = b[j]
                 for i in n:-1:j+1
-                    z -= $tfun(A[i,j]) * x[i]
+                    z -= $tfun(A[i,j]) * b[i]
                 end
-                x[j] = z
+                b[j] = z
             end
-            x
+            return b
         end
-        ldiv!(xA::UnitUpperTriangular{<:Any,<:$t}, b::AbstractVector) = ldiv!(xA, b, b)
 
-        function ldiv!(xA::LowerTriangular{<:Any,<:$t}, b::AbstractVector, x::AbstractVector)
-            require_one_based_indexing(xA, b, x)
+        function ldiv!(xA::LowerTriangular{<:Any,<:$t}, b::AbstractVector)
+            require_one_based_indexing(xA, b)
             A = parent(parent(xA))
             n = size(A, 1)
-            if !(n == length(b) == length(x))
-                throw(DimensionMismatch("first dimension of left hand side A, $n, length of output x, $(length(x)), and length of right hand side b, $(length(b)), must be equal"))
+            if !(n == length(b))
+                throw(DimensionMismatch("first dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal"))
             end
             @inbounds for j in 1:n
                 z = b[j]
                 for i in 1:j-1
-                    z -= $tfun(A[i,j]) * x[i]
+                    z -= $tfun(A[i,j]) * b[i]
                 end
                 iszero(A[j,j]) && throw(SingularException(j))
-                x[j] = $tfun(A[j,j]) \ z
+                b[j] = $tfun(A[j,j]) \ z
             end
-            x
+            return b
         end
-        ldiv!(xA::LowerTriangular{<:Any,<:$t}, b::AbstractVector) = ldiv!(xA, b, b)
 
-        function ldiv!(xA::UnitLowerTriangular{<:Any,<:$t}, b::AbstractVector, x::AbstractVector)
-            require_one_based_indexing(xA, b, x)
+        function ldiv!(xA::UnitLowerTriangular{<:Any,<:$t}, b::AbstractVector)
+            require_one_based_indexing(xA, b)
             A = parent(parent(xA))
             n = size(A, 1)
-            if !(n == length(b) == length(x))
-                throw(DimensionMismatch("first dimension of left hand side A, $n, length of output x, $(length(x)), and length of right hand side b, $(length(b)), must be equal"))
+            if !(n == length(b))
+                throw(DimensionMismatch("first dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal"))
             end
             @inbounds for j in 1:n
                 z = b[j]
                 for i in 1:j-1
-                    z -= $tfun(A[i,j]) * x[i]
+                    z -= $tfun(A[i,j]) * b[i]
                 end
-                x[j] = z
+                b[j] = z
             end
-            x
+            return b
         end
-        ldiv!(xA::UnitLowerTriangular{<:Any,<:$t}, b::AbstractVector) = ldiv!(xA, b, b)
     end
 end
 
@@ -1518,64 +1517,56 @@ for (f, f2!) in ((:*, :lmul!), (:\, :ldiv!))
         function ($f)(A::LowerTriangular, B::LowerTriangular)
             TAB = typeof(($f)(zero(eltype(A)), zero(eltype(B))) +
                          ($f)(zero(eltype(A)), zero(eltype(B))))
-            BB = similar(B, TAB, size(B))
-            copyto!(BB, B)
+            BB = copy_similar(B, TAB)
             return LowerTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB))
         end
 
         function $(f)(A::UnitLowerTriangular, B::LowerTriangular)
             TAB = typeof((*)(zero(eltype(A)), zero(eltype(B))) +
                          (*)(zero(eltype(A)), zero(eltype(B))))
-            BB = similar(B, TAB, size(B))
-            copyto!(BB, B)
+            BB = copy_similar(B, TAB)  # copy of B with eltype TAB
             return LowerTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB))
         end
 
         function $(f)(A::LowerTriangular, B::UnitLowerTriangular)
             TAB = typeof(($f)(zero(eltype(A)), zero(eltype(B))) +
                          ($f)(zero(eltype(A)), zero(eltype(B))))
-            BB = similar(B, TAB, size(B))
-            copyto!(BB, B)
+            BB = copy_similar(B, TAB)  # copy of B with eltype TAB
             return LowerTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB))
         end
 
         function $(f)(A::UnitLowerTriangular, B::UnitLowerTriangular)
             TAB = typeof((*)(zero(eltype(A)), zero(eltype(B))) +
                          (*)(zero(eltype(A)), zero(eltype(B))))
-            BB = similar(B, TAB, size(B))
-            copyto!(BB, B)
+            BB = copy_similar(B, TAB)  # copy of B with eltype TAB
             return UnitLowerTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB))
         end
 
         function ($f)(A::UpperTriangular, B::UpperTriangular)
             TAB = typeof(($f)(zero(eltype(A)), zero(eltype(B))) +
                          ($f)(zero(eltype(A)), zero(eltype(B))))
-            BB = similar(B, TAB, size(B))
-            copyto!(BB, B)
+            BB = copy_similar(B, TAB)
             return UpperTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB))
         end
 
         function ($f)(A::UnitUpperTriangular, B::UpperTriangular)
             TAB = typeof((*)(zero(eltype(A)), zero(eltype(B))) +
                          (*)(zero(eltype(A)), zero(eltype(B))))
-            BB = similar(B, TAB, size(B))
-            copyto!(BB, B)
+            BB = copy_similar(B, TAB)
             return UpperTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB))
         end
 
         function ($f)(A::UpperTriangular, B::UnitUpperTriangular)
             TAB = typeof(($f)(zero(eltype(A)), zero(eltype(B))) +
                          ($f)(zero(eltype(A)), zero(eltype(B))))
-            BB = similar(B, TAB, size(B))
-            copyto!(BB, B)
+            BB = copy_similar(B, TAB)
             return UpperTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB))
         end
 
         function ($f)(A::UnitUpperTriangular, B::UnitUpperTriangular)
             TAB = typeof((*)(zero(eltype(A)), zero(eltype(B))) +
                          (*)(zero(eltype(A)), zero(eltype(B))))
-            BB = similar(B, TAB, size(B))
-            copyto!(BB, B)
+            BB = copy_similar(B, TAB)
             return UnitUpperTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB))
         end
     end
@@ -1584,57 +1575,49 @@ end
 function (/)(A::LowerTriangular, B::LowerTriangular)
     TAB = typeof((/)(zero(eltype(A)), one(eltype(B))) +
                  (/)(zero(eltype(A)), one(eltype(B))))
-    AA = similar(A, TAB, size(A))
-    copyto!(AA, A)
+    AA = copy_similar(A, TAB)
     return LowerTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B)))
 end
 function (/)(A::UnitLowerTriangular, B::LowerTriangular)
     TAB = typeof((/)(zero(eltype(A)), one(eltype(B))) +
                  (/)(zero(eltype(A)), one(eltype(B))))
-    AA = similar(A, TAB, size(A))
-    copyto!(AA, A)
+    AA = copy_similar(A, TAB)
     return LowerTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B)))
 end
 function (/)(A::LowerTriangular, B::UnitLowerTriangular)
     TAB = typeof((/)(zero(eltype(A)), one(eltype(B))) +
                  (/)(zero(eltype(A)), one(eltype(B))))
-    AA = similar(A, TAB, size(A))
-    copyto!(AA, A)
+    AA = copy_similar(A, TAB)
     return LowerTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B)))
 end
 function (/)(A::UnitLowerTriangular, B::UnitLowerTriangular)
     TAB = typeof((*)(zero(eltype(A)), zero(eltype(B))) +
                  (*)(zero(eltype(A)), zero(eltype(B))))
-    AA = similar(A, TAB, size(A))
-    copyto!(AA, A)
+    AA = copy_similar(A, TAB)
     return UnitLowerTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B)))
 end
 function (/)(A::UpperTriangular, B::UpperTriangular)
     TAB = typeof((/)(zero(eltype(A)), one(eltype(B))) +
                  (/)(zero(eltype(A)), one(eltype(B))))
-    AA = similar(A, TAB, size(A))
-    copyto!(AA, A)
+    AA = copy_similar(A, TAB)
     return UpperTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B)))
 end
 function (/)(A::UnitUpperTriangular, B::UpperTriangular)
     TAB = typeof((/)(zero(eltype(A)), one(eltype(B))) +
                  (/)(zero(eltype(A)), one(eltype(B))))
-    AA = similar(A, TAB, size(A))
-    copyto!(AA, A)
+    AA = copy_similar(A, TAB)
     return UpperTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B)))
 end
 function (/)(A::UpperTriangular, B::UnitUpperTriangular)
     TAB = typeof((/)(zero(eltype(A)), one(eltype(B))) +
                  (/)(zero(eltype(A)), one(eltype(B))))
-    AA = similar(A, TAB, size(A))
-    copyto!(AA, A)
+    AA = copy_similar(A, TAB)
     return UpperTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B)))
 end
 function (/)(A::UnitUpperTriangular, B::UnitUpperTriangular)
     TAB = typeof((*)(zero(eltype(A)), zero(eltype(B))) +
                  (*)(zero(eltype(A)), zero(eltype(B))))
-    AA = similar(A, TAB, size(A))
-    copyto!(AA, A)
+    AA = copy_similar(A, TAB)
     return UnitUpperTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B)))
 end
 
@@ -1642,8 +1625,7 @@ _inner_type_promotion(A,B) = promote_type(eltype(A), eltype(B), typeof(zero(elty
 ## The general promotion methods
 function *(A::AbstractTriangular, B::AbstractTriangular)
     TAB = _inner_type_promotion(A,B)
-    BB = similar(B, TAB, size(B))
-    copyto!(BB, B)
+    BB = copy_similar(B, TAB)
     lmul!(convert(AbstractArray{TAB}, A), BB)
 end
 
@@ -1652,40 +1634,35 @@ for mat in (:AbstractVector, :AbstractMatrix)
     @eval function *(A::AbstractTriangular, B::$mat)
         require_one_based_indexing(B)
         TAB = _inner_type_promotion(A,B)
-        BB = similar(B, TAB, size(B))
-        copyto!(BB, B)
+        BB = copy_similar(B, TAB)
         lmul!(convert(AbstractArray{TAB}, A), BB)
     end
     ### Left division with triangle to the left hence rhs cannot be transposed. No quotients.
     @eval function \(A::Union{UnitUpperTriangular,UnitLowerTriangular}, B::$mat)
         require_one_based_indexing(B)
         TAB = _inner_type_promotion(A,B)
-        BB = similar(B, TAB, size(B))
-        copyto!(BB, B)
+        BB = copy_similar(B, TAB)
         ldiv!(convert(AbstractArray{TAB}, A), BB)
     end
     ### Left division with triangle to the left hence rhs cannot be transposed. Quotients.
     @eval function \(A::Union{UpperTriangular,LowerTriangular}, B::$mat)
         require_one_based_indexing(B)
         TAB = typeof((zero(eltype(A))*zero(eltype(B)) + zero(eltype(A))*zero(eltype(B)))/one(eltype(A)))
-        BB = similar(B, TAB, size(B))
-        copyto!(BB, B)
+        BB = copy_similar(B, TAB)
         ldiv!(convert(AbstractArray{TAB}, A), BB)
     end
     ### Right division with triangle to the right hence lhs cannot be transposed. No quotients.
     @eval function /(A::$mat, B::Union{UnitUpperTriangular, UnitLowerTriangular})
         require_one_based_indexing(A)
         TAB = _inner_type_promotion(A,B)
-        AA = similar(A, TAB, size(A))
-        copyto!(AA, A)
+        AA = copy_similar(A, TAB)
         rdiv!(AA, convert(AbstractArray{TAB}, B))
     end
     ### Right division with triangle to the right hence lhs cannot be transposed. Quotients.
     @eval function /(A::$mat, B::Union{UpperTriangular,LowerTriangular})
         require_one_based_indexing(A)
         TAB = typeof((zero(eltype(A))*zero(eltype(B)) + zero(eltype(A))*zero(eltype(B)))/one(eltype(A)))
-        AA = similar(A, TAB, size(A))
-        copyto!(AA, A)
+        AA = copy_similar(A, TAB)
         rdiv!(AA, convert(AbstractArray{TAB}, B))
     end
 end
@@ -1694,8 +1671,7 @@ end
 function *(A::AbstractMatrix, B::AbstractTriangular)
     require_one_based_indexing(A)
     TAB = _inner_type_promotion(A,B)
-    AA = similar(A, TAB, size(A))
-    copyto!(AA, A)
+    AA = copy_similar(A, TAB)
     rmul!(AA, convert(AbstractArray{TAB}, B))
 end
 # ambiguity resolution with definitions in linalg/rowvector.jl
@@ -2156,12 +2132,17 @@ end
 # 35(4), (2013), C394–C410.
 # Eq. 6.1
 Base.@propagate_inbounds function _log_diag_block_2x2!(A, A0)
-    a, b, c, d = A0[1,1], A0[1,2], A0[2,1], A0[2,2]
-    bc = b * c
-    s = sqrt(-bc)
+    a, b, c = A0[1,1], A0[1,2], A0[2,1]
+    # avoid underflow/overflow for large/small b and c
+    s = sqrt(abs(b)) * sqrt(abs(c))
     θ = atan(s, a)
     t = θ / s
-    a1 = log(a^2 - bc) / 2
+    au = abs(a)
+    if au > s
+        a1 = log1p((s / au)^2) / 2 + log(au)
+    else
+        a1 = log1p((au / s)^2) / 2 + log(s)
+    end
     A[1,1] = a1
     A[2,1] = c*t
     A[1,2] = b*t
@@ -2342,7 +2323,7 @@ sqrt(A::UnitLowerTriangular) = copy(transpose(sqrt(copy(transpose(A)))))
 # Auxiliary functions for matrix square root
 
 # square root of upper triangular or real upper quasitriangular matrix
-function sqrt_quasitriu(A0)
+function sqrt_quasitriu(A0; blockwidth = eltype(A0) <: Complex ? 512 : 256)
     n = checksquare(A0)
     T = eltype(A0)
     Tr = typeof(sqrt(real(zero(T))))
@@ -2369,7 +2350,7 @@ function sqrt_quasitriu(A0)
         A = A0
         R = zeros(Tc, n, n)
     end
-    _sqrt_quasitriu!(R, A)
+    _sqrt_quasitriu!(R, A; blockwidth=blockwidth, n=n)
     Rc = eltype(A0) <: Real ? R : complex(R)
     if A0 isa UpperTriangular
         return UpperTriangular(Rc)
@@ -2380,7 +2361,32 @@ function sqrt_quasitriu(A0)
     end
 end
 
-function _sqrt_quasitriu!(R, A)
+# in-place recursive sqrt of upper quasi-triangular matrix A from
+# Deadman E., Higham N.J., Ralha R. (2013) Blocked Schur Algorithms for Computing the Matrix
+# Square Root. Applied Parallel and Scientific Computing. PARA 2012. Lecture Notes in
+# Computer Science, vol 7782. https://doi.org/10.1007/978-3-642-36803-5_12
+function _sqrt_quasitriu!(R, A; blockwidth=64, n=checksquare(A))
+    if n ≤ blockwidth || !(eltype(R) <: BlasFloat) # base case: unblocked "point" algorithm
+        _sqrt_quasitriu_block!(R, A)
+    else  # otherwise recurse blockwise
+        # cut near the middle, but never through a 2x2 diagonal block
+        k = div(n, 2)
+        iszero(A[k+1, k]) || (k += 1)
+        lo, hi = 1:k, (k + 1):n
+        nlo, nhi = k, n - k
+        A11, A12, A22 = @views A[lo,lo], A[lo,hi], A[hi,hi]
+        R11, R12, R22 = @views R[lo,lo], R[lo,hi], R[hi,hi]
+        # square roots of the two diagonal blocks, computed recursively
+        _sqrt_quasitriu!(R11, A11; blockwidth=blockwidth, n=nlo)
+        _sqrt_quasitriu!(R22, A22; blockwidth=blockwidth, n=nhi)
+        # off-diagonal block: seed R12 with -A12, then solve the Sylvester equation in place
+        R12 .= .- A12
+        _sylvester_quasitriu!(R11, R22, R12; blockwidth=blockwidth, nA=nlo, nB=nhi, raise=false)
+    end
+    return R
+end
+
+function _sqrt_quasitriu_block!(R, A)
     _sqrt_quasitriu_diag_block!(R, A)
     _sqrt_quasitriu_offdiag_block!(R, A)
     return R
@@ -2435,25 +2441,30 @@ function _sqrt_quasitriu_offdiag_block!(R, A)
     return R
 end
 
+# real square root of 2x2 diagonal block of quasi-triangular matrix from real Schur
+# decomposition. Eqs 6.8-6.9 and Algorithm 6.5 of
+# Higham, 2008, "Functions of Matrices: Theory and Computation", SIAM.
 Base.@propagate_inbounds function _sqrt_real_2x2!(R, A)
-    a11, a21, a12, a22 = A[1, 1], A[2, 1], A[1, 2], A[2, 2]
-    θ = (a11 + a22) / 2
-    μ² = -(a11 - a22)^2 / 4 - a21 * a12
-    μ = sqrt(μ²)
-    if θ > 0
-        α = sqrt((sqrt(θ^2 + μ²) + θ) / 2)
-    else
-        α = μ / sqrt(2 * (sqrt(θ^2 + μ²) - θ))
-    end
+    # in the real Schur form, A[1, 1] == A[2, 2], and A[2, 1] * A[1, 2] < 0
+    θ, a21, a12 = A[1, 1], A[2, 1], A[1, 2]  # A[2, 2] is not read
+    # avoid overflow/underflow of μ: take the square roots before multiplying
+    # for real sqrt, |d| ≤ 2 max(|a12|,|a21|)
+    μ = sqrt(abs(a12)) * sqrt(abs(a21))
+    α = _real_sqrt(θ, μ)  # real part of sqrt(θ + im*μ)
     c = 2α
-    d = α - θ / c
-    R[1, 1] = a11 / c + d
+    R[1, 1] = α  # both diagonal entries of the result are α
     R[2, 1] = a21 / c
     R[1, 2] = a12 / c
-    R[2, 2] = a22 / c + d
+    R[2, 2] = α
     return R
 end
 
+# real part of square root of θ+im*μ
+@inline function _real_sqrt(θ, μ)
+    root = sqrt((abs(θ) + hypot(θ, μ)) / 2)  # equals the real part when θ ≥ 0
+    return θ ≥ 0 ? root : μ / (2 * root)
+end
+
 Base.@propagate_inbounds function _sqrt_quasitriu_offdiag_block_1x1!(R, A, i, j)
     Rii = R[i, i]
     Rjj = R[j, j]
@@ -2522,10 +2533,89 @@ Base.@propagate_inbounds function _sqrt_quasitriu_offdiag_block_2x2!(R, A, i, j)
     Rii = @view R[irange, irange]
     Rjj = @view R[jrange, jrange]
     Rij = @view R[irange, jrange]
-    _sylvester_2x2!(Rii, Rjj, Rij)
+    if !iszero(Rij) && !all(isnan, Rij)
+        _sylvester_2x2!(Rii, Rjj, Rij)
+    end
     return R
 end
 
+# solve Sylvester's equation AX + XB = -C using blockwise recursion until the dimension of
+# A and B are no greater than blockwidth, based on Algorithm 1 from
+# Jonsson I, Kågström B. Recursive blocked algorithms for solving triangular systems—
+# Part I: one-sided and coupled Sylvester-type matrix equations. (2002) ACM Trans Math Softw.
+# 28(4), https://doi.org/10.1145/592843.592845.
+# specify raise=false to avoid breaking the recursion if a LAPACKException is thrown when
+# computing one of the blocks.
+function _sylvester_quasitriu!(A, B, C; blockwidth=64, nA=checksquare(A), nB=checksquare(B), raise=true)
+    if 1 ≤ nA ≤ blockwidth && 1 ≤ nB ≤ blockwidth  # both within blockwidth: solve directly
+        _sylvester_quasitriu_base!(A, B, C; raise=raise)
+    elseif nA ≥ 2nB ≥ 2  # A at least twice the size of B: split A only
+        _sylvester_quasitriu_split1!(A, B, C; blockwidth=blockwidth, nA=nA, nB=nB, raise=raise)
+    elseif nB ≥ 2nA ≥ 2  # B at least twice the size of A: split B only
+        _sylvester_quasitriu_split2!(A, B, C; blockwidth=blockwidth, nA=nA, nB=nB, raise=raise)
+    else  # otherwise split both A and B
+        _sylvester_quasitriu_splitall!(A, B, C; blockwidth=blockwidth, nA=nA, nB=nB, raise=raise)
+    end
+    return C
+end
+function _sylvester_quasitriu_base!(A, B, C; raise=true)
+    try
+        _, scale = LAPACK.trsyl!('N', 'N', A, B, C)  # overwrites C with the solution of A*X + X*B = scale*C
+        rmul!(C, -inv(scale))  # undo the scaling and flip the sign, since the target equation has -C
+    catch e
+        if !(e isa LAPACKException) || raise  # only a LAPACKException with raise=false is swallowed
+            rethrow()  # propagate with the original backtrace intact
+        end
+    end
+    return C
+end
+function _sylvester_quasitriu_split1!(A, B, C; nA=checksquare(A), kwargs...)
+    iA = div(nA, 2)  # cut A near the middle
+    iszero(A[iA + 1, iA]) || (iA += 1)  # don't split 2x2 diagonal block
+    rA1, rA2 = 1:iA, (iA + 1):nA
+    nA1, nA2 = iA, nA-iA
+    A11, A12, A22 = @views A[rA1,rA1], A[rA1,rA2], A[rA2,rA2]
+    C1, C2 = @views C[rA1,:], C[rA2,:]  # row blocks of C matching the split of A
+    _sylvester_quasitriu!(A22, B, C2; nA=nA2, kwargs...)  # bottom row block first, solved in place
+    mul!(C1, A12, C2, true, true)  # C1 += A12 * C2
+    _sylvester_quasitriu!(A11, B, C1; nA=nA1, kwargs...)  # then the top row block
+    return C
+end
+function _sylvester_quasitriu_split2!(A, B, C; nB=checksquare(B), kwargs...)
+    iB = div(nB, 2)  # cut B near the middle
+    iszero(B[iB + 1, iB]) || (iB += 1)  # don't split 2x2 diagonal block
+    rB1, rB2 = 1:iB, (iB + 1):nB
+    nB1, nB2 = iB, nB-iB
+    B11, B12, B22 = @views B[rB1,rB1], B[rB1,rB2], B[rB2,rB2]
+    C1, C2 = @views C[:,rB1], C[:,rB2]  # column blocks of C matching the split of B
+    _sylvester_quasitriu!(A, B11, C1; nB=nB1, kwargs...)  # left column block first, solved in place
+    mul!(C2, C1, B12, true, true)  # C2 += C1 * B12
+    _sylvester_quasitriu!(A, B22, C2; nB=nB2, kwargs...)  # then the right column block
+    return C
+end
+function _sylvester_quasitriu_splitall!(A, B, C; nA=checksquare(A), nB=checksquare(B), kwargs...)
+    iA = div(nA, 2)  # cut A near the middle
+    iszero(A[iA + 1, iA]) || (iA += 1)  # don't split 2x2 diagonal block
+    iB = div(nB, 2)  # cut B near the middle
+    iszero(B[iB + 1, iB]) || (iB += 1)  # don't split 2x2 diagonal block
+    rA1, rA2 = 1:iA, (iA + 1):nA
+    nA1, nA2 = iA, nA-iA
+    rB1, rB2 = 1:iB, (iB + 1):nB
+    nB1, nB2 = iB, nB-iB
+    A11, A12, A22 = @views A[rA1,rA1], A[rA1,rA2], A[rA2,rA2]
+    B11, B12, B22 = @views B[rB1,rB1], B[rB1,rB2], B[rB2,rB2]
+    C11, C21, C12, C22 = @views C[rA1,rB1], C[rA2,rB1], C[rA1,rB2], C[rA2,rB2]
+    _sylvester_quasitriu!(A22, B11, C21; nA=nA2, nB=nB1, kwargs...)  # bottom-left block depends on no other block
+    mul!(C11, A12, C21, true, true)  # C11 += A12 * C21
+    _sylvester_quasitriu!(A11, B11, C11; nA=nA1, nB=nB1, kwargs...)  # top-left block
+    mul!(C22, C21, B12, true, true)  # C22 += C21 * B12
+    _sylvester_quasitriu!(A22, B22, C22; nA=nA2, nB=nB2, kwargs...)  # bottom-right block
+    mul!(C12, A12, C22, true, true)  # C12 += A12 * C22
+    mul!(C12, C11, B12, true, true)  # C12 += C11 * B12
+    _sylvester_quasitriu!(A11, B22, C12; nA=nA1, nB=nB2, kwargs...)  # top-right block last, after both updates
+    return C
+end
+
 # End of auxiliary functions for matrix square root
 
 # Generic eigensystems
diff --git a/stdlib/LinearAlgebra/src/tridiag.jl b/stdlib/LinearAlgebra/src/tridiag.jl
index 8420750f8f4a14..cc551e4911acf3 100644
--- a/stdlib/LinearAlgebra/src/tridiag.jl
+++ b/stdlib/LinearAlgebra/src/tridiag.jl
@@ -159,6 +159,9 @@ similar(S::SymTridiagonal, ::Type{T}) where {T} = SymTridiagonal(similar(S.dv, T
 # The method below is moved to SparseArrays for now
 # similar(S::SymTridiagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = spzeros(T, dims...)
 
+copyto!(dest::SymTridiagonal, src::SymTridiagonal) =
+    (copyto!(dest.dv, src.dv); copyto!(dest.ev, src.ev); dest)
+
 #Elementary operations
 for func in (:conj, :copy, :real, :imag)
     @eval ($func)(M::SymTridiagonal) = SymTridiagonal(($func)(M.dv), ($func)(M.ev))
@@ -168,7 +171,9 @@ transpose(S::SymTridiagonal) = S
 adjoint(S::SymTridiagonal{<:Real}) = S
 adjoint(S::SymTridiagonal) = Adjoint(S)
 Base.copy(S::Adjoint{<:Any,<:SymTridiagonal}) = SymTridiagonal(map(x -> copy.(adjoint.(x)), (S.parent.dv, S.parent.ev))...)
-Base.copy(S::Transpose{<:Any,<:SymTridiagonal}) = SymTridiagonal(map(x -> copy.(transpose.(x)), (S.parent.dv, S.parent.ev))...)
+
+ishermitian(S::SymTridiagonal) = isreal(S.dv) && isreal(@view S.ev[begin:length(S.dv) - 1])  # only the first length(dv)-1 entries of ev are inspected
+issymmetric(S::SymTridiagonal) = true  # unconditionally true for this type
 
 function diag(M::SymTridiagonal{<:Number}, n::Integer=0)
     # every branch call similar(..., ::Int) to make sure the
@@ -608,7 +613,10 @@ transpose(S::Tridiagonal{<:Number}) = Tridiagonal(S.du, S.d, S.dl)
 Base.copy(aS::Adjoint{<:Any,<:Tridiagonal}) = (S = aS.parent; Tridiagonal(map(x -> copy.(adjoint.(x)), (S.du, S.d, S.dl))...))
 Base.copy(tS::Transpose{<:Any,<:Tridiagonal}) = (S = tS.parent; Tridiagonal(map(x -> copy.(transpose.(x)), (S.du, S.d, S.dl))...))
 
-\(A::Adjoint{<:Any,<:Tridiagonal}, B::Adjoint{<:Any,<:StridedVecOrMat}) = copy(A) \ copy(B)
+ishermitian(S::Tridiagonal) = isreal(S.d) && S.du == adjoint.(S.dl)  # real diagonal, and du equal to the elementwise adjoint of dl
+issymmetric(S::Tridiagonal) = S.du == S.dl  # compares only the two off-diagonals
+
+\(A::Adjoint{<:Any,<:Tridiagonal}, B::Adjoint{<:Any,<:StridedVecOrMat}) = copy(A) \ B  # only A is copied; B is forwarded still wrapped in Adjoint
 
 function diag(M::Tridiagonal, n::Integer=0)
     # every branch call similar(..., ::Int) to make sure the
@@ -747,7 +755,7 @@ det(A::Tridiagonal) = det_usmani(A.dl, A.d, A.du)
 AbstractMatrix{T}(M::Tridiagonal) where {T} = Tridiagonal{T}(M)
 Tridiagonal{T}(M::SymTridiagonal{T}) where {T} = Tridiagonal(M)
 function SymTridiagonal{T}(M::Tridiagonal) where T
-    if M.dl == M.du
+    if issymmetric(M)
         return SymTridiagonal{T}(convert(AbstractVector{T},M.d), convert(AbstractVector{T},M.dl))
     else
         throw(ArgumentError("Tridiagonal is not symmetric, cannot convert to SymTridiagonal"))
diff --git a/stdlib/LinearAlgebra/src/uniformscaling.jl b/stdlib/LinearAlgebra/src/uniformscaling.jl
index c59871e0641ef0..019e6eddc17de4 100644
--- a/stdlib/LinearAlgebra/src/uniformscaling.jl
+++ b/stdlib/LinearAlgebra/src/uniformscaling.jl
@@ -88,23 +88,23 @@ ndims(J::UniformScaling) = 2
 Base.has_offset_axes(::UniformScaling) = false
 getindex(J::UniformScaling, i::Integer,j::Integer) = ifelse(i==j,J.λ,zero(J.λ))
 
-getindex(x::UniformScaling, n::Integer, m::AbstractRange{<:Integer}) = getindex(x, m, n)
-function getindex(x::UniformScaling{T}, n::AbstractRange{<:Integer}, m::Integer) where T
+getindex(J::UniformScaling, n::Integer, m::AbstractVector{<:Integer}) = getindex(J, m, n)
+function getindex(J::UniformScaling{T}, n::AbstractVector{<:Integer}, m::Integer) where T
     v = zeros(T, length(n))
     @inbounds for (i,ii) in enumerate(n)
         if ii == m
-            v[i] = x.λ
+            v[i] = J.λ
         end
     end
     return v
 end
 
 
-function getindex(x::UniformScaling{T}, n::AbstractRange{<:Integer}, m::AbstractRange{<:Integer}) where T
+function getindex(J::UniformScaling{T}, n::AbstractVector{<:Integer}, m::AbstractVector{<:Integer}) where T
     A = zeros(T, length(n), length(m))
     @inbounds for (j,jj) in enumerate(m), (i,ii) in enumerate(n)
         if ii == jj
-            A[i,j] = x.λ
+            A[i,j] = J.λ
         end
     end
     return A
@@ -215,8 +215,8 @@ end
 function (+)(A::AbstractMatrix, J::UniformScaling)
     checksquare(A)
     B = copy_oftype(A, Base._return_type(+, Tuple{eltype(A), typeof(J)}))
-    @inbounds for i in axes(A, 1)
-        B[i,i] += J
+    for i in intersect(axes(A,1), axes(A,2))
+        @inbounds B[i,i] += J
     end
     return B
 end
@@ -224,8 +224,8 @@ end
 function (-)(J::UniformScaling, A::AbstractMatrix)
     checksquare(A)
     B = convert(AbstractMatrix{Base._return_type(+, Tuple{eltype(A), typeof(J)})}, -A)
-    @inbounds for i in axes(A, 1)
-        B[i,i] += J
+    for i in intersect(axes(A,1), axes(A,2))
+        @inbounds B[i,i] += J
     end
     return B
 end
@@ -265,7 +265,8 @@ end
 *(J::UniformScaling, x::Number) = UniformScaling(J.λ*x)
 
 /(J1::UniformScaling, J2::UniformScaling) = J2.λ == 0 ? throw(SingularException(1)) : UniformScaling(J1.λ/J2.λ)
-/(J::UniformScaling, A::AbstractMatrix) = lmul!(J.λ, inv(A))
+/(J::UniformScaling, A::AbstractMatrix) =
+    (invA = inv(A); lmul!(J.λ, convert(AbstractMatrix{promote_type(eltype(J),eltype(invA))}, invA)))  # convert inv(A) to the promoted eltype first, then scale it in place by λ from the left
 /(A::AbstractMatrix, J::UniformScaling) = J.λ == 0 ? throw(SingularException(1)) : A/J.λ
 /(v::AbstractVector, J::UniformScaling) = reshape(v, length(v), 1) / J
 
@@ -273,7 +274,8 @@ end
 
 \(J1::UniformScaling, J2::UniformScaling) = J1.λ == 0 ? throw(SingularException(1)) : UniformScaling(J1.λ\J2.λ)
 \(J::UniformScaling, A::AbstractVecOrMat) = J.λ == 0 ? throw(SingularException(1)) : J.λ\A
-\(A::AbstractMatrix, J::UniformScaling) = rmul!(inv(A), J.λ)
+\(A::AbstractMatrix, J::UniformScaling) =
+    (invA = inv(A); rmul!(convert(AbstractMatrix{promote_type(eltype(invA),eltype(J))}, invA), J.λ))  # convert inv(A) to the promoted eltype first, then scale it in place by λ from the right
 \(F::Factorization, J::UniformScaling) = F \ J(size(F,1))
 
 \(x::Number, J::UniformScaling) = UniformScaling(x\J.λ)
@@ -282,6 +284,24 @@ end
     mul!(C, A, J.λ, alpha, beta)
 @inline mul!(C::AbstractVecOrMat, J::UniformScaling, B::AbstractVecOrMat, alpha::Number, beta::Number) =
     mul!(C, J.λ, B, alpha, beta)
+
+function mul!(out::AbstractMatrix{T}, a::Number, B::UniformScaling, α::Number, β::Number) where {T}
+    checksquare(out)
+    # Step 1: apply β to the current contents of `out` (β == 0 overwrites with zeros, β == 1 leaves it alone).
+    if iszero(β)
+        fill!(out, zero(T))
+    elseif !isone(β)
+        rmul!(out, β)
+    end
+    shift = convert(T, a*B.λ*α)  # Step 2: the scalar a*λ*α, to be added along the diagonal
+    iszero(shift) && return out
+    @inbounds for k in diagind(out)
+        out[k] += shift
+    end
+    return out
+end
+@inline mul!(out::AbstractMatrix, A::UniformScaling, b::Number, α::Number, β::Number) =
+    mul!(out, A.λ, UniformScaling(b), α, β)  # swap the roles of the two scalars and reuse the method above
 rmul!(A::AbstractMatrix, J::UniformScaling) = rmul!(A, J.λ)
 lmul!(J::UniformScaling, B::AbstractVecOrMat) = lmul!(J.λ, B)
 rdiv!(A::AbstractMatrix, J::UniformScaling) = rdiv!(A, J.λ)
@@ -371,6 +391,7 @@ end
 # in A to matrices of type T and sizes given by n[k:end].  n is an array
 # so that the same promotion code can be used for hvcat.  We pass the type T
 # so that we can re-use this code for sparse-matrix hcat etcetera.
+promote_to_arrays_(n::Int, ::Type, a::Number) = a  # Numbers pass through unchanged; n and the target type are ignored
 promote_to_arrays_(n::Int, ::Type{Matrix}, J::UniformScaling{T}) where {T} = copyto!(Matrix{T}(undef, n,n), J)
 promote_to_arrays_(n::Int, ::Type, A::AbstractVecOrMat) = A
 promote_to_arrays(n,k, ::Type) = ()
@@ -381,11 +402,13 @@ promote_to_arrays(n,k, ::Type{T}, A, B, C) where {T} =
     (promote_to_arrays_(n[k], T, A), promote_to_arrays_(n[k+1], T, B), promote_to_arrays_(n[k+2], T, C))
 promote_to_arrays(n,k, ::Type{T}, A, B, Cs...) where {T} =
     (promote_to_arrays_(n[k], T, A), promote_to_arrays_(n[k+1], T, B), promote_to_arrays(n,k+2, T, Cs...)...)
-promote_to_array_type(A::Tuple{Vararg{Union{AbstractVecOrMat,UniformScaling}}}) = Matrix
+promote_to_array_type(A::Tuple{Vararg{Union{AbstractVecOrMat,UniformScaling,Number}}}) = Matrix  # tuples that also contain Numbers map to Matrix as well
 
-for (f,dim,name) in ((:hcat,1,"rows"), (:vcat,2,"cols"))
+for (f, _f, dim, name) in ((:hcat, :_hcat, 1, "rows"), (:vcat, :_vcat, 2, "cols"))
     @eval begin
-        function $f(A::Union{AbstractVecOrMat,UniformScaling}...)
+        @inline $f(A::Union{AbstractVecOrMat,UniformScaling}...) = $_f(A...)
+        @inline $f(A::Union{AbstractVecOrMat,UniformScaling,Number}...) = $_f(A...)
+        function $_f(A::Union{AbstractVecOrMat,UniformScaling,Number}...)
             n = -1
             for a in A
                 if !isa(a, UniformScaling)
@@ -398,13 +421,14 @@ for (f,dim,name) in ((:hcat,1,"rows"), (:vcat,2,"cols"))
                 end
             end
             n == -1 && throw(ArgumentError($("$f of only UniformScaling objects cannot determine the matrix size")))
-            return $f(promote_to_arrays(fill(n,length(A)),1, promote_to_array_type(A), A...)...)
+            return cat(promote_to_arrays(fill(n, length(A)), 1, promote_to_array_type(A), A...)..., dims=Val(3-$dim))
         end
     end
 end
 
-
-function hvcat(rows::Tuple{Vararg{Int}}, A::Union{AbstractVecOrMat,UniformScaling}...)
+hvcat(rows::Tuple{Vararg{Int}}, A::Union{AbstractVecOrMat,UniformScaling}...) = _hvcat(rows, A...)
+hvcat(rows::Tuple{Vararg{Int}}, A::Union{AbstractVecOrMat,UniformScaling,Number}...) = _hvcat(rows, A...)
+function _hvcat(rows::Tuple{Vararg{Int}}, A::Union{AbstractVecOrMat,UniformScaling,Number}...)
     require_one_based_indexing(A...)
     nr = length(rows)
     sum(rows) == length(A) || throw(ArgumentError("mismatch between row sizes and number of arguments"))
@@ -447,8 +471,8 @@ function hvcat(rows::Tuple{Vararg{Int}}, A::Union{AbstractVecOrMat,UniformScalin
         j = 0
         for i = 1:nr
             if rows[i] > 0 && n[j+1] == -1 # this row consists entirely of UniformScalings
-                nci = nc ÷ rows[i]
-                nci * rows[i] != nc && throw(DimensionMismatch("indivisible UniformScaling sizes"))
+                nci, r = divrem(nc, rows[i])
+                r != 0 && throw(DimensionMismatch("indivisible UniformScaling sizes"))
                 for k = 1:rows[i]
                     n[j+k] = nci
                 end
@@ -456,7 +480,18 @@ function hvcat(rows::Tuple{Vararg{Int}}, A::Union{AbstractVecOrMat,UniformScalin
             j += rows[i]
         end
     end
-    return hvcat(rows, promote_to_arrays(n,1, promote_to_array_type(A), A...)...)
+    Atyp = promote_to_array_type(A)
+    Amat = promote_to_arrays(n, 1, Atyp, A...)
+    # We have two methods for promote_to_array_type, one returning Matrix and
+    # another one returning SparseMatrixCSC (in SparseArrays.jl). In the dense
+    # case, we cannot call hvcat for the promoted UniformScalings because this
+    # causes a stack overflow. In the sparse case, however, we cannot call
+    # typed_hvcat because we need a sparse output.
+    if Atyp == Matrix
+        return typed_hvcat(promote_eltype(Amat...), rows, Amat...)
+    else
+        return hvcat(rows, Amat...)
+    end
 end
 
 ## Matrix construction from UniformScaling
@@ -480,6 +515,9 @@ Array(s::UniformScaling, dims::Dims{2}) = Matrix(s, dims)
 Diagonal{T}(s::UniformScaling, m::Integer) where {T} = Diagonal{T}(fill(T(s.λ), m))
 Diagonal(s::UniformScaling, m::Integer) = Diagonal{eltype(s)}(s, m)
 
+dot(A::AbstractMatrix, J::UniformScaling) = dot(tr(A), J.λ)  # reduces to a scalar dot between tr(A) and λ
+dot(J::UniformScaling, A::AbstractMatrix) = dot(J.λ, tr(A))  # same reduction with the two arguments in the opposite order
+
 dot(x::AbstractVector, J::UniformScaling, y::AbstractVector) = dot(x, J.λ, y)
 dot(x::AbstractVector, a::Number, y::AbstractVector) = sum(t -> dot(t[1], a, t[2]), zip(x, y))
 dot(x::AbstractVector, a::Union{Real,Complex}, y::AbstractVector) = a*dot(x, y)
diff --git a/stdlib/LinearAlgebra/test/adjtrans.jl b/stdlib/LinearAlgebra/test/adjtrans.jl
index bb2e54d3062245..8226ddf004a72f 100644
--- a/stdlib/LinearAlgebra/test/adjtrans.jl
+++ b/stdlib/LinearAlgebra/test/adjtrans.jl
@@ -4,18 +4,20 @@ module TestAdjointTranspose
 
 using Test, LinearAlgebra, SparseArrays
 
+const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
+
 @testset "Adjoint and Transpose inner constructor basics" begin
     intvec, intmat = [1, 2], [1 2; 3 4]
     # Adjoint/Transpose eltype must match the type of the Adjoint/Transpose of the input eltype
-    @test_throws ErrorException Adjoint{Float64,Vector{Int}}(intvec)
-    @test_throws ErrorException Adjoint{Float64,Matrix{Int}}(intmat)
-    @test_throws ErrorException Transpose{Float64,Vector{Int}}(intvec)
-    @test_throws ErrorException Transpose{Float64,Matrix{Int}}(intmat)
+    @test_throws TypeError Adjoint{Float64,Vector{Int}}(intvec)[1,1]
+    @test_throws TypeError Adjoint{Float64,Matrix{Int}}(intmat)[1,1]
+    @test_throws TypeError Transpose{Float64,Vector{Int}}(intvec)[1,1]
+    @test_throws TypeError Transpose{Float64,Matrix{Int}}(intmat)[1,1]
     # Adjoint/Transpose wrapped array type must match the input array type
-    @test_throws MethodError Adjoint{Int,Vector{Float64}}(intvec)
-    @test_throws MethodError Adjoint{Int,Matrix{Float64}}(intmat)
-    @test_throws MethodError Transpose{Int,Vector{Float64}}(intvec)
-    @test_throws MethodError Transpose{Int,Matrix{Float64}}(intmat)
+    @test_throws TypeError Adjoint{Int,Vector{Float64}}(intvec)[1,1]
+    @test_throws TypeError Adjoint{Int,Matrix{Float64}}(intmat)[1,1]
+    @test_throws TypeError Transpose{Int,Vector{Float64}}(intvec)[1,1]
+    @test_throws TypeError Transpose{Int,Matrix{Float64}}(intmat)[1,1]
     # Adjoint/Transpose inner constructor basic functionality, concrete scalar eltype
     @test (Adjoint{Int,Vector{Int}}(intvec)::Adjoint{Int,Vector{Int}}).parent === intvec
     @test (Adjoint{Int,Matrix{Int}}(intmat)::Adjoint{Int,Matrix{Int}}).parent === intmat
@@ -239,6 +241,25 @@ end
     @test convert(Transpose{Float64,Matrix{Float64}}, Transpose(intmat))::Transpose{Float64,Matrix{Float64}} == Transpose(intmat)
 end
 
+isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl"))
+using .Main.ImmutableArrays
+
+@testset "Adjoint and Transpose convert methods to AbstractArray" begin
+    # tests corresponding to #34995
+    intvec, intmat = [1, 2], [1 2 3; 4 5 6]
+    statvec = ImmutableArray(intvec)  # Int vector wrapped in the ImmutableArray test helper
+    statmat = ImmutableArray(intmat)  # Int matrix wrapped in the ImmutableArray test helper
+
+    @test convert(AbstractArray{Float64}, Adjoint(statvec))::Adjoint{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Adjoint(statvec)  # vector parent: result is asserted to stay an Adjoint over an ImmutableArray
+    @test convert(AbstractArray{Float64}, Adjoint(statmat))::Array{Float64,2} == Adjoint(statmat)  # matrix parent: result is asserted to be a plain Array
+    @test convert(AbstractArray{Float64}, Transpose(statvec))::Transpose{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Transpose(statvec)
+    @test convert(AbstractArray{Float64}, Transpose(statmat))::Array{Float64,2} == Transpose(statmat)
+    @test convert(AbstractMatrix{Float64}, Adjoint(statvec))::Adjoint{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Adjoint(statvec)  # same four checks with AbstractMatrix as the target
+    @test convert(AbstractMatrix{Float64}, Adjoint(statmat))::Array{Float64,2} == Adjoint(statmat)
+    @test convert(AbstractMatrix{Float64}, Transpose(statvec))::Transpose{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Transpose(statvec)
+    @test convert(AbstractMatrix{Float64}, Transpose(statmat))::Array{Float64,2} == Transpose(statmat)
+end
+
 @testset "Adjoint and Transpose similar methods" begin
     intvec, intmat = [1, 2], [1 2 3; 4 5 6]
     # similar with no additional specifications, vector (rewrapping) semantics
@@ -275,6 +296,9 @@ end
     @test vec(Transpose(intvec)) === intvec
     cvec = [1 + 1im]
     @test vec(cvec')[1] == cvec[1]'
+    mvec = [[1 2; 3 4+5im]];
+    @test vec(transpose(mvec))[1] == transpose(mvec[1])
+    @test vec(adjoint(mvec))[1] == adjoint(mvec[1])
 end
 
 @testset "horizontal concatenation of Adjoint/Transpose-wrapped vectors and Numbers" begin
@@ -524,7 +548,6 @@ end
     @test pointer(Transpose(D)) === pointer(D)
 end
 
-const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
 isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl"))
 using .Main.OffsetArrays
 
@@ -573,4 +596,24 @@ end
     @test transpose(Int[]) * Int[] == 0
 end
 
+@testset "reductions: $adjtrans" for adjtrans in [transpose, adjoint]  # every check compares a reduction over the wrapper with the same reduction over collect(wrapper)
+    mat = rand(ComplexF64, 3,5)  # scalar-element case
+    @test sum(adjtrans(mat)) ≈ sum(collect(adjtrans(mat)))
+    @test sum(adjtrans(mat), dims=1) ≈ sum(collect(adjtrans(mat)), dims=1)
+    @test sum(adjtrans(mat), dims=(1,2)) ≈ sum(collect(adjtrans(mat)), dims=(1,2))
+
+    @test sum(imag, adjtrans(mat)) ≈ sum(imag, collect(adjtrans(mat)))  # reductions with a mapping function
+    @test sum(imag, adjtrans(mat), dims=1) ≈ sum(imag, collect(adjtrans(mat)), dims=1)
+
+    mat = [rand(ComplexF64,2,2) for _ in 1:3, _ in 1:5]  # matrix-of-matrices case: every element is a 2x2 complex block
+    @test sum(adjtrans(mat)) ≈ sum(collect(adjtrans(mat)))
+    @test sum(adjtrans(mat), dims=1) ≈ sum(collect(adjtrans(mat)), dims=1)
+    @test sum(adjtrans(mat), dims=(1,2)) ≈ sum(collect(adjtrans(mat)), dims=(1,2))
+
+    @test sum(imag, adjtrans(mat)) ≈ sum(imag, collect(adjtrans(mat)))
+    @test sum(x -> x[1,2], adjtrans(mat)) ≈ sum(x -> x[1,2], collect(adjtrans(mat)))  # picks the off-diagonal [1,2] entry of each block
+    @test sum(imag, adjtrans(mat), dims=1) ≈ sum(imag, collect(adjtrans(mat)), dims=1)
+    @test sum(x -> x[1,2], adjtrans(mat), dims=1) ≈ sum(x -> x[1,2], collect(adjtrans(mat)), dims=1)
+end
+
 end # module TestAdjointTranspose
diff --git a/stdlib/LinearAlgebra/test/bidiag.jl b/stdlib/LinearAlgebra/test/bidiag.jl
index e4dcd14053778f..d9efdc1fd3ee60 100644
--- a/stdlib/LinearAlgebra/test/bidiag.jl
+++ b/stdlib/LinearAlgebra/test/bidiag.jl
@@ -240,9 +240,9 @@ Random.seed!(1)
             @test_throws DimensionMismatch transpose(T) \ offsizemat
             @test_throws DimensionMismatch T' \ offsizemat
 
-            if elty <: BlasReal
-                @test_throws SingularException LinearAlgebra.naivesub!(Bidiagonal(zeros(elty, n), ones(elty, n-1), :U), rand(elty, n))
-                @test_throws SingularException LinearAlgebra.naivesub!(Bidiagonal(zeros(elty, n), ones(elty, n-1), :L), rand(elty, n))
+            if elty <: BigFloat
+                @test_throws SingularException ldiv!(Bidiagonal(zeros(elty, n), ones(elty, n-1), :U), rand(elty, n))
+                @test_throws SingularException ldiv!(Bidiagonal(zeros(elty, n), ones(elty, n-1), :L), rand(elty, n))
             end
             let bb = b, cc = c
                 for atype in ("Array", "SubArray")
@@ -256,7 +256,7 @@ Random.seed!(1)
                 end
                 x = T \ b
                 tx = Tfull \ b
-                @test_throws DimensionMismatch LinearAlgebra.naivesub!(T,Vector{elty}(undef,n+1))
+                @test_throws DimensionMismatch ldiv!(T, Vector{elty}(undef, n+1))
                 @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf))
                 x = transpose(T) \ b
                 tx = transpose(Tfull) \ b
@@ -649,4 +649,20 @@ end
     @test c \ A ≈ c \ Matrix(A)
 end
 
+isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl"))
+using .Main.ImmutableArrays
+
+@testset "Conversion to AbstractArray" begin
+    # tests corresponding to #34995
+    dv = ImmutableArray([1, 2, 3, 4])  # diagonal, wrapped in the ImmutableArray test helper
+    ev = ImmutableArray([7, 8, 9])  # off-diagonal, one element shorter than dv
+    Bu = Bidiagonal(dv, ev, :U)
+    Bl = Bidiagonal(dv, ev, :L)
+
+    @test convert(AbstractArray{Float64}, Bu)::Bidiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Bu  # result type is asserted to remain a Bidiagonal over ImmutableArray storage
+    @test convert(AbstractMatrix{Float64}, Bu)::Bidiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Bu
+    @test convert(AbstractArray{Float64}, Bl)::Bidiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Bl
+    @test convert(AbstractMatrix{Float64}, Bl)::Bidiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Bl
+end
+
 end # module TestBidiagonal
diff --git a/stdlib/LinearAlgebra/test/blas.jl b/stdlib/LinearAlgebra/test/blas.jl
index be325617acb539..df29c171b20606 100644
--- a/stdlib/LinearAlgebra/test/blas.jl
+++ b/stdlib/LinearAlgebra/test/blas.jl
@@ -370,6 +370,41 @@ Random.seed!(100)
         @test all(o4cp .== z4)
         @test all(BLAS.gemv('N', U4, o4) .== v41)
         @test all(BLAS.gemv('N', U4, o4) .== v41)
+        @testset "non-standard strides" begin
+            if elty <: Complex  # complex eltypes: 'T' and 'C' have different expected values, so each is listed separately
+                A = elty[1+2im 3+4im 5+6im 7+8im; 2+3im 4+5im 6+7im 8+9im; 3+4im 5+6im 7+8im 9+10im]
+                v = elty[1+2im, 2+3im, 3+4im, 4+5im, 5+6im]
+                dest = view(ones(elty, 7), 6:-2:2)  # destination view with step -2
+                @test BLAS.gemv!('N', elty(2), view(A, :, 2:2:4), view(v, 1:3:4), elty(3), dest) == elty[-31+154im, -35+178im, -39+202im]
+                @test BLAS.gemv('N', elty(-1), view(A, 2:3, 2:3), view(v, 2:-1:1)) == elty[15-41im, 17-49im]
+                @test BLAS.gemv('N', view(A, 1:0, 1:2), view(v, 1:2)) == elty[]  # matrix view with zero rows
+                dest = view(ones(elty, 5), 4:-2:2)
+                @test BLAS.gemv!('T', elty(2), view(A, :, 2:2:4), view(v, 1:2:5), elty(3), dest) == elty[-45+202im, -69+370im]
+                @test BLAS.gemv('T', elty(-1), view(A, 2:3, 2:3), view(v, 2:-1:1)) == elty[14-38im, 18-54im]
+                @test BLAS.gemv('T', view(A, 2:3, 2:1), view(v, 1:2)) == elty[]  # matrix view with zero columns
+                dest = view(ones(elty, 5), 4:-2:2)
+                @test BLAS.gemv!('C', elty(2), view(A, :, 2:2:4), view(v, 5:-2:1), elty(3), dest) == elty[179+6im, 347+30im]
+                @test BLAS.gemv('C', elty(-1), view(A, 2:3, 2:3), view(v, 2:-1:1)) == elty[-40-6im, -56-10im]
+                @test BLAS.gemv('C', view(A, 2:3, 2:1), view(v, 1:2)) == elty[]
+            else  # non-complex eltypes: 'T' and 'C' share the same expected values
+                A = elty[1 2 3 4; 5 6 7 8; 9 10 11 12]
+                v = elty[1, 2, 3, 4, 5]
+                dest = view(ones(elty, 7), 6:-2:2)  # destination view with step -2
+                @test BLAS.gemv!('N', elty(2), view(A, :, 2:2:4), view(v, 1:3:4), elty(3), dest) == elty[39, 79, 119]
+                @test BLAS.gemv('N', elty(-1), view(A, 2:3, 2:3), view(v, 2:-1:1)) == elty[-19, -31]
+                @test BLAS.gemv('N', view(A, 1:0, 1:2), view(v, 1:2)) == elty[]
+                for trans = ('T', 'C')
+                    dest = view(ones(elty, 5), 4:-2:2)  # fresh destination for every trans value
+                    @test BLAS.gemv!(trans, elty(2), view(A, :, 2:2:4), view(v, 1:2:5), elty(3), dest) == elty[143, 179]
+                    @test BLAS.gemv(trans, elty(-1), view(A, 2:3, 2:3), view(v, 2:-1:1)) == elty[-22, -25]
+                    @test BLAS.gemv(trans, view(A, 2:3, 2:1), view(v, 1:2)) == elty[]
+                end
+            end
+            for trans = ('N', 'T', 'C')  # uses the A and v assigned in whichever branch ran above
+                @test_throws ErrorException BLAS.gemv(trans, view(A, 1:2:3, 1:2), view(v, 1:2))  # rows selected with step 2
+                @test_throws ErrorException BLAS.gemv(trans, view(A, 1:2, 2:-1:1), view(v, 1:2))  # columns selected in reverse order
+            end
+        end
     end
     @testset "gemm" begin
         @test all(BLAS.gemm('N', 'N', I4, I4) .== I4)
@@ -459,6 +494,7 @@ Base.setindex!(A::WrappedArray{T, N}, v, I::Vararg{Int, N}) where {T, N} = setin
 Base.unsafe_convert(::Type{Ptr{T}}, A::WrappedArray{T}) where T = Base.unsafe_convert(Ptr{T}, A.A)
 
 Base.strides(A::WrappedArray) = strides(A.A)
+Base.elsize(::Type{WrappedArray{T,N}}) where {T,N} = Base.elsize(Array{T,N})
 
 @testset "strided interface adjtrans" begin
     x = WrappedArray([1, 2, 3, 4])
@@ -514,6 +550,11 @@ end
         BLAS.axpby!(elty(2), x, elty(3), y)
         @test y == WrappedArray(elty[19, 50, 30, 56])
         @test BLAS.iamax(x) == 2
+
+        M = fill(elty(1.0), 3, 3)
+        BLAS.scal!(elty(2), view(M,:,2))
+        BLAS.scal!(elty(3), view(M,3,:))
+        @test M == elty[1. 2. 1.; 1. 2. 1.; 3. 6. 3.]
     # Level 2
         A = WrappedArray(elty[1 2; 3 4])
         x = WrappedArray(elty[1, 2])
diff --git a/stdlib/LinearAlgebra/test/bunchkaufman.jl b/stdlib/LinearAlgebra/test/bunchkaufman.jl
index 5098f818f18043..f1da22d8733e26 100644
--- a/stdlib/LinearAlgebra/test/bunchkaufman.jl
+++ b/stdlib/LinearAlgebra/test/bunchkaufman.jl
@@ -12,7 +12,7 @@ n = 10
 n1 = div(n, 2)
 n2 = 2*n1
 
-Random.seed!(12343210)
+Random.seed!(12343212)
 
 areal = randn(n,n)/2
 aimg  = randn(n,n)/2
@@ -114,7 +114,8 @@ bimg  = randn(n,2)/2
                         @test logabsdet(bc2)[2] == sign(det(bc2))
                         @test inv(bc2)*apd ≈ Matrix(I, n, n)
                         @test apd*(bc2\b) ≈ b rtol=eps(cond(apd))
-                        @test ishermitian(bc2) == !issymmetric(bc2)
+                        @test ishermitian(bc2)
+                        @test !issymmetric(bc2) || eltya <: Real
                     end
                 end
             end
@@ -171,4 +172,22 @@ end
     end
 end
 
+@testset "adjoint of BunchKaufman" begin
+    Ar = randn(5, 5)
+    Ar = Ar + Ar'  # real symmetric
+    Actmp = complex.(randn(5, 5), randn(5, 5))
+    Ac1 = Actmp + Actmp'  # complex Hermitian
+    Ac2 = Actmp + transpose(Actmp)  # complex symmetric
+    b = ones(size(Ar, 1))
+
+    F = bunchkaufman(Ar)
+    @test F\b == F'\b  # solving with F and with its adjoint must agree exactly
+
+    F = bunchkaufman(Ac1)
+    @test F\b == F'\b
+
+    F = bunchkaufman(Ac2)
+    @test_throws ArgumentError("adjoint not implemented for complex symmetric matrices") F'  # the exact error message is part of the expectation
+end
+
 end # module TestBunchKaufman
diff --git a/stdlib/LinearAlgebra/test/cholesky.jl b/stdlib/LinearAlgebra/test/cholesky.jl
index a3f780c047a296..aa647755564860 100644
--- a/stdlib/LinearAlgebra/test/cholesky.jl
+++ b/stdlib/LinearAlgebra/test/cholesky.jl
@@ -39,7 +39,7 @@ end
     n1 = div(n, 2)
     n2 = 2*n1
 
-    Random.seed!(12343)
+    Random.seed!(12344)
 
     areal = randn(n,n)/2
     aimg  = randn(n,n)/2
@@ -475,4 +475,38 @@ end
     end
 end
 
+@testset "adjoint of Cholesky" begin
+    A = randn(5, 5)
+    A = A'A  # Gram matrix of the random A, used as the input to cholesky
+    F = cholesky(A)
+    b = ones(size(A, 1))
+    @test F\b == F'\b  # solving with F and with its adjoint must agree exactly
+end
+
+@testset "Float16" begin
+    A = Float16[4. 12. -16.; 12. 37. -43.; -16. -43. 98.]
+    B = cholesky(A)
+    B32 = cholesky(Float32.(A))  # Float32 factorization of the same matrix, used as the reference below
+    @test B isa Cholesky{Float16, Matrix{Float16}}  # the factorization and all its factors must keep the Float16 eltype
+    @test B.U isa UpperTriangular{Float16, Matrix{Float16}}
+    @test B.L isa LowerTriangular{Float16, Matrix{Float16}}
+    @test B.UL isa UpperTriangular{Float16, Matrix{Float16}}
+    @test B.U ≈ B32.U  # factor values are compared against the Float32 reference
+    @test B.L ≈ B32.L
+    @test B.UL ≈ B32.UL
+end
+
+@testset "det and logdet" begin
+    A = [4083 3825 5876 2048 4470 5490;
+         3825 3575 5520 1920 4200 5140;
+         5876 5520 8427 2940 6410 7903;
+         2048 1920 2940 1008 2240 2740;
+         4470 4200 6410 2240 4875 6015;
+         5490 5140 7903 2740 6015 7370]
+    F = cholesky(A, Val(true), check=false)  # pivoted variant; with check=false a failed factorization is not raised as an error
+    @test det(F) == 0.0
+    @test det(F) ≈ det(A) atol=eps()
+    @test logdet(F) == -Inf
+end
+
 end # module TestCholesky
diff --git a/stdlib/LinearAlgebra/test/dense.jl b/stdlib/LinearAlgebra/test/dense.jl
index 51edd334f1e0f0..57cb06786e994d 100644
--- a/stdlib/LinearAlgebra/test/dense.jl
+++ b/stdlib/LinearAlgebra/test/dense.jl
@@ -15,17 +15,17 @@ n = 10
 n1 = div(n, 2)
 n2 = 2*n1
 
-Random.seed!(1234321)
+Random.seed!(1234323)
 
 @testset "Matrix condition number" begin
     ainit = rand(n,n)
     @testset "for $elty" for elty in (Float32, Float64, ComplexF32, ComplexF64)
         ainit = convert(Matrix{elty}, ainit)
         for a in (copy(ainit), view(ainit, 1:n, 1:n))
-            @test cond(a,1) ≈ 4.837320054554436e+02 atol=0.01
-            @test cond(a,2) ≈ 1.960057871514615e+02 atol=0.01
-            @test cond(a,Inf) ≈ 3.757017682707787e+02 atol=0.01
-            @test cond(a[:,1:5]) ≈ 10.233059337453463 atol=0.01
+            @test cond(a,1) ≈ 50.60863783272028 atol=0.5
+            @test cond(a,2) ≈ 23.059634761613314 atol=0.5
+            @test cond(a,Inf) ≈ 45.12503933120795 atol=0.4
+            @test cond(a[:,1:5]) ≈ 5.719500544258695 atol=0.01
             @test_throws ArgumentError cond(a,3)
         end
     end
@@ -88,8 +88,19 @@ bimg  = randn(n,2)/2
                 @test nullspace(zeros(eltya,n)) == Matrix(I, 1, 1)
                 @test nullspace(zeros(eltya,n), 0.1) == Matrix(I, 1, 1)
                 # test empty cases
-                @test nullspace(zeros(n, 0)) == Matrix(I, 0, 0)
-                @test nullspace(zeros(0, n)) == Matrix(I, n, n)
+                @test @inferred(nullspace(zeros(n, 0))) == Matrix(I, 0, 0)
+                @test @inferred(nullspace(zeros(0, n))) == Matrix(I, n, n)
+                # test vector cases
+                @test size(@inferred nullspace(a[:, 1])) == (1, 0)
+                @test size(@inferred nullspace(zero(a[:, 1]))) == (1, 1)
+                @test nullspace(zero(a[:, 1]))[1,1] == 1
+                # test adjortrans vectors, including empty ones
+                @test size(@inferred nullspace(a[:, 1]')) == (n, n - 1)
+                @test @inferred(nullspace(a[1:0, 1]')) == Matrix(I, 0, 0)
+                @test size(@inferred nullspace(b[1, :]')) == (2, 1)
+                @test @inferred(nullspace(b[1, 1:0]')) == Matrix(I, 0, 0)
+                @test size(@inferred nullspace(transpose(a[:, 1]))) == (n, n - 1)
+                @test size(@inferred nullspace(transpose(b[1, :]))) == (2, 1)
             end
         end
     end # for eltyb
@@ -145,9 +156,13 @@ end
         @testset "Matrix square root" begin
             asq = sqrt(a)
             @test asq*asq ≈ a
+            @test sqrt(transpose(a))*sqrt(transpose(a)) ≈ transpose(a)
+            @test sqrt(adjoint(a))*sqrt(adjoint(a)) ≈ adjoint(a)
             asym = a + a' # symmetric indefinite
             asymsq = sqrt(asym)
             @test asymsq*asymsq ≈ asym
+            @test sqrt(transpose(asym))*sqrt(transpose(asym)) ≈ transpose(asym)
+            @test sqrt(adjoint(asym))*sqrt(adjoint(asym)) ≈ adjoint(asym)
             if eltype(a) <: Real  # real square root
                 apos = a * a
                 @test sqrt(apos)^2 ≈ apos
@@ -447,6 +462,11 @@ end
                                      183.765138646367 183.765138646366  163.679601723179;
                                       71.797032399996  91.8825693231832 111.968106246371]')
         @test exp(A1) ≈ eA1
+        @test exp(adjoint(A1)) ≈ adjoint(eA1)
+        @test exp(transpose(A1)) ≈ transpose(eA1)
+        for f in (sin, cos, sinh, cosh, tanh, tan)
+            @test f(adjoint(A1)) ≈ f(copy(adjoint(A1)))
+        end
 
         A2  = convert(Matrix{elty},
                       [29.87942128909879    0.7815750847907159 -2.289519314033932;
@@ -457,26 +477,45 @@ end
                        -18231880972009252.0  60605228702221920.0 101291842930249760.0;
                        -30475770808580480.0 101291842930249728.0 169294411240851968.0])
         @test exp(A2) ≈ eA2
+        @test exp(adjoint(A2)) ≈ adjoint(eA2)
+        @test exp(transpose(A2)) ≈ transpose(eA2)
 
         A3  = convert(Matrix{elty}, [-131 19 18;-390 56 54;-387 57 52])
         eA3 = convert(Matrix{elty}, [-1.50964415879218 -5.6325707998812  -4.934938326092;
                                       0.367879439109187 1.47151775849686  1.10363831732856;
                                       0.135335281175235 0.406005843524598 0.541341126763207]')
         @test exp(A3) ≈ eA3
+        @test exp(adjoint(A3)) ≈ adjoint(eA3)
+        @test exp(transpose(A3)) ≈ transpose(eA3)
 
         A4 = convert(Matrix{elty}, [0.25 0.25; 0 0])
         eA4 = convert(Matrix{elty}, [1.2840254166877416 0.2840254166877415; 0 1])
         @test exp(A4) ≈ eA4
+        @test exp(adjoint(A4)) ≈ adjoint(eA4)
+        @test exp(transpose(A4)) ≈ transpose(eA4)
 
         A5 = convert(Matrix{elty}, [0 0.02; 0 0])
         eA5 = convert(Matrix{elty}, [1 0.02; 0 1])
         @test exp(A5) ≈ eA5
+        @test exp(adjoint(A5)) ≈ adjoint(eA5)
+        @test exp(transpose(A5)) ≈ transpose(eA5)
 
         # Hessenberg
         @test hessenberg(A1).H ≈ convert(Matrix{elty},
                                                  [4.000000000000000  -1.414213562373094  -1.414213562373095
                                                   -1.414213562373095   4.999999999999996  -0.000000000000000
                                                   0  -0.000000000000002   3.000000000000000])
+
+        # cis always returns a complex matrix
+        if elty <: Real
+            eltyim = Complex{elty}
+        else
+            eltyim = elty
+        end
+
+        @test cis(A1) ≈ convert(Matrix{eltyim}, [-0.339938 + 0.000941506im   0.772659  - 0.8469im     0.52745  + 0.566543im;
+                                                  0.650054 - 0.140179im     -0.0762135 + 0.284213im   0.38633  - 0.42345im ;
+                                                  0.650054 - 0.140179im      0.913779  + 0.143093im  -0.603663 - 0.28233im ]) rtol=7e-7
     end
 
     @testset "Additional tests for $elty" for elty in (Float64, ComplexF64)
@@ -485,15 +524,23 @@ end
                                      1/4 1/5 1/6 1/7;
                                      1/5 1/6 1/7 1/8])
         @test exp(log(A4)) ≈ A4
+        @test exp(log(transpose(A4))) ≈ transpose(A4)
+        @test exp(log(adjoint(A4))) ≈ adjoint(A4)
 
         A5  = convert(Matrix{elty}, [1 1 0 1; 0 1 1 0; 0 0 1 1; 1 0 0 1])
         @test exp(log(A5)) ≈ A5
+        @test exp(log(transpose(A5))) ≈ transpose(A5)
+        @test exp(log(adjoint(A5))) ≈ adjoint(A5)
 
         A6  = convert(Matrix{elty}, [-5 2 0 0 ; 1/2 -7 3 0; 0 1/3 -9 4; 0 0 1/4 -11])
         @test exp(log(A6)) ≈ A6
+        @test exp(log(transpose(A6))) ≈ transpose(A6)
+        @test exp(log(adjoint(A6))) ≈ adjoint(A6)
 
         A7  = convert(Matrix{elty}, [1 0 0 1e-8; 0 1 0 0; 0 0 1 0; 0 0 0 1])
         @test exp(log(A7)) ≈ A7
+        @test exp(log(transpose(A7))) ≈ transpose(A7)
+        @test exp(log(adjoint(A7))) ≈ adjoint(A7)
     end
 
     @testset "Integer promotion tests" begin
@@ -560,8 +607,13 @@ end
             @test cos(A) ≈ cos(-A)
             @test sin(A) ≈ -sin(-A)
             @test tan(A) ≈ sin(A) / cos(A)
+
             @test cos(A) ≈ real(exp(im*A))
             @test sin(A) ≈ imag(exp(im*A))
+            @test cos(A) ≈ real(cis(A))
+            @test sin(A) ≈ imag(cis(A))
+            @test cis(A) ≈ cos(A) + im * sin(A)
+
             @test cosh(A) ≈ 0.5 * (exp(A) + exp(-A))
             @test sinh(A) ≈ 0.5 * (exp(A) - exp(-A))
             @test cosh(A) ≈ cosh(-A)
@@ -605,6 +657,9 @@ end
 
         @test cos(A5) ≈ 0.5 * (exp(im*A5) + exp(-im*A5))
         @test sin(A5) ≈ -0.5im * (exp(im*A5) - exp(-im*A5))
+        @test cos(A5) ≈ 0.5 * (cis(A5) + cis(-A5))
+        @test sin(A5) ≈ -0.5im * (cis(A5) - cis(-A5))
+
         @test cosh(A5) ≈ 0.5 * (exp(A5) + exp(-A5))
         @test sinh(A5) ≈ 0.5 * (exp(A5) - exp(-A5))
     end
@@ -870,6 +925,38 @@ end
     end
 end
 
+@testset "issue #40141" begin
+    x = [-1 -eps() 0 0; eps() -1 0 0; 0 0 -1 -eps(); 0 0 eps() -1]
+    @test sqrt(x)^2 ≈ x
+
+    x2 =  [-1 -eps() 0 0; 3eps() -1 0 0; 0 0 -1 -3eps(); 0 0 eps() -1]
+    @test sqrt(x2)^2 ≈ x2
+
+    x3 = [-1 -eps() 0 0; eps() -1 0 0; 0 0 -1 -eps(); 0 0 eps() Inf]
+    @test all(isnan, sqrt(x3))
+
+    # test overflow/underflow handled
+    x4 = [0 -1e200; 1e200 0]
+    @test sqrt(x4)^2 ≈ x4
+
+    x5 = [0 -1e-200; 1e-200 0]
+    @test sqrt(x5)^2 ≈ x5
+
+    x6 = [1.0 1e200; -1e-200 1.0]
+    @test sqrt(x6)^2 ≈ x6
+end
+
+@testset "matrix logarithm block diagonal underflow/overflow" begin
+    x1 = [0 -1e200; 1e200 0]
+    @test exp(log(x1)) ≈ x1
+
+    x2 = [0 -1e-200; 1e-200 0]
+    @test exp(log(x2)) ≈ x2
+
+    x3 = [1.0 1e200; -1e-200 1.0]
+    @test exp(log(x3)) ≈ x3
+end
+
 @testset "issue #7181" begin
     A = [ 1  5  9
           2  6 10
diff --git a/stdlib/LinearAlgebra/test/diagonal.jl b/stdlib/LinearAlgebra/test/diagonal.jl
index 043b2181a7908f..d782fd358bad59 100644
--- a/stdlib/LinearAlgebra/test/diagonal.jl
+++ b/stdlib/LinearAlgebra/test/diagonal.jl
@@ -88,7 +88,7 @@ Random.seed!(1)
             @test func(D) ≈ func(DM) atol=n^2*eps(relty)*(1+(elty<:Complex))
         end
         if relty <: BlasFloat
-            for func in (exp, sinh, cosh, tanh, sech, csch, coth)
+            for func in (exp, cis, sinh, cosh, tanh, sech, csch, coth)
                 @test func(D) ≈ func(DM) atol=n^3*eps(relty)
             end
             @test log(Diagonal(abs.(D.diag))) ≈ log(abs.(DM)) atol=n^3*eps(relty)
@@ -102,6 +102,10 @@ Random.seed!(1)
         end
     end
 
+    @testset "Two-dimensional Euler formula for Diagonal" begin
+        @test cis(Diagonal([π, π])) ≈ -I
+    end
+
     @testset "Linear solve" begin
         for (v, U) in ((vv, UU), (view(vv, 1:n), view(UU, 1:n, 1:2)))
             @test D*v ≈ DM*v atol=n*eps(relty)*(1+(elty<:Complex))
@@ -172,12 +176,24 @@ Random.seed!(1)
             @test Array(a*D) ≈ a*DM
             @test Array(D*a) ≈ DM*a
             @test Array(D/a) ≈ DM/a
-            if relty <: BlasFloat
-                for b in (rand(elty,n,n), sparse(rand(elty,n,n)), rand(elty,n), sparse(rand(elty,n)))
-                    @test lmul!(copy(D), copy(b)) ≈ Array(D)*Array(b)
-                    @test lmul!(transpose(copy(D)), copy(b)) ≈ transpose(Array(D))*Array(b)
-                    @test lmul!(adjoint(copy(D)), copy(b)) ≈ Array(D)'*Array(b)
-                end
+            if elty <: Real
+                @test Array(abs.(D)^a) ≈ abs.(DM)^a
+            else
+                @test Array(D^a) ≈ DM^a
+            end
+            @test Diagonal(1:100)^2 == Diagonal((1:100).^2)
+            p = 3
+            @test Diagonal(1:100)^p == Diagonal((1:100).^p)
+            @test Diagonal(1:100)^(-1) == Diagonal(inv.(1:100))
+            @test Diagonal(1:100)^2.0 == Diagonal((1:100).^2.0)
+            @test Diagonal(1:100)^(2.0+0im) == Diagonal((1:100).^(2.0+0im))
+        end
+
+        if relty <: BlasFloat
+            for b in (rand(elty,n,n), sparse(rand(elty,n,n)), rand(elty,n), sparse(rand(elty,n)))
+                @test lmul!(copy(D), copy(b)) ≈ Array(D)*Array(b)
+                @test lmul!(transpose(copy(D)), copy(b)) ≈ transpose(Array(D))*Array(b)
+                @test lmul!(adjoint(copy(D)), copy(b)) ≈ Array(D)'*Array(b)
             end
         end
 
@@ -213,14 +229,14 @@ Random.seed!(1)
                 @test Array(op(Dr, Aherm)) ≈ Array(Hermitian(op(Array(Dr), Array(Aherm))))
             end
         end
-        @test Array(D*Transpose(Asym)) ≈ Array(D) * Array(transpose(Asym))
-        @test Array(D*Adjoint(Asym)) ≈ Array(D) * Array(adjoint(Asym))
-        @test Array(D*Transpose(Aherm)) ≈ Array(D) * Array(transpose(Aherm))
-        @test Array(D*Adjoint(Aherm)) ≈ Array(D) * Array(adjoint(Aherm))
-        @test Array(Transpose(Asym)*Transpose(D)) ≈ Array(transpose(Asym)) * Array(transpose(D))
-        @test Array(Transpose(D)*Transpose(Asym)) ≈ Array(transpose(D)) * Array(transpose(Asym))
-        @test Array(Adjoint(Aherm)*Adjoint(D)) ≈ Array(adjoint(Aherm)) * Array(adjoint(D))
-        @test Array(Adjoint(D)*Adjoint(Aherm)) ≈ Array(adjoint(D)) * Array(adjoint(Aherm))
+        @test Array(D*transpose(Asym)) ≈ Array(D) * Array(transpose(Asym))
+        @test Array(D*adjoint(Asym)) ≈ Array(D) * Array(adjoint(Asym))
+        @test Array(D*transpose(Aherm)) ≈ Array(D) * Array(transpose(Aherm))
+        @test Array(D*adjoint(Aherm)) ≈ Array(D) * Array(adjoint(Aherm))
+        @test Array(transpose(Asym)*transpose(D)) ≈ Array(transpose(Asym)) * Array(transpose(D))
+        @test Array(transpose(D)*transpose(Asym)) ≈ Array(transpose(D)) * Array(transpose(Asym))
+        @test Array(adjoint(Aherm)*adjoint(D)) ≈ Array(adjoint(Aherm)) * Array(adjoint(D))
+        @test Array(adjoint(D)*adjoint(Aherm)) ≈ Array(adjoint(D)) * Array(adjoint(Aherm))
 
         # Performance specialisations for A*_mul_B!
         vvv = similar(vv)
@@ -230,7 +246,7 @@ Random.seed!(1)
 
         UUU = similar(UU)
         for transformA in (identity, adjoint, transpose)
-            for transformD in (identity, Adjoint, Transpose, adjoint, transpose)
+            for transformD in (identity, adjoint, transpose)
                 @test mul!(UUU, transformA(UU), transformD(D)) ≈  transformA(UU) * Matrix(transformD(D))
                 @test mul!(UUU, transformD(D), transformA(UU)) ≈  Matrix(transformD(D)) * transformA(UU)
             end
@@ -295,6 +311,10 @@ Random.seed!(1)
         M4 = rand(elty, n÷2, n÷2)
         @test kron(D3, M4) ≈ kron(DM3, M4)
         @test kron(M4, D3) ≈ kron(M4, DM3)
+        X = [ones(1,1) for i in 1:2, j in 1:2]
+        @test kron(I(2), X)[1,3] == zeros(1,1)
+        X = [ones(2,2) for i in 1:2, j in 1:2]
+        @test kron(I(2), X)[1,3] == zeros(2,2)
     end
     @testset "iszero, isone, triu, tril" begin
         Dzero = Diagonal(zeros(elty, 10))
@@ -395,6 +415,28 @@ Random.seed!(1)
 
 end
 
+@testset "rdiv! (#40887)" begin
+    @test rdiv!(Matrix(Diagonal([2.0, 3.0])), Diagonal(2:3)) == Diagonal([1.0, 1.0])
+    @test rdiv!(fill(3.0, 3, 3), 3.0I(3)) == ones(3,3)
+end
+
+@testset "kron (issue #40595)" begin
+    # custom array type to test that kron on Diagonal matrices preserves types of the parents if possible
+    struct KronTestArray{T, N, AT} <: AbstractArray{T, N}
+        data::AT
+    end
+    KronTestArray(data::AbstractArray) = KronTestArray{eltype(data), ndims(data), typeof(data)}(data)
+    Base.size(A::KronTestArray) = size(A.data)
+    LinearAlgebra.kron(A::KronTestArray, B::KronTestArray) = KronTestArray(kron(A.data, B.data))
+    Base.getindex(K::KronTestArray{<:Any,N}, i::Vararg{Int,N}) where {N} = K.data[i...]
+
+    A = KronTestArray([1, 2, 3]);
+    @test kron(A, A) isa KronTestArray
+    Ad = Diagonal(A);
+    @test kron(Ad, Ad).diag isa KronTestArray
+    @test kron(Ad, Ad).diag == kron([1, 2, 3], [1, 2, 3])
+end
+
 @testset "svdvals and eigvals (#11120/#11247)" begin
     D = Diagonal(Matrix{Float64}[randn(3,3), randn(2,2)])
     @test sort([svdvals(D)...;], rev = true) ≈ svdvals([D.diag[1] zeros(3,2); zeros(2,3) D.diag[2]])
@@ -540,7 +582,7 @@ end
     D = Diagonal(randn(5))
     Q = qr(randn(5, 5)).Q
     @test D * Q' == Array(D) * Q'
-    Q = qr(randn(5, 5), Val(true)).Q
+    Q = qr(randn(5, 5), ColumnNorm()).Q
     @test_throws ArgumentError lmul!(Q, D)
 end
 
@@ -568,6 +610,7 @@ end
     @test ishermitian(Dsym) == false
 
     @test exp(D) == Diagonal([exp([1 2; 3 4]), exp([1 2; 3 4])])
+    @test cis(D) == Diagonal([cis([1 2; 3 4]), cis([1 2; 3 4])])
     @test log(D) == Diagonal([log([1 2; 3 4]), log([1 2; 3 4])])
     @test sqrt(D) == Diagonal([sqrt([1 2; 3 4]), sqrt([1 2; 3 4])])
 
@@ -618,15 +661,13 @@ end
         fullBB = copyto!(Matrix{Matrix{T}}(undef, 2, 2), BB)
         for (transform1, transform2) in ((identity,  identity),
                 (identity,  adjoint  ), (adjoint,   identity ), (adjoint,   adjoint  ),
-                (identity,  transpose), (transpose, identity ), (transpose, transpose),
-                (identity,  Adjoint  ), (Adjoint,   identity ), (Adjoint,   Adjoint  ),
-                (identity,  Transpose), (Transpose, identity ), (Transpose, Transpose))
+                (identity,  transpose), (transpose, identity ), (transpose, transpose))
             @test *(transform1(D), transform2(B))::typeof(D) ≈ *(transform1(Matrix(D)), transform2(Matrix(B))) atol=2 * eps()
             @test *(transform1(DD), transform2(BB))::typeof(DD) == *(transform1(fullDD), transform2(fullBB))
         end
         M = randn(T, 5, 5)
         MM = [randn(T, 2, 2) for _ in 1:2, _ in 1:2]
-        for transform in (identity, adjoint, transpose, Adjoint, Transpose)
+        for transform in (identity, adjoint, transpose)
             @test lmul!(transform(D), copy(M)) ≈ *(transform(Matrix(D)), M)
             @test rmul!(copy(M), transform(D)) ≈ *(M, transform(Matrix(D)))
             @test lmul!(transform(DD), copy(MM)) ≈ *(transform(fullDD), MM)
@@ -640,7 +681,7 @@ end
     @test Diagonal(transpose([1, 2, 3])) == Diagonal([1 2 3])
 end
 
-@testset "Multiplication with Adjoint and Transpose vectors (#26863)" begin
+@testset "Multiplication with adjoint and transpose vectors (#26863)" begin
     x = collect(1:2)
     xt = transpose(x)
     A = reshape([[1 2; 3 4], zeros(Int,2,2), zeros(Int, 2, 2), [5 6; 7 8]], 2, 2)
@@ -653,13 +694,15 @@ end
     @test yt*D*y == (yt*D)*y == (yt*A)*y
 end
 
-@testset "Multiplication of single element Diagonal (#36746)" begin
+@testset "Multiplication of single element Diagonal (#36746, #40726)" begin
     @test_throws DimensionMismatch Diagonal(randn(1)) * randn(5)
     @test_throws DimensionMismatch Diagonal(randn(1)) * Diagonal(randn(3, 3))
     A = [1 0; 0 2]
     v = [3, 4]
     @test Diagonal(A) * v == A * v
     @test Diagonal(A) * Diagonal(A) == A * A
+    @test_throws DimensionMismatch [1 0;0 1] * Diagonal([2 3])   # Issue #40726
+    @test_throws DimensionMismatch lmul!(Diagonal([1]), [1,2,3]) # nearby
 end
 
 @testset "Triangular division by Diagonal #27989" begin
@@ -735,6 +778,18 @@ end
     @test dot(zeros(Int32, 0), Diagonal(zeros(Int, 0)), zeros(Int16, 0)) === 0
 end
 
+@testset "Diagonal(undef)" begin
+    d = Diagonal{Float32}(undef, 2)
+    @test length(d.diag) == 2
+end
+
+@testset "permutedims (#39447)" begin
+    for D in (Diagonal(zeros(5)), Diagonal(zeros(5) .+ 1im), Diagonal([[1,2],[3,4]]))
+        @test permutedims(D) === permutedims(D,(1,2)) === permutedims(D,(2,1)) === D
+        @test_throws ArgumentError permutedims(D,(1,3))
+    end
+end
+
 @testset "Inner product" begin
     A = Diagonal(rand(10) .+ im)
     B = Diagonal(rand(10) .+ im)
@@ -744,4 +799,31 @@ end
     @test dot(A, B) ≈ conj(dot(B, A))
 end
 
+@testset "eltype relaxation(#41015)" begin
+    A = rand(3,3)
+    for trans in (identity, adjoint, transpose)
+        @test ldiv!(trans(I(3)), A) == A
+        @test rdiv!(A, trans(I(3))) == A
+    end
+end
+
+const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
+isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl"))
+using .Main.ImmutableArrays
+
+@testset "Conversion to AbstractArray" begin
+    # tests corresponding to #34995
+    d = ImmutableArray([1, 2, 3, 4])
+    D = Diagonal(d)
+
+    @test convert(AbstractArray{Float64}, D)::Diagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == D
+    @test convert(AbstractMatrix{Float64}, D)::Diagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == D
+end
+
+@testset "divisions functionality" for elty in (Int, Float64, ComplexF64)
+    B = Diagonal(rand(elty,5,5))
+    x = rand(elty)
+    @test \(x, B) == /(B, x)
+end
+
 end # module TestDiagonal
diff --git a/stdlib/LinearAlgebra/test/eigen.jl b/stdlib/LinearAlgebra/test/eigen.jl
index fd9f7dfba92ee9..88a8048b52f314 100644
--- a/stdlib/LinearAlgebra/test/eigen.jl
+++ b/stdlib/LinearAlgebra/test/eigen.jl
@@ -11,7 +11,7 @@ n = 10
 n1 = div(n, 2)
 n2 = 2*n1
 
-Random.seed!(1234321)
+Random.seed!(12343219)
 
 areal = randn(n,n)/2
 aimg  = randn(n,n)/2
@@ -170,5 +170,37 @@ end
     @test eigmax(A') == eigmax(copy(A'))
 end
 
+@testset "equality of eigen factorizations" begin
+    A = randn(3, 3)
+    @test eigen(A) == eigen(A)
+    @test hash(eigen(A)) == hash(eigen(A))
+    @test isequal(eigen(A), eigen(A))
+end
+
+@testset "Float16" begin
+    A = Float16[4. 12. -16.; 12. 37. -43.; -16. -43. 98.]
+    B = eigen(A)
+    B32 = eigen(Float32.(A))
+    C = Float16[3 -2; 4 -1]
+    D = eigen(C)
+    D32 = eigen(Float32.(C))
+    F = eigen(complex(C))
+    F32 = eigen(complex(Float32.(C)))
+    @test B isa Eigen{Float16, Float16, Matrix{Float16}, Vector{Float16}}
+    @test B.values isa Vector{Float16}
+    @test B.vectors isa Matrix{Float16}
+    @test B.values ≈ B32.values
+    @test B.vectors ≈ B32.vectors
+    @test D isa Eigen{ComplexF16, ComplexF16, Matrix{ComplexF16}, Vector{ComplexF16}}
+    @test D.values isa Vector{ComplexF16}
+    @test D.vectors isa Matrix{ComplexF16}
+    @test D.values ≈ D32.values
+    @test D.vectors ≈ D32.vectors
+    @test F isa Eigen{ComplexF16, ComplexF16, Matrix{ComplexF16}, Vector{ComplexF16}}
+    @test F.values isa Vector{ComplexF16}
+    @test F.vectors isa Matrix{ComplexF16}
+    @test F.values ≈ F32.values
+    @test F.vectors ≈ F32.vectors
+end
 
 end # module TestEigen
diff --git a/stdlib/LinearAlgebra/test/factorization.jl b/stdlib/LinearAlgebra/test/factorization.jl
new file mode 100644
index 00000000000000..6a9226d80cdf6b
--- /dev/null
+++ b/stdlib/LinearAlgebra/test/factorization.jl
@@ -0,0 +1,60 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+module TestFactorization
+using Test, LinearAlgebra
+
+@testset "equality for factorizations - $f" for f in Any[
+    bunchkaufman,
+    cholesky,
+    x -> cholesky(x, Val(true)),
+    eigen,
+    hessenberg,
+    lq,
+    lu,
+    qr,
+    x -> qr(x, ColumnNorm()),
+    svd,
+    schur,
+]
+    A = randn(3, 3)
+    A = A * A' # ensure A is pos. def. and symmetric
+    F, G = f(A), f(A)
+
+    @test F == G
+    @test isequal(F, G)
+    @test hash(F) == hash(G)
+
+    f === hessenberg && continue
+
+    # change all arrays in F to have eltype Float32
+    F = typeof(F).name.wrapper(Base.mapany(1:nfields(F)) do i
+        x = getfield(F, i)
+        return x isa AbstractArray{Float64} ? Float32.(x) : x
+    end...)
+    # round all arrays in G to the nearest Float64 representable as Float32
+    G = typeof(G).name.wrapper(Base.mapany(1:nfields(G)) do i
+        x = getfield(G, i)
+        return x isa AbstractArray{Float64} ? Float64.(Float32.(x)) : x
+    end...)
+
+    @test F == G broken=!(f === eigen || f === qr)
+    @test isequal(F, G) broken=!(f === eigen || f === qr)
+    @test hash(F) == hash(G)
+end
+
+@testset "equality of QRCompactWY" begin
+    A = rand(100, 100)
+    F, G = qr(A), qr(A)
+
+    @test F == G
+    @test isequal(F, G)
+    @test hash(F) == hash(G)
+
+    G.T[28, 100] = 42
+
+    @test F != G
+    @test !isequal(F, G)
+    @test hash(F) != hash(G)
+end
+
+end
diff --git a/stdlib/LinearAlgebra/test/generic.jl b/stdlib/LinearAlgebra/test/generic.jl
index 0c1365b11679ff..489b96be56019a 100644
--- a/stdlib/LinearAlgebra/test/generic.jl
+++ b/stdlib/LinearAlgebra/test/generic.jl
@@ -142,6 +142,10 @@ end
         @testset "Scaling with 5-argument mul!" begin
             @test mul!(copy(a), 5., a, 10, 100) == a*150
             @test mul!(copy(a), a, 5., 10, 100) == a*150
+            @test mul!(vec(copy(a)), 5., a, 10, 100) == vec(a*150)
+            @test mul!(vec(copy(a)), a, 5., 10, 100) == vec(a*150)
+            @test_throws DimensionMismatch mul!([vec(copy(a)); 0], 5., a, 10, 100)
+            @test_throws DimensionMismatch mul!([vec(copy(a)); 0], a, 5., 10, 100)
             @test mul!(copy(a), Diagonal([1.; 2.]), a, 10, 100) == 10a.*[1; 2] .+ 100a
             @test mul!(copy(a), Diagonal([1; 2]), a, 10, 100)   == 10a.*[1; 2] .+ 100a
             @test mul!(copy(a), a, Diagonal(1.:an), 10, 100) == 10a.*Vector(1:an)' .+ 100a
@@ -189,6 +193,7 @@ end
         @test det(a) == a
         @test norm(a) == abs(a)
         @test norm(a, 0) == 1
+        @test norm(0, 0) == 0
     end
 
     @test !issymmetric(NaN16)
@@ -209,6 +214,19 @@ end
 @test norm([2.4e-322, 4.4e-323], 3) ≈ 2.4e-322
 @test_throws ArgumentError opnorm(Matrix{Float64}(undef,5,5),5)
 
+# operator norm for zero-dimensional domain is zero (see #40370)
+@testset "opnorm" begin
+    for m in (0, 1, 2)
+        @test @inferred(opnorm(fill(1,0,m))) == 0.0
+        @test @inferred(opnorm(fill(1,m,0))) == 0.0
+    end
+    for m in (1, 2)
+        @test @inferred(opnorm(fill(1im,1,m))) ≈ sqrt(m)
+        @test @inferred(opnorm(fill(1im,m,1))) ≈ sqrt(m)
+    end
+    @test @inferred(opnorm(fill(1,2,2))) ≈ 2
+end
+
 @testset "generic norm for arrays of arrays" begin
     x = Vector{Int}[[1,2], [3,4]]
     @test @inferred(norm(x)) ≈ sqrt(30)
@@ -228,12 +246,14 @@ end
     rotate!(x, y, c, s)
     @test x ≈ c*x2 + s*y2
     @test y ≈ -conj(s)*x2 + c*y2
+    @test_throws DimensionMismatch rotate!([x; x], y, c, s)
 
     x3 = copy(x)
     y3 = copy(y)
     reflect!(x, y, c, s)
     @test x ≈ c*x3 + s*y3
     @test y ≈ conj(s)*x3 - c*y3
+    @test_throws DimensionMismatch reflect!([x; x], y, c, s)
 end
 
 @testset "LinearAlgebra.axp(b)y! for element type without commutative multiplication" begin
@@ -257,6 +277,7 @@ end
     ry = [2 8]
     @test LinearAlgebra.axpy!(α, x, rx, y, ry) == [1 1 1 1; 11 1 1 26]
 end
+
 @testset "norm and normalize!" begin
     vr = [3.0, 4.0]
     for Tr in (Float32, Float64)
@@ -334,6 +355,11 @@ end
     @test [[1,2, [3,4]], 5.0, [6im, [7.0, 8.0]]] ≈ [[1,2, [3,4]], 5.0, [6im, [7.0, 8.0]]]
 end
 
+@testset "Issue 40128" begin
+    @test det(BigInt[9 1 8 0; 0 0 8 7; 7 6 8 3; 2 9 7 7])::BigInt == -1
+    @test det(BigInt[1 big(2)^65+1; 3 4])::BigInt == (4 - 3*(big(2)^65+1))
+end
+
 # Minimal modulo number type - but not subtyping Number
 struct ModInt{n}
     k
@@ -361,13 +387,13 @@ LinearAlgebra.Transpose(a::ModInt{n}) where {n} = transpose(a)
     A = [ModInt{2}(1) ModInt{2}(0); ModInt{2}(1) ModInt{2}(1)]
     b = [ModInt{2}(1), ModInt{2}(0)]
 
-    @test A*(lu(A, Val(false))\b) == b
+    @test A*(lu(A, NoPivot())\b) == b
 
     # Needed for pivoting:
     Base.abs(a::ModInt{n}) where {n} = a
     Base.:<(a::ModInt{n}, b::ModInt{n}) where {n} = a.k < b.k
 
-    @test A*(lu(A, Val(true))\b) == b
+    @test A*(lu(A, RowMaximum())\b) == b
 end
 
 @testset "Issue 18742" begin
diff --git a/stdlib/LinearAlgebra/test/hessenberg.jl b/stdlib/LinearAlgebra/test/hessenberg.jl
index 62cafecb576092..65dc0290605969 100644
--- a/stdlib/LinearAlgebra/test/hessenberg.jl
+++ b/stdlib/LinearAlgebra/test/hessenberg.jl
@@ -4,6 +4,10 @@ module TestHessenberg
 
 using Test, LinearAlgebra, Random
 
+const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
+isdefined(Main, :Furlongs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Furlongs.jl"))
+using .Main.Furlongs
+
 # for tuple tests below
 ≅(x,y) = all(p -> p[1] ≈ p[2], zip(x,y))
 
@@ -55,6 +59,54 @@ let n = 10
         H = UpperHessenberg(Areal)
         @test Array(Hc + H) == Array(Hc) + Array(H)
         @test Array(Hc - H) == Array(Hc) - Array(H)
+        @testset "Preserve UpperHessenberg shape (issue #39388)" begin
+            for H = (UpperHessenberg(Areal), UpperHessenberg(Furlong.(Areal)))
+                if eltype(H) <: Furlong
+                    A = Furlong.(rand(n,n))
+                    d = Furlong.(rand(n))
+                    dl = Furlong.(rand(n-1))
+                    du = Furlong.(rand(n-1))
+                    us = Furlong(1)*I
+                else
+                    A = rand(n,n)
+                    d = rand(n)
+                    dl = rand(n-1)
+                    du = rand(n-1)
+                    us = 1*I
+                end
+                @testset "$op" for op = (+,-)
+                    for x = (us, Diagonal(d), Bidiagonal(d,dl,:U), Bidiagonal(d,dl,:L),
+                             Tridiagonal(dl,d,du), SymTridiagonal(d,dl),
+                             UpperTriangular(A), UnitUpperTriangular(A))
+                        @test op(H,x) == op(Array(H),x)
+                        @test op(x,H) == op(x,Array(H))
+                        @test op(H,x) isa UpperHessenberg
+                        @test op(x,H) isa UpperHessenberg
+                    end
+                end
+                A = randn(n,n)
+                d = randn(n)
+                dl = randn(n-1)
+                @testset "Multiplication/division" begin
+                    for x = (5, 5I, Diagonal(d), Bidiagonal(d,dl,:U),
+                             UpperTriangular(A), UnitUpperTriangular(A))
+                        @test H*x == Array(H)*x broken = eltype(H) <: Furlong && x isa Bidiagonal
+                        @test x*H == x*Array(H) broken = eltype(H) <: Furlong && x isa Bidiagonal
+                        @test H/x == Array(H)/x broken = eltype(H) <: Furlong && x isa Union{Bidiagonal, Diagonal, UpperTriangular}
+                        @test x\H == x\Array(H) broken = eltype(H) <: Furlong && x isa Union{Bidiagonal, Diagonal, UpperTriangular}
+                        @test H*x isa UpperHessenberg broken = eltype(H) <: Furlong && x isa Bidiagonal
+                        @test x*H isa UpperHessenberg broken = eltype(H) <: Furlong && x isa Bidiagonal
+                        @test H/x isa UpperHessenberg broken = eltype(H) <: Furlong && x isa Union{Bidiagonal, Diagonal}
+                        @test x\H isa UpperHessenberg broken = eltype(H) <: Furlong && x isa Union{Bidiagonal, Diagonal}
+                    end
+                    x = Bidiagonal(d, dl, :L)
+                    @test H*x == Array(H)*x
+                    @test x*H == x*Array(H)
+                    @test H/x == Array(H)/x broken = eltype(H) <: Furlong
+                    @test_broken x\H == x\Array(H) # issue 40037
+                end
+            end
+        end
     end
 
     @testset for eltya in (Float32, Float64, ComplexF32, ComplexF64, Int), herm in (false, true)
@@ -133,4 +185,27 @@ end
     @test Base.propertynames(F, true) == (:Q, :H, :μ, :τ, :factors, :uplo)
 end
 
+@testset "adjoint of Hessenberg" begin
+    Ar = randn(5, 5)
+    Ac = complex.(randn(5, 5), randn(5, 5))
+    b = ones(size(Ar, 1))
+
+    for A in (Ar, Ac)
+        F = hessenberg(A)
+        @test A'\b ≈ F'\b
+    end
+end
+
+isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl"))
+using .Main.ImmutableArrays
+
+@testset "Conversion to AbstractArray" begin
+    # tests corresponding to #34995
+    A = ImmutableArray([1 2 3; 4 5 6; 7 8 9])
+    H = UpperHessenberg(A)
+
+    @test convert(AbstractArray{Float64}, H)::UpperHessenberg{Float64,ImmutableArray{Float64,2,Array{Float64,2}}} == H
+    @test convert(AbstractMatrix{Float64}, H)::UpperHessenberg{Float64,ImmutableArray{Float64,2,Array{Float64,2}}} == H
+end
+
 end # module TestHessenberg
diff --git a/stdlib/LinearAlgebra/test/lapack.jl b/stdlib/LinearAlgebra/test/lapack.jl
index ccf8cf980f1c1e..6453f0f5210927 100644
--- a/stdlib/LinearAlgebra/test/lapack.jl
+++ b/stdlib/LinearAlgebra/test/lapack.jl
@@ -407,10 +407,10 @@ end
     @testset for elty in (Float32, Float64)
         d = rand(elty,10)
         e = rand(elty,9)
-        @test_throws DimensionMismatch LAPACK.stev!('U',d,rand(elty,10))
+        @test_throws DimensionMismatch LAPACK.stev!('U',d,rand(elty,11))
         @test_throws DimensionMismatch LAPACK.stebz!('A','B',zero(elty),zero(elty),0,0,-1.,d,rand(elty,10))
         @test_throws DimensionMismatch LAPACK.stegr!('N','A',d,rand(elty,11),zero(elty),zero(elty),0,0)
-        @test_throws DimensionMismatch LAPACK.stein!(d,zeros(elty,10),zeros(elty,10),zeros(BlasInt,10),zeros(BlasInt,10))
+        @test_throws DimensionMismatch LAPACK.stein!(d,zeros(elty,11),zeros(elty,10),zeros(BlasInt,10),zeros(BlasInt,10))
         @test_throws DimensionMismatch LAPACK.stein!(d,e,zeros(elty,11),zeros(BlasInt,10),zeros(BlasInt,10))
     end
 end
diff --git a/stdlib/LinearAlgebra/test/lq.jl b/stdlib/LinearAlgebra/test/lq.jl
index 6e21b9ac6c1ccd..b054621e113138 100644
--- a/stdlib/LinearAlgebra/test/lq.jl
+++ b/stdlib/LinearAlgebra/test/lq.jl
@@ -40,7 +40,7 @@ rectangularQ(Q::LinearAlgebra.LQPackedQ) = convert(Array, Q)
                 lqa   = lq(a)
                 x = lqa\b
                 l,q   = lqa.L, lqa.Q
-                qra   = qr(a, Val(true))
+                qra   = qr(a, ColumnNorm())
                 @testset "Basic ops" begin
                     @test size(lqa,1) == size(a,1)
                     @test size(lqa,3) == 1
@@ -56,9 +56,6 @@ rectangularQ(Q::LinearAlgebra.LQPackedQ) = convert(Array, Q)
                     @test l*q ≈ a
                     @test Array(lqa) ≈ a
                     @test Array(copy(lqa)) ≈ a
-                    lstring = sprint(show, l, context = :compact=>true)
-                    qstring = sprint(show, q, context = :compact=>true)
-                    @test sprint(show,MIME"text/plain"(),lqa) == "$(typeof(lqa)) with factors L and Q:\n$lstring\n$qstring"
                     @test LinearAlgebra.Factorization{eltya}(lqa) === lqa
                     @test Matrix{eltya}(q) isa Matrix{eltya}
                     # test Array{T}(LQPackedQ{T})
@@ -203,4 +200,42 @@ end
     end
 end
 
+@testset "REPL printing" begin
+    bf = IOBuffer()
+    show(bf, "text/plain", lq(Matrix(I, 4, 4)))
+    seekstart(bf)
+    @test String(take!(bf)) == """
+LinearAlgebra.LQ{Float64, Matrix{Float64}}
+L factor:
+4×4 Matrix{Float64}:
+ 1.0  0.0  0.0  0.0
+ 0.0  1.0  0.0  0.0
+ 0.0  0.0  1.0  0.0
+ 0.0  0.0  0.0  1.0
+Q factor:
+4×4 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}}:
+ 1.0  0.0  0.0  0.0
+ 0.0  1.0  0.0  0.0
+ 0.0  0.0  1.0  0.0
+ 0.0  0.0  0.0  1.0"""
+end
+
+@testset "adjoint of LQ" begin
+    n = 5
+
+    for b in (ones(n), ones(n, 2), ones(Complex{Float64}, n, 2))
+        for A in (
+            randn(n, n),
+            # A wide A makes A' tall, so A'\b becomes a least squares problem, similarly to QR
+            randn(n - 2, n),
+            complex.(randn(n, n), randn(n, n)))
+
+            F = lq(A)
+            @test A'\b ≈ F'\b
+        end
+        @test_throws DimensionMismatch lq(randn(n, n + 2))'\b
+    end
+
+end
+
 end # module TestLQ
diff --git a/stdlib/LinearAlgebra/test/lu.jl b/stdlib/LinearAlgebra/test/lu.jl
index 8e6c06cdbd12e5..0dffe7fa1738f8 100644
--- a/stdlib/LinearAlgebra/test/lu.jl
+++ b/stdlib/LinearAlgebra/test/lu.jl
@@ -11,7 +11,7 @@ n = 10
 n1 = div(n, 2)
 n2 = 2*n1
 
-Random.seed!(1234321)
+Random.seed!(1234324)
 
 areal = randn(n,n)/2
 aimg  = randn(n,n)/2
@@ -37,7 +37,7 @@ dimg  = randn(n)/2
     else
         convert(Tridiagonal{eltya}, Tridiagonal(dlreal, dreal, dureal))
     end
-    ε = εa = eps(abs(float(one(eltya))))
+    εa = eps(abs(float(one(eltya))))
 
     if eltya <: BlasFloat
         @testset "LU factorization for Number" begin
@@ -61,7 +61,7 @@ dimg  = randn(n)/2
         lua   = factorize(a)
         @test_throws ErrorException lua.Z
         l,u,p = lua.L, lua.U, lua.p
-        ll,ul,pl = lu(a)
+        ll,ul,pl = @inferred lu(a)
         @test ll * ul ≈ a[pl,:]
         @test l*u ≈ a[p,:]
         @test (l*u)[invperm(p),:] ≈ a
@@ -71,7 +71,7 @@ dimg  = randn(n)/2
             # test conversion of LU factorization's numerical type
             bft = eltya <: Real ? LinearAlgebra.LU{BigFloat} : LinearAlgebra.LU{Complex{BigFloat}}
             bflua = convert(bft, lua)
-            @test bflua.L*bflua.U ≈ big.(a)[p,:] rtol=ε
+            @test bflua.L*bflua.U ≈ big.(a)[p,:] rtol=εa*norm(a)
             @test Factorization{eltya}(lua) === lua
             # test Factorization with different eltype
             if eltya <: BlasReal
@@ -85,9 +85,9 @@ dimg  = randn(n)/2
     end
     κd    = cond(Array(d),1)
     @testset "Tridiagonal LU" begin
-        lud   = lu(d)
+        lud = @inferred lu(d)
         @test LinearAlgebra.issuccess(lud)
-        @test lu(lud) == lud
+        @test @inferred(lu(lud)) == lud
         @test_throws ErrorException lud.Z
         @test lud.L*lud.U ≈ lud.P*Array(d)
         @test lud.L*lud.U ≈ Array(d)[lud.p,:]
@@ -199,14 +199,14 @@ dimg  = randn(n)/2
             @test lua.L*lua.U ≈ lua.P*a[:,1:n1]
         end
         @testset "Fat LU" begin
-            lua   = lu(a[1:n1,:])
+            lua   = @inferred lu(a[1:n1,:])
             @test lua.L*lua.U ≈ lua.P*a[1:n1,:]
         end
     end
 
     @testset "LU of Symmetric/Hermitian" begin
         for HS in (Hermitian(a'a), Symmetric(a'a))
-            luhs = lu(HS)
+            luhs = @inferred lu(HS)
             @test luhs.L*luhs.U ≈ luhs.P*Matrix(HS)
         end
     end
@@ -229,19 +229,19 @@ end
     @test_throws SingularException lu!(copy(A); check = true)
     @test !issuccess(lu(A; check = false))
     @test !issuccess(lu!(copy(A); check = false))
-    @test_throws ZeroPivotException lu(A, Val(false))
-    @test_throws ZeroPivotException lu!(copy(A), Val(false))
-    @test_throws ZeroPivotException lu(A, Val(false); check = true)
-    @test_throws ZeroPivotException lu!(copy(A), Val(false); check = true)
-    @test !issuccess(lu(A, Val(false); check = false))
-    @test !issuccess(lu!(copy(A), Val(false); check = false))
+    @test_throws ZeroPivotException lu(A, NoPivot())
+    @test_throws ZeroPivotException lu!(copy(A), NoPivot())
+    @test_throws ZeroPivotException lu(A, NoPivot(); check = true)
+    @test_throws ZeroPivotException lu!(copy(A), NoPivot(); check = true)
+    @test !issuccess(lu(A, NoPivot(); check = false))
+    @test !issuccess(lu!(copy(A), NoPivot(); check = false))
     F = lu(A; check = false)
     @test sprint((io, x) -> show(io, "text/plain", x), F) ==
         "Failed factorization of type $(typeof(F))"
 end
 
 @testset "conversion" begin
-    Random.seed!(3)
+    Random.seed!(4)
     a = Tridiagonal(rand(9),rand(10),rand(9))
     fa = Array(a)
     falu = lu(fa)
@@ -320,7 +320,7 @@ include("trickyarithmetic.jl")
 @testset "lu with type whose sum is another type" begin
     A = TrickyArithmetic.A[1 2; 3 4]
     ElT = TrickyArithmetic.D{TrickyArithmetic.C,TrickyArithmetic.C}
-    B = lu(A, Val(false))
+    B = lu(A, NoPivot())
     @test B isa LinearAlgebra.LU{ElT,Matrix{ElT}}
 end
 
@@ -398,4 +398,21 @@ end
         @test a == c
     end
 end
+
+@testset "lu(A) has a fallback for abstract matrices (#40831)" begin
+    # check that lu accepts structured inputs (Diagonal, upper Bidiagonal) and returns an LU
+    A0 = rand(5, 5)
+    @test lu(Diagonal(A0)) isa LU
+    @test Matrix(lu(Diagonal(A0))) ≈ Diagonal(A0)  # factorization reproduces its input
+    @test lu(Bidiagonal(A0, :U)) isa LU
+    @test Matrix(lu(Bidiagonal(A0, :U))) ≈ Bidiagonal(A0, :U)
+
+    # lu(A) copies A and then invokes lu!; the Tridiagonal-backed LU type asserted below
+    # guards that the most efficient implementation of lu! continues to be used
+    A1 = Tridiagonal(rand(2), rand(3), rand(2))
+    @test lu(A1) isa LU{Float64, Tridiagonal{Float64, Vector{Float64}}}
+    @test lu(A1, RowMaximum()) isa LU{Float64, Tridiagonal{Float64, Vector{Float64}}}
+    @test lu(A1, RowMaximum(); check = false) isa LU{Float64, Tridiagonal{Float64, Vector{Float64}}}
+end
+
 end # module TestLU
diff --git a/stdlib/LinearAlgebra/test/matmul.jl b/stdlib/LinearAlgebra/test/matmul.jl
index 6eed61f901aed4..1febdfe49fb3b2 100644
--- a/stdlib/LinearAlgebra/test/matmul.jl
+++ b/stdlib/LinearAlgebra/test/matmul.jl
@@ -766,4 +766,103 @@ end
     @test Matrix{Int}(undef, 2, 0) * Matrix{Int}(undef, 0, 3) == zeros(Int, 2, 3)
 end
 
+@testset "3-arg *, order by type" begin  # a 3-arg product must equal both explicit groupings
+    x = [1, 2im]  # every operand below is integer-valued (real or complex); presumably why == is used
+    y = [im, 20, 30+40im]
+    z = [-1, 200+im, -3]
+    A = [1 2 3im; 4 5 6+im]
+    B = [-10 -20; -30 -40]
+    a = 3 + im * round(Int, 10^6*(pi-3))  # complex scalar with integer real and imaginary parts
+    b = 123  # NOTE(review): not referenced by any assertion in this testset — confirm it can be dropped
+
+    @test x'*A*y == (x'*A)*y == x'*(A*y)  # adjoint vector * matrix * vector
+    @test y'*A'*x == (y'*A')*x == y'*(A'*x)
+    @test y'*transpose(A)*x == (y'*transpose(A))*x == y'*(transpose(A)*x)
+
+    @test B*A*y == (B*A)*y == B*(A*y)  # matrix * matrix * vector
+
+    @test a*A*y == (a*A)*y == a*(A*y)  # scalar leading, then trailing
+    @test A*y*a == (A*y)*a == A*(y*a)
+
+    @test a*B*A == (a*B)*A == a*(B*A)
+    @test B*A*a == (B*A)*a == B*(A*a)
+
+    @test a*y'*z == (a*y')*z == a*(y'*z)  # scalar with an inner product
+    @test y'*z*a == (y'*z)*a == y'*(z*a)
+
+    @test a*y*z' == (a*y)*z' == a*(y*z')  # scalar with an outer product
+    @test y*z'*a == (y*z')*a == y*(z'*a)
+
+    @test a*x'*A == (a*x')*A == a*(x'*A)
+    @test x'*A*a == (x'*A)*a == x'*(A*a)
+    @test a*x'*A isa Adjoint{<:Any, <:Vector}  # result type is an Adjoint wrapping a Vector
+
+    @test a*transpose(x)*A == (a*transpose(x))*A == a*(transpose(x)*A)
+    @test transpose(x)*A*a == (transpose(x)*A)*a == transpose(x)*(A*a)
+    @test a*transpose(x)*A isa Transpose{<:Any, <:Vector}  # likewise a Transpose wrapping a Vector
+
+    @test x'*B*A == (x'*B)*A == x'*(B*A)
+    @test x'*B*A isa Adjoint{<:Any, <:Vector}
+
+    @test y*x'*A == (y*x')*A == y*(x'*A)
+    y31 = reshape(y,3,1)  # y as a 3×1 matrix instead of a vector
+    @test y31*x'*A == (y31*x')*A == y31*(x'*A)
+
+    vm = [rand(1:9,2,2) for _ in 1:3]  # length-3 vector whose elements are 2×2 integer matrices
+    Mm = [rand(1:9,2,2) for _ in 1:3, _ in 1:3]  # 3×3 matrix whose elements are 2×2 integer matrices
+
+    @test vm' * Mm * vm == (vm' * Mm) * vm == vm' * (Mm * vm)
+    @test Mm * Mm' * vm == (Mm * Mm') * vm == Mm * (Mm' * vm)
+    @test vm' * Mm * Mm == (vm' * Mm) * Mm == vm' * (Mm * Mm)
+    @test Mm * Mm' * Mm == (Mm * Mm') * Mm == Mm * (Mm' * Mm)
+end
+
+@testset "3-arg *, order by size" begin  # float operands: groupings are compared with ≈, not ==
+    M44 = randn(4,4)
+    M24 = randn(2,4)
+    M42 = randn(4,2)
+    @test M44*M44*M44 ≈ (M44*M44)*M44 ≈ M44*(M44*M44)  # all factors the same size
+    @test M42*M24*M44 ≈ (M42*M24)*M44 ≈ M42*(M24*M44)  # small inner dimension (2) in the left pair
+    @test M44*M42*M24 ≈ (M44*M42)*M24 ≈ M44*(M42*M24)  # small inner dimension (2) in the right pair
+end
+
+@testset "4-arg *, by type" begin  # 4-arg products are compared exactly against a pairwise grouping
+    y = [im, 20, 30+40im]
+    z = [-1, 200+im, -3]
+    a = 3 + im * round(Int, 10^6*(pi-3))  # complex scalar with integer real and imaginary parts
+    b = 123
+    M = rand(vcat(1:9, im.*[1,2,3]), 3,3)  # 3×3, entries drawn from 1:9 and im, 2im, 3im
+    N = rand(vcat(1:9, im.*[1,2,3]), 3,3)
+
+    @test a * b * M * y == (a*b) * (M*y)  # scalar factor(s) leading
+    @test a * b * M * N == (a*b) * (M*N)
+    @test a * M * N * y == (a*M) * (N*y)
+    @test a * y' * M * z == (a*y') * (M*z)
+    @test a * y' * M * N == (a*y') * (M*N)
+
+    @test M * y * a * b == (M*y) * (a*b)  # scalar factor(s) trailing
+    @test M * N * a * b == (M*N) * (a*b)
+    @test M * N * y * a == (a*M) * (N*y)  # reference moves a to the front: the scalar must commute
+    @test y' * M * z * a == (a*y') * (M*z)
+    @test y' * M * N * a == (a*y') * (M*N)
+
+    @test M * N * conj(M) * y == (M*N) * (conj(M)*y)  # no scalar factor
+    @test y' * M * N * conj(M) == (y'*M) * (N*conj(M))
+    @test y' * M * N * z == (y'*M) * (N*z)
+end
+
+@testset "4-arg *, by size" begin
+    for shift in 1:5  # five shifts of a length-5 range: every rotation of the sizes is tried
+        s1,s2,s3,s4,s5 = circshift(3:7, shift)
+        a=randn(s1,s2); b=randn(s2,s3); c=randn(s3,s4); d=randn(s4,s5)  # chain of conformable matrices
+
+        # _quad_matmul: all four factors are matrices
+        @test *(a,b,c,d) ≈ (a*b) * (c*d)
+
+        # _tri_matmul(A,B,B,δ): three matrices and one scalar (NOTE(review): "B,B" may be a typo for "B,C" — confirm)
+        @test *(11.1,b,c,d) ≈ (11.1*b) * (c*d)
+        @test *(a,b,c,99.9) ≈ (a*b) * (c*99.9)
+    end
+end
+
 end # module TestMatmul
diff --git a/stdlib/LinearAlgebra/test/pinv.jl b/stdlib/LinearAlgebra/test/pinv.jl
index 9be74730127aa2..d3eafb26797a98 100644
--- a/stdlib/LinearAlgebra/test/pinv.jl
+++ b/stdlib/LinearAlgebra/test/pinv.jl
@@ -158,6 +158,12 @@ end
         @test a.diag[2] ≈ 0.0
     end
 
+    @testset "hermitian matrices" begin  # pinv must accept a Hermitian-wrapped matrix
+        Q = ones(2,2)
+        C = pinv(Hermitian(Q))/0.25  # rescaled so the expected result is the all-ones matrix
+        @test C ≈ ones(2,2)
+    end
+
     if eltya <: LinearAlgebra.BlasReal
         @testset "sub-normal numbers/vectors/matrices" begin
             a = pinv(floatmin(eltya)/100)
diff --git a/stdlib/LinearAlgebra/test/qr.jl b/stdlib/LinearAlgebra/test/qr.jl
index 394b371e02eac4..d6085565e3c7fc 100644
--- a/stdlib/LinearAlgebra/test/qr.jl
+++ b/stdlib/LinearAlgebra/test/qr.jl
@@ -11,7 +11,7 @@ n = 10
 n1 = div(n, 2)
 n2 = 2*n1
 
-Random.seed!(1234321)
+Random.seed!(1234325)
 
 areal = randn(n,n)/2
 aimg  = randn(n,n)/2
@@ -49,7 +49,6 @@ rectangularQ(Q::LinearAlgebra.AbstractQ) = convert(Array, Q)
             a_1 = size(a, 1)
             @testset "QR decomposition (without pivoting)" begin
                 qra   = @inferred qr(a)
-                @inferred qr(a)
                 q, r  = qra.Q, qra.R
                 @test_throws ErrorException qra.Z
                 @test q'*squareQ(q) ≈ Matrix(I, a_1, a_1)
@@ -78,8 +77,7 @@ rectangularQ(Q::LinearAlgebra.AbstractQ) = convert(Array, Q)
                 @test Base.propertynames(qra)       == (:R, :Q)
             end
             @testset "Thin QR decomposition (without pivoting)" begin
-                qra   = @inferred qr(a[:, 1:n1], Val(false))
-                @inferred qr(a[:, 1:n1], Val(false))
+                qra   = @inferred qr(a[:, 1:n1], NoPivot())
                 q,r   = qra.Q, qra.R
                 @test_throws ErrorException qra.Z
                 @test q'*squareQ(q) ≈ Matrix(I, a_1, a_1)
@@ -104,7 +102,7 @@ rectangularQ(Q::LinearAlgebra.AbstractQ) = convert(Array, Q)
                 @test Base.propertynames(qra)       == (:R, :Q)
             end
             @testset "(Automatic) Fat (pivoted) QR decomposition" begin
-                @inferred qr(a, Val(true))
+                @inferred qr(a, ColumnNorm())
 
                 qrpa  = factorize(a[1:n1,:])
                 q,r = qrpa.Q, qrpa.R
@@ -190,7 +188,7 @@ rectangularQ(Q::LinearAlgebra.AbstractQ) = convert(Array, Q)
                 @test mul!(c, b, q') ≈ b*q'
                 @test_throws DimensionMismatch mul!(Matrix{eltya}(I, n+1, n), q, b)
 
-                qra = qr(a[:,1:n1], Val(false))
+                qra = qr(a[:,1:n1], NoPivot())
                 q, r = qra.Q, qra.R
                 @test rmul!(copy(squareQ(q)'), q) ≈ Matrix(I, n, n)
                 @test_throws DimensionMismatch rmul!(Matrix{eltya}(I, n+1, n+1),q)
@@ -214,11 +212,8 @@ end
 
 @testset "transpose errors" begin
     @test_throws MethodError transpose(qr(randn(3,3)))
-    @test_throws MethodError adjoint(qr(randn(3,3)))
-    @test_throws MethodError transpose(qr(randn(3,3), Val(false)))
-    @test_throws MethodError adjoint(qr(randn(3,3), Val(false)))
+    @test_throws MethodError transpose(qr(randn(3,3), NoPivot()))
     @test_throws MethodError transpose(qr(big.(randn(3,3))))
-    @test_throws MethodError adjoint(qr(big.(randn(3,3))))
 end
 
 @testset "Issue 7304" begin
@@ -256,7 +251,7 @@ end
     A = zeros(1, 2)
     B = zeros(1, 1)
     @test A \ B == zeros(2, 1)
-    @test qr(A, Val(true)) \ B == zeros(2, 1)
+    @test qr(A, ColumnNorm()) \ B == zeros(2, 1)
 end
 
 @testset "Issue 24107" begin
@@ -278,7 +273,7 @@ end
     @test A \b ≈ ldiv!(c, qr(A ), b)
     @test b == b0
     c0 = copy(c)
-    @test Ac\c ≈ ldiv!(b, qr(Ac, Val(true)), c)
+    @test Ac\c ≈ ldiv!(b, qr(Ac, ColumnNorm()), c)
     @test c0 == c
 end
 
@@ -295,11 +290,11 @@ end
 
 @testset "det(Q::Union{QRCompactWYQ, QRPackedQ})" begin
     # 40 is the number larger than the default block size 36 of QRCompactWY
-    @testset for n in [1:3; 40], m in [1:3; 40], pivot in [false, true]
+    @testset for n in [1:3; 40], m in [1:3; 40], pivot in (NoPivot(), ColumnNorm())
         @testset "real" begin
             @testset for k in 0:min(n, m, 5)
                 A = cat(Array(I(k)), randn(n - k, m - k); dims=(1, 2))
-                Q, = qr(A, Val(pivot))
+                Q, = qr(A, pivot)
                 @test det(Q) ≈ det(collect(Q))
                 @test abs(det(Q)) ≈ 1
             end
@@ -307,7 +302,7 @@ end
         @testset "complex" begin
             @testset for k in 0:min(n, m, 5)
                 A = cat(Array(I(k)), randn(ComplexF64, n - k, m - k); dims=(1, 2))
-                Q, = qr(A, Val(pivot))
+                Q, = qr(A, pivot)
                 @test det(Q) ≈ det(collect(Q))
                 @test abs(det(Q)) ≈ 1
             end
@@ -371,4 +366,53 @@ end
     end
 end
 
+@testset "adjoint of QR" begin  # solving with F' must agree with solving with A'
+    n = 5
+    B = randn(5, 2)  # NOTE(review): row count is the literal 5, not n — the two presumably must stay equal
+
+    @testset "size(b)=$(size(b))" for b in (B[:, 1], B)  # right-hand side as a vector, then as a matrix
+        @testset "size(A)=$(size(A))" for A in (
+            randn(n, n),
+            # A is tall here, so A' is wide: wide problems become minimum norm (in x) problems similarly to LQ
+            randn(n + 2, n),
+            complex.(randn(n, n), randn(n, n)))
+
+            @testset "QRCompactWY" begin
+                F = qr(A)
+                x = F'\b
+                @test x ≈ A'\b
+                @test length(size(x)) == length(size(b))  # solution has as many dimensions as b
+            end
+
+            @testset "QR" begin
+                F = LinearAlgebra.qrfactUnblocked!(copy(A))  # copy: the ! variant may overwrite its argument
+                x = F'\b
+                @test x ≈ A'\b
+                @test length(size(x)) == length(size(b))
+            end
+
+            @testset "QRPivoted" begin
+                F = LinearAlgebra.qr(A, ColumnNorm())
+                x = F'\b
+                @test x ≈ A'\b
+                @test length(size(x)) == length(size(b))
+            end
+        end
+        @test_throws DimensionMismatch("overdetermined systems are not supported")    qr(randn(n - 2, n))'\b
+        @test_throws DimensionMismatch("arguments must have the same number of rows") qr(randn(n, n + 1))'\b
+        @test_throws DimensionMismatch("overdetermined systems are not supported")    LinearAlgebra.qrfactUnblocked!(randn(n - 2, n))'\b
+        @test_throws DimensionMismatch("arguments must have the same number of rows") LinearAlgebra.qrfactUnblocked!(randn(n, n + 1))'\b
+        @test_throws DimensionMismatch("overdetermined systems are not supported")    qr(randn(n - 2, n), ColumnNorm())'\b
+        @test_throws DimensionMismatch("arguments must have the same number of rows") qr(randn(n, n + 1), ColumnNorm())'\b
+    end
+end
+
+@testset "issue #38974" begin
+    A = qr(ones(3, 1))
+    B = I(3)
+    C = B*A.Q'  # identity times the adjoint of Q
+    @test C ≈ A.Q  # B is the identity, so this asserts Q' ≈ Q for this particular Q
+    @test A.Q' * B ≈ A.Q  # same check with the identity on the right
+end
+
 end # module TestQR
diff --git a/stdlib/LinearAlgebra/test/schur.jl b/stdlib/LinearAlgebra/test/schur.jl
index feb0ef8513b897..d047ca12abc1f2 100644
--- a/stdlib/LinearAlgebra/test/schur.jl
+++ b/stdlib/LinearAlgebra/test/schur.jl
@@ -132,6 +132,74 @@ aimg  = randn(n,n)/2
         @test Z == A
         @test λ == zeros(0)
     end
+
+    if eltya <: Real  # the conversion below is only exercised for real element types
+        @testset "quasitriangular to triangular" begin
+            S = schur(a)
+            SC = Schur{Complex}(S)  # same factorization, converted to a complex element type
+            @test eltype(SC) == complex(eltype(S))
+            @test istriu(SC.T)  # after conversion T must be upper triangular
+            @test SC.Z*SC.Z' ≈ I
+            @test SC.Z*SC.T*SC.Z' ≈ a  # converted factors still reproduce a
+            @test sort(SC.values,by=LinearAlgebra.eigsortby) ≈ sort(S.values,by=LinearAlgebra.eigsortby)
+            @test Schur{Complex}(SC) === SC === Schur{eltype(SC)}(SC)  # already complex: same object back
+            @test Schur{eltype(S)}(S) === S  # converting to the element type already held: same object back
+            if eltype(S) === Float32
+                S64 = Schur{Float64}(S)  # widening the element type
+                @test eltype(S64) == Float64
+                @test S64.Z == S.Z  # entries compared with ==, not ≈
+                @test S64.T == S.T
+                @test S64.values == S.values
+            end
+        end
+    end
+
+    @testset "0x0 $eltya matrices" begin
+        A = zeros(eltya, 0, 0)
+        B = zeros(eltya, 0, 0)
+        S = LinearAlgebra.schur(A, B)  # two-matrix form; presumably the generalized Schur factorization — confirm
+        @test S.S == A  # each matrix factor must equal the empty 0×0 input
+        @test S.T == A
+        @test S.Q == A
+        @test S.Z == A
+        @test S.alpha == zeros(0)  # alpha and beta must be empty as well
+        @test S.beta == zeros(0)
+    end
+end
+
+@testset "Generalized Schur convergence" begin
+    # Check for convergence issues, #40279
+    problematic_pencils = [
+        (   ComplexF64[0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0; 3.7796350217469814 -3.3125635598133054 0.0 0.0 0.0 0.0 0.0 0.0 6.418270043493963 -6.625127119626611 0.0 0.0 0.0 0.0 0.0 -1.0; -3.312563559813306 3.779635021746982 0.0 0.0 0.0 0.0 0.0 0.0 -6.625127119626612 6.418270043493964 -1.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 3.7796350217469814 0.0 0.0 -3.3125635598133054 0.0 0.0 0.0 -1.0 6.418270043493963 0.0 0.0 -6.625127119626611 0.0 0.0; 0.0 0.0 0.0 3.779635021746982 -3.312563559813306 0.0 0.0 0.0 0.0 0.0 0.0 6.418270043493964 -6.625127119626612 0.0 -1.0 0.0; 0.0 0.0 0.0 -3.3125635598133054 3.7796350217469814 0.0 0.0 0.0 0.0 0.0 0.0 -6.625127119626611 6.418270043493963 -1.0 0.0 0.0; 0.0 0.0 -3.312563559813306 0.0 0.0 3.779635021746982 0.0 0.0 0.0 0.0 -6.625127119626612 0.0 -1.0 6.418270043493964 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 3.7796350217469814 -3.3125635598133054 0.0 0.0 0.0 -1.0 0.0 0.0 6.418270043493963 -6.625127119626611; 0.0 0.0 0.0 0.0 0.0 0.0 -3.312563559813306 3.779635021746982 -1.0 0.0 0.0 0.0 0.0 0.0 -6.625127119626612 6.418270043493964],
+            ComplexF64[1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -3.7796350217469814 3.312563559813306 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.3125635598133054 -3.779635021746982 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -3.7796350217469814 0.0 0.0 3.312563559813306 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -3.779635021746982 3.3125635598133054 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.312563559813306 -3.7796350217469814 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.3125635598133054 0.0 0.0 -3.779635021746982 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -3.7796350217469814 3.312563559813306; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.3125635598133054 -3.779635021746982]
+        ),
+        (   ComplexF64[0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0; 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -2.62 -1.0 0.0 0.0 0.0 0.0 -1.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 0.0 0.0 0.0 0.0 -1.0 -2.62 0.0 0.0 0.0 0.0 0.0; 0.0 -1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0 0.0 0.0; 
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0 0.0; 0.0 0.0 0.0 -1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0; 0.0 0.0 0.0 0.0 0.0 -1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62],
+            ComplexF64[1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 
0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0]
+        ),
+        (   ComplexF64[0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0; 0.33748484079831426 -0.10323794456968927 0.0 0.0 0.0 0.0 0.0 0.0 -2.5940303184033713 -0.20647588913937853 0.0 0.0 0.0 0.0 0.0 -1.0; -0.10323794456968927 0.3374848407983142 0.0 0.0 0.0 0.0 0.0 0.0 -0.20647588913937853 -2.5940303184033713 -1.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.33748484079831426 0.0 0.0 -0.10323794456968927 0.0 0.0 0.0 -1.0 -2.5940303184033713 0.0 0.0 -0.20647588913937853 0.0 0.0; 0.0 0.0 0.0 0.3374848407983142 -0.10323794456968927 0.0 0.0 0.0 0.0 0.0 0.0 -2.5940303184033713 -0.20647588913937853 0.0 -1.0 0.0; 0.0 0.0 0.0 -0.10323794456968927 0.33748484079831426 0.0 0.0 0.0 0.0 0.0 0.0 -0.20647588913937853 -2.5940303184033713 -1.0 0.0 0.0; 0.0 0.0 -0.10323794456968927 0.0 0.0 0.3374848407983142 0.0 0.0 0.0 0.0 -0.20647588913937853 0.0 -1.0 -2.5940303184033713 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.33748484079831426 -0.10323794456968927 0.0 0.0 0.0 -1.0 0.0 0.0 -2.5940303184033713 -0.20647588913937853; 0.0 0.0 0.0 0.0 0.0 0.0 -0.10323794456968927 0.3374848407983142 -1.0 0.0 0.0 0.0 0.0 0.0 -0.20647588913937853 -2.5940303184033713],
+            ComplexF64[1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -0.33748484079831426 0.10323794456968927 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.10323794456968927 -0.3374848407983142 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -0.33748484079831426 0.0 0.0 0.10323794456968927 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -0.3374848407983142 0.10323794456968927 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.10323794456968927 -0.33748484079831426 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.10323794456968927 0.0 0.0 -0.3374848407983142 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -0.33748484079831426 0.10323794456968927; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.10323794456968927 -0.3374848407983142]
+        ),
+        (   ComplexF64[0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0; 1.7391668762048442 -1.309613611600033 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.150333752409688 -2.619227223200066 0.0 -1.0 0.0 0.0 0.0 0.0 0.0 0.0; -1.3096136116000332 1.739166876204844 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -2.6192272232000664 2.150333752409688 -1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 1.739166876204844 0.0 0.0 -1.3096136116000332 0.0 0.0 0.0 0.0 0.0 -1.0 2.150333752409688 0.0 0.0 -2.6192272232000664 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 1.739166876204844 0.0 0.0 0.0 0.0 -1.3096136116000332 0.0 -1.0 0.0 0.0 2.150333752409688 0.0 0.0 0.0 0.0 -2.6192272232000664 0.0; 0.0 0.0 0.0 0.0 1.7391668762048442 0.0 0.0 0.0 0.0 -1.309613611600033 0.0 0.0 0.0 0.0 2.150333752409688 -1.0 0.0 0.0 0.0 -2.619227223200066; 0.0 0.0 -1.309613611600033 0.0 0.0 1.7391668762048442 0.0 0.0 0.0 0.0 0.0 0.0 -2.619227223200066 0.0 -1.0 2.150333752409688 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 1.739166876204844 -1.3096136116000332 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.150333752409688 -2.6192272232000664 0.0 -1.0; 0.0 0.0 0.0 0.0 0.0 0.0 -1.309613611600033 1.7391668762048442 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -2.619227223200066 2.150333752409688 -1.0 0.0; 0.0 0.0 0.0 -1.309613611600033 
0.0 0.0 0.0 0.0 1.7391668762048442 0.0 0.0 0.0 0.0 -2.619227223200066 0.0 0.0 0.0 -1.0 2.150333752409688 0.0; 0.0 0.0 0.0 0.0 -1.3096136116000332 0.0 0.0 0.0 0.0 1.739166876204844 0.0 0.0 0.0 0.0 -2.6192272232000664 0.0 -1.0 0.0 0.0 2.150333752409688],
+            ComplexF64[1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.7391668762048442 1.3096136116000332 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.309613611600033 -1.739166876204844 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.739166876204844 0.0 0.0 1.309613611600033 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.739166876204844 0.0 0.0 0.0 0.0 1.309613611600033 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.7391668762048442 0.0 0.0 0.0 0.0 1.3096136116000332; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.3096136116000332 0.0 0.0 -1.7391668762048442 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.739166876204844 1.309613611600033 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.3096136116000332 -1.7391668762048442 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.3096136116000332 0.0 0.0 0.0 0.0 -1.7391668762048442 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.309613611600033 0.0 0.0 0.0 0.0 -1.739166876204844]
+        ),
+        (   ComplexF64[0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0; 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230788 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007; 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230788 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230788 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 
0.0 0.0 0.0 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769246 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230784 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230788 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230788 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230788 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769246 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230784 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230788 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230788; -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 -6.009615384615393 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384622 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769244 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615393 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384622 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769244 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248],
+            ComplexF64[1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 
0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615393 0.0 0.0 0.0 0.0 0.0 
0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615393 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384622 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 
0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384622 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624]
+        )]
+
+    for (A, B) in problematic_pencils
+        f = schur(A, B)
+        @test f.Q*f.S*f.Z' ≈ A
+        @test f.Q*f.T*f.Z' ≈ B
+    end
+end
+
+@testset "adjoint and transpose for schur (#40941)" begin  # generalized Schur with a wrapped first argument
+    A = rand(3, 3)
+    B = schur(A', A)  # first matrix of the pair is passed as the adjoint A'
+    C = B.left*B.S*B.right'  # rebuild the first matrix of the pair from the factors
+    D = schur(transpose(A), A)  # same pair, passed via transpose instead of adjoint
+    E = D.left*D.S*D.right'
+    @test A' ≈ C ≈ E  # A is real, so both reconstructions must match A'
 end
 
 end # module TestSchur
diff --git a/stdlib/LinearAlgebra/test/special.jl b/stdlib/LinearAlgebra/test/special.jl
index c23371f3d072e0..bf4c8dee589775 100644
--- a/stdlib/LinearAlgebra/test/special.jl
+++ b/stdlib/LinearAlgebra/test/special.jl
@@ -192,10 +192,10 @@ end
         a = rand(n,n)
         atri = typ(a)
         b = rand(n,n)
-        qrb = qr(b,Val(true))
+        qrb = qr(b, ColumnNorm())
         @test *(atri, adjoint(qrb.Q)) ≈ Matrix(atri) * qrb.Q'
         @test rmul!(copy(atri), adjoint(qrb.Q)) ≈ Matrix(atri) * qrb.Q'
-        qrb = qr(b,Val(false))
+        qrb = qr(b, NoPivot())
         @test *(atri, adjoint(qrb.Q)) ≈ Matrix(atri) * qrb.Q'
         @test rmul!(copy(atri), adjoint(qrb.Q)) ≈ Matrix(atri) * qrb.Q'
     end
@@ -434,4 +434,20 @@ end
     end
 end
 
+@testset "BiTriSym*Q' and Q'*BiTriSym" begin  # structured matrices times the adjoint of a QR Q factor
+    dl = [1, 1, 1];
+    d = [1, 1, 1, 1];
+    Tri = Tridiagonal(dl, d, dl)
+    Bi = Bidiagonal(d, dl, :L)
+    Sym = SymTridiagonal(d, dl)
+    F = qr(ones(4, 1))
+    A = F.Q'  # adjoint of the Q factor from the QR of a 4×1 matrix
+    @test Tri*A ≈ Matrix(Tri)*A  # each product must agree with the dense-Matrix equivalent
+    @test A*Tri ≈ A*Matrix(Tri)
+    @test Bi*A ≈ Matrix(Bi)*A
+    @test A*Bi ≈ A*Matrix(Bi)
+    @test Sym*A ≈ Matrix(Sym)*A
+    @test A*Sym ≈ A*Matrix(Sym)
+end
+
 end # module TestSpecial
diff --git a/stdlib/LinearAlgebra/test/structuredbroadcast.jl b/stdlib/LinearAlgebra/test/structuredbroadcast.jl
index fea7fa08f05342..4aeca31a79a03d 100644
--- a/stdlib/LinearAlgebra/test/structuredbroadcast.jl
+++ b/stdlib/LinearAlgebra/test/structuredbroadcast.jl
@@ -206,6 +206,33 @@ end
 
 end
 
+struct Zero36193 end  # field-less helper type used as an element type in the testset below
+Base.iszero(::Zero36193) = true  # every instance counts as zero
+LinearAlgebra.iszerodefined(::Type{Zero36193}) = true
+@testset "PR #36193" begin
+    f(::Union{Int, Zero36193}) = Zero36193()  # maps either kind of element to a Zero36193
+    function test(el)  # builds 2×2 structured matrices filled with `el` and checks broadcast/map
+        M = [el el
+             el el]
+        v = [el, el]
+        U = UpperTriangular(M)
+        L = LowerTriangular(M)
+        D = Diagonal(v)
+        for (T, A) in [(UpperTriangular, U), (LowerTriangular, L), (Diagonal, D)]
+            @test identity.(A) isa typeof(A)  # identity must keep the exact wrapper type
+            @test map(identity, A) isa typeof(A)
+            @test f.(A) isa T{Zero36193}  # f must keep the wrapper and switch the element type
+            @test map(f, A) isa T{Zero36193}
+        end
+    end
+    # This should not need `zero(::Type{Zero36193})` to be defined
+    test(1)
+    Base.zero(::Type{Zero36193}) = Zero36193()  # defined only after the Int run above
+    # This should not need `==(::Zero36193, ::Int)` to be defined as `iszerodefined`
+    # returns true.
+    test(Zero36193())
+end
+
 # structured broadcast with function returning non-number type
 @test tuple.(Diagonal([1, 2])) == [(1,) (0,); (0,) (2,)]
 
diff --git a/stdlib/LinearAlgebra/test/svd.jl b/stdlib/LinearAlgebra/test/svd.jl
index 30dd6db300eb96..8bd3edadc911df 100644
--- a/stdlib/LinearAlgebra/test/svd.jl
+++ b/stdlib/LinearAlgebra/test/svd.jl
@@ -23,6 +23,10 @@ using LinearAlgebra: BlasComplex, BlasFloat, BlasReal, QRPivoted
     @test F.U'F.U ≊ Matrix(I, 2, 2)
     @test F.Vt'*F.Vt ≊ [1]
     @test @inferred(svdvals(3:4)) ≊ [5]
+    A = Matrix(1.0I, 2, 2)
+    Z = svd(Hermitian(A); full=true)
+    @test Z.S ≈ ones(2)
+    @test Z.U'Z.U ≈ I(2)
 
     m1 = [2 0; 0 0]
     m2 = [2 -2; 1 1]/sqrt(2)
@@ -217,4 +221,44 @@ end
     @test Uc * diagm(0=>Sc) * transpose(V) ≈ complex.(A) rtol=1e-3
 end
 
+@testset "Issue 40944. ldiV!(SVD) should update rhs" begin  # NOTE(review): "ldiV!" in the name refers to ldiv!
+    F = svd(randn(2, 2))
+    b = randn(2)
+    x = ldiv!(F, b)  # in-place solve against the SVD factorization
+    @test x === b  # the returned object must be the very same array as b
+end
+
+@testset "adjoint of SVD" begin  # solving with F' must match solving with A'
+    n = 5
+    B = randn(5, 2)
+
+    @testset "size(b)=$(size(b))" for b in (B[:, 1], B)  # vector and matrix right-hand sides
+        @testset "size(A)=$(size(A))" for A in (
+            randn(n, n),
+            # Wide problems become minimum norm (in x) problems similarly to LQ
+            randn(n + 2, n),
+            randn(n - 2, n),
+            complex.(randn(n, n), randn(n, n)))
+
+            F = svd(A)
+            x = F'\b
+            @test x ≈ A'\b
+            @test length(size(x)) == length(size(b))  # result has the same number of dimensions as b
+        end
+    end
+end
+
+@testset "Float16" begin  # svd of a Float16 matrix must return Float16 factors
+    A = Float16[4. 12. -16.; 12. 37. -43.; -16. -43. 98.]
+    B = svd(A)
+    B32 = svd(Float32.(A))  # Float32 factorization of the same matrix, used as the reference
+    @test B isa SVD{Float16, Float16, Matrix{Float16}}
+    @test B.U isa Matrix{Float16}
+    @test B.Vt isa Matrix{Float16}
+    @test B.S isa Vector{Float16}
+    @test B.U ≈ B32.U  # values must agree with the Float32 reference
+    @test B.Vt ≈ B32.Vt
+    @test B.S ≈ B32.S
+end
+
 end # module TestSVD
diff --git a/stdlib/LinearAlgebra/test/symmetric.jl b/stdlib/LinearAlgebra/test/symmetric.jl
index d23eecb5be46e4..f20b6fe2acc977 100644
--- a/stdlib/LinearAlgebra/test/symmetric.jl
+++ b/stdlib/LinearAlgebra/test/symmetric.jl
@@ -11,13 +11,19 @@ Random.seed!(1010)
     @test ishermitian(σ)
 end
 
+@testset "Two-dimensional Euler formula for Hermitian" begin
+    @test cis(Hermitian([π 0; 0 π])) ≈ -I  # π on the diagonal: the expected result is minus the identity
+end
+
 @testset "Hermitian matrix exponential/log" begin
     A1 = randn(4,4) + im*randn(4,4)
     A2 = A1 + A1'
     @test exp(A2) ≈ exp(Hermitian(A2))
+    @test cis(A2) ≈ cis(Hermitian(A2))
     @test log(A2) ≈ log(Hermitian(A2))
     A3 = A1 * A1' # posdef
     @test exp(A3) ≈ exp(Hermitian(A3))
+    @test cis(A3) ≈ cis(Hermitian(A3))
     @test log(A3) ≈ log(Hermitian(A3))
 
     A1 = randn(4,4)
@@ -545,6 +551,26 @@ end
     end
 end
 
+const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
+isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl"))
+using .Main.ImmutableArrays
+
+@testset "Conversion to AbstractArray" begin  # eltype conversion must keep the wrapped storage type
+    # tests corresponding to #34995
+    immutablemat = ImmutableArray([1 2 3; 4 5 6; 7 8 9])
+    for SymType in (Symmetric, Hermitian)
+        S = Float64  # target element type; the wrapped matrix is built from Ints
+        symmat = SymType(immutablemat)
+        @test convert(AbstractArray{S}, symmat).data isa ImmutableArray{S}  # .data stays an ImmutableArray
+        @test convert(AbstractMatrix{S}, symmat).data isa ImmutableArray{S}
+        @test AbstractArray{S}(symmat).data isa ImmutableArray{S}  # constructor form, same expectation
+        @test AbstractMatrix{S}(symmat).data isa ImmutableArray{S}
+        @test convert(AbstractArray{S}, symmat) == symmat  # values are unchanged by the conversion
+        @test convert(AbstractMatrix{S}, symmat) == symmat
+    end
+end
+
+
 @testset "#24572: eltype(A::HermOrSym) === eltype(parent(A))" begin
     A = rand(Float32, 3, 3)
     @test_throws TypeError Symmetric{Float64,Matrix{Float32}}(A, 'U')
@@ -685,45 +711,37 @@ end
 
 @testset "Multiplications symmetric/hermitian for $T and $S" for T in
         (Float16, Float32, Float64, BigFloat), S in (ComplexF16, ComplexF32, ComplexF64)
-    let A = Transpose(Symmetric(rand(S, 3, 3))), Bv = Vector(rand(T, 3)), Bm = Matrix(rand(T, 3,3))
-        @test A * Bv ≈ parent(A) * Bv
-        @test A * Bm ≈ parent(A) * Bm
-        @test Bm * A ≈ Bm * parent(A)
+    let A = transpose(Symmetric(rand(S, 3, 3))), Bv = Vector(rand(T, 3)), Bm = Matrix(rand(T, 3,3))  # real T operands against a complex S matrix
+        @test A * Bv ≈ Matrix(A) * Bv  # reference product uses the dense Matrix(A)
+        @test A * Bm ≈ Matrix(A) * Bm
+        @test Bm * A ≈ Bm * Matrix(A)
     end
-    let A = Adjoint(Hermitian(rand(S, 3,3))), Bv = Vector(rand(T, 3)), Bm = Matrix(rand(T, 3,3))
-        @test A * Bv ≈ parent(A) * Bv
-        @test A * Bm ≈ parent(A) * Bm
-        @test Bm * A ≈ Bm * parent(A)
+    let A = adjoint(Hermitian(rand(S, 3,3))), Bv = Vector(rand(T, 3)), Bm = Matrix(rand(T, 3,3))  # same checks for the adjoint of a Hermitian
+        @test A * Bv ≈ Matrix(A) * Bv
+        @test A * Bm ≈ Matrix(A) * Bm
+        @test Bm * A ≈ Bm * Matrix(A)
     end
-end
-
-@testset "Dsiambiguation multiplication with transposed AbstractMatrix methods in linalg/matmul.jl for $T and $S" for T in
-        (Float16, Float32, Float64, BigFloat), S in (ComplexF16, ComplexF32, ComplexF64)
-    let Ahrs = Transpose(Hermitian(Symmetric(rand(T, 3, 3)))),
-        Acs = Transpose(Symmetric(rand(S, 3, 3))),
-        Ahcs = Transpose(Hermitian(Symmetric(rand(S, 3, 3))))
-
-        @test Ahrs * Ahrs ≈ Ahrs * parent(Ahrs)
-        @test Ahrs * Acs ≈ Ahrs * parent(Acs)
-        @test Acs * Acs ≈ parent(Acs) * parent(Acs)
-        @test Acs * Ahrs ≈ parent(Acs) * Ahrs
-        @test Ahrs * Ahcs ≈ parent(Ahrs) * Ahcs
-        @test Ahcs * Ahrs ≈ Ahcs * parent(Ahrs)
+    let Ahrs = transpose(Hermitian(Symmetric(rand(T, 3, 3)))),  # transposed operands: real Hermitian-of-Symmetric,
+        Acs = transpose(Symmetric(rand(S, 3, 3))),  # complex Symmetric,
+        Ahcs = transpose(Hermitian(Symmetric(rand(S, 3, 3))))  # and complex Hermitian-of-Symmetric
+
+        @test Ahrs * Ahrs ≈ Ahrs * Matrix(Ahrs)  # wrapper*wrapper products against dense references
+        @test Ahrs * Acs ≈ Ahrs * Matrix(Acs)
+        @test Acs * Acs ≈ Matrix(Acs) * Matrix(Acs)
+        @test Acs * Ahrs ≈ Matrix(Acs) * Ahrs
+        @test Ahrs * Ahcs ≈ Matrix(Ahrs) * Ahcs
+        @test Ahcs * Ahrs ≈ Ahcs * Matrix(Ahrs)
     end
-end
-
-@testset "Dsiambiguation multiplication with adjointed AbstractMatrix methods in linalg/matmul.jl for $T and $S" for T in
-        (Float16, Float32, Float64, BigFloat), S in (ComplexF16, ComplexF32, ComplexF64)
-    let Ahrs = Adjoint(Hermitian(Symmetric(rand(T, 3, 3)))),
-        Acs = Adjoint(Symmetric(rand(S, 3, 3))),
-        Ahcs = Adjoint(Hermitian(Symmetric(rand(S, 3, 3))))
-
-        @test Ahrs * Ahrs ≈ Ahrs * parent(Ahrs)
-        @test Ahcs * Ahcs ≈ parent(Ahcs) * parent(Ahcs)
-        @test Ahrs * Ahcs ≈ Ahrs * parent(Ahcs)
-        @test Acs * Ahcs ≈ Acs * parent(Ahcs)
-        @test Ahcs * Ahrs ≈ parent(Ahcs) * Ahrs
-        @test Ahcs * Acs ≈ parent(Ahcs) * Acs
+    let Ahrs = adjoint(Hermitian(Symmetric(rand(T, 3, 3)))),  # the same three operand kinds, adjointed
+        Acs = adjoint(Symmetric(rand(S, 3, 3))),
+        Ahcs = adjoint(Hermitian(Symmetric(rand(S, 3, 3))))
+
+        @test Ahrs * Ahrs ≈ Ahrs * Matrix(Ahrs)
+        @test Ahcs * Ahcs ≈ Matrix(Ahcs) * Matrix(Ahcs)
+        @test Ahrs * Ahcs ≈ Ahrs * Matrix(Ahcs)
+        @test Acs * Ahcs ≈ Acs * Matrix(Ahcs)
+        @test Ahcs * Ahrs ≈ Matrix(Ahcs) * Ahrs
+        @test Ahcs * Acs ≈ Matrix(Ahcs) * Acs
     end
 end
 
diff --git a/stdlib/LinearAlgebra/test/testgroups b/stdlib/LinearAlgebra/test/testgroups
index b33dfecaa82eee..de082d8e7dce08 100644
--- a/stdlib/LinearAlgebra/test/testgroups
+++ b/stdlib/LinearAlgebra/test/testgroups
@@ -25,3 +25,4 @@ givens
 structuredbroadcast
 addmul
 ldlt
+factorization
diff --git a/stdlib/LinearAlgebra/test/triangular.jl b/stdlib/LinearAlgebra/test/triangular.jl
index 030e4a27625f6b..6950d7a956b87b 100644
--- a/stdlib/LinearAlgebra/test/triangular.jl
+++ b/stdlib/LinearAlgebra/test/triangular.jl
@@ -4,7 +4,7 @@ module TestTriangular
 
 debug = false
 using Test, LinearAlgebra, SparseArrays, Random
-using LinearAlgebra: BlasFloat, errorbounds, full!, naivesub!, transpose!,
+using LinearAlgebra: BlasFloat, errorbounds, full!, transpose!,
     UnitUpperTriangular, UnitLowerTriangular,
     mul!, rdiv!, rmul!, lmul!
 
@@ -282,7 +282,7 @@ for elty1 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFlo
         @test sqrt(A1) |> (t -> (t*t)::typeof(t)) ≈ A1
 
         # naivesub errors
-        @test_throws DimensionMismatch naivesub!(A1,Vector{elty1}(undef,n+1))
+        @test_throws DimensionMismatch ldiv!(A1, Vector{elty1}(undef, n+1))
 
         # eigenproblems
         if !(elty1 in (BigFloat, Complex{BigFloat})) # Not handled yet
@@ -476,7 +476,11 @@ for elty1 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFlo
             @test_throws DimensionMismatch Ann'\bm
             @test_throws DimensionMismatch transpose(Ann)\bm
             if t1 == UpperTriangular || t1 == LowerTriangular
-                @test_throws LinearAlgebra.SingularException naivesub!(t1(zeros(elty1,n,n)),fill(eltyB(1),n))
+                if elty1 === eltyB <: BlasFloat
+                    @test_throws LAPACKException ldiv!(t1(zeros(elty1, n, n)), fill(eltyB(1), n))
+                else
+                    @test_throws SingularException ldiv!(t1(zeros(elty1, n, n)), fill(eltyB(1), n))
+                end
             end
             @test B/A1 ≈ B/Matrix(A1)
             @test B/transpose(A1) ≈ B/transpose(Matrix(A1))
@@ -509,6 +513,41 @@ Atu = UnitUpperTriangular([1 1 2; 0 1 2; 0 0 1])
 @test typeof(sqrt(Atu)[1,1]) <: Real
 @test typeof(sqrt(complex(Atu))[1,1]) <: Complex
 
+@testset "matrix square root quasi-triangular blockwise" begin
+    @testset for T in (Float32, Float64, ComplexF32, ComplexF64)
+        A = schur(rand(T, 100, 100)^2).T  # Schur factor T of a squared random matrix
+        @test LinearAlgebra.sqrt_quasitriu(A; blockwidth=16)^2 ≈ A  # squaring the root must give A back
+    end
+    n = 256
+    A = rand(ComplexF64, n, n)
+    U = schur(A).T
+    Ubig = Complex{BigFloat}.(U)  # same entries converted to Complex{BigFloat}
+    @test LinearAlgebra.sqrt_quasitriu(U; blockwidth=64) ≈ LinearAlgebra.sqrt_quasitriu(Ubig; blockwidth=64)  # ComplexF64 and BigFloat results must agree
+end
+
+@testset "sylvester quasi-triangular blockwise" begin
+    @testset for T in (Float32, Float64, ComplexF32, ComplexF64), m in (15, 40), n in (15, 45)  # sizes on both sides of blockwidth=16
+        A = schur(rand(T, m, m)).T
+        B = schur(rand(T, n, n)).T
+        C = randn(T, m, n)
+        Ccopy = copy(C)  # keep the original right-hand side; C is overwritten below
+        X = LinearAlgebra._sylvester_quasitriu!(A, B, C; blockwidth=16)
+        @test X === C  # the solution is returned in C itself
+        @test A * X + X * B ≈ -Ccopy  # i.e. X satisfies A*X + X*B + C = 0 for the original C
+
+        @testset "test raise=false does not break recursion" begin
+            Az = zero(A)  # all-zero coefficient matrices: the default call below is expected to throw
+            Bz = zero(B)
+            C2 = copy(Ccopy)
+            @test_throws LAPACKException LinearAlgebra._sylvester_quasitriu!(Az, Bz, C2; blockwidth=16)
+            m == n || @test any(C2 .== Ccopy)  # recursion broken
+            C3 = copy(Ccopy)
+            X3 = LinearAlgebra._sylvester_quasitriu!(Az, Bz, C3; blockwidth=16, raise=false)  # same inputs, with raise=false
+            @test !any(X3 .== Ccopy)  # recursion not broken
+        end
+    end
+end
+
 @testset "check matrix logarithm type-inferrable" for elty in (Float32,Float64,ComplexF32,ComplexF64)
     A = UpperTriangular(exp(triu(randn(elty, n, n))))
     @inferred Union{typeof(A),typeof(complex(A))} log(A)
@@ -662,6 +701,25 @@ end
     end
 end
 
+isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl"))
+using .Main.ImmutableArrays
+
+@testset "AbstractArray constructor should preserve underlying storage type" begin
+    # tests corresponding to #34995
+    local m = 4
+    local T, S = Float32, Float64  # source and target element types
+    immutablemat = ImmutableArray(randn(T,m,m))
+    for TriType in (UpperTriangular, LowerTriangular, UnitUpperTriangular, UnitLowerTriangular)
+        trimat = TriType(immutablemat)
+        @test convert(AbstractArray{S}, trimat).data isa ImmutableArray{S}  # .data stays an ImmutableArray
+        @test convert(AbstractMatrix{S}, trimat).data isa ImmutableArray{S}
+        @test AbstractArray{S}(trimat).data isa ImmutableArray{S}  # constructor form, same expectation
+        @test AbstractMatrix{S}(trimat).data isa ImmutableArray{S}
+        @test convert(AbstractArray{S}, trimat) == trimat  # values are unchanged by the conversion
+        @test convert(AbstractMatrix{S}, trimat) == trimat
+    end
+end
+
 @testset "inplace mul of appropriate types should preserve triagular structure" begin
     for elty1 in (Float64, ComplexF32), elty2 in (Float64, ComplexF32)
         T = promote_type(elty1, elty2)
diff --git a/stdlib/LinearAlgebra/test/tridiag.jl b/stdlib/LinearAlgebra/test/tridiag.jl
index ec777bcd462222..31e107ddc0e3c8 100644
--- a/stdlib/LinearAlgebra/test/tridiag.jl
+++ b/stdlib/LinearAlgebra/test/tridiag.jl
@@ -198,10 +198,8 @@ end
         @testset "similar, size, and copyto!" begin
             B = similar(A)
             @test size(B) == size(A)
-            if mat_type == Tridiagonal # doesn't work for SymTridiagonal yet
-                copyto!(B, A)
-                @test B == A
-            end
+            copyto!(B, A)
+            @test B == A
             @test isa(similar(A), mat_type{elty})
             @test isa(similar(A, Int), mat_type{Int})
             @test isa(similar(A, (3, 2)), SparseMatrixCSC)
@@ -589,7 +587,9 @@ end
     A2 = SymTridiagonal(fill(1.0, 3), fill(-1.0, 2))
     F2 = eigen(A2)
     test_approx_eq_modphase(F.vectors, F2.vectors)
-    @test F.values ≈ F2.values
+    @test F.values ≈ F2.values ≈ eigvals(A) ≈ eigvals(A2)
+    @test eigvecs(A) ≈ eigvecs(A2)
+    @test eigvecs(A, eigvals(A)[1:1]) ≈ eigvecs(A2, eigvals(A2)[1:1])
 end
 
 @testset "non-commutative algebra (#39701)" begin
@@ -603,4 +603,77 @@ end
     end
 end
 
+@testset "adjoint of LDLt" begin
+    Sr = SymTridiagonal(randn(5), randn(4))  # real case
+    Sc = SymTridiagonal(complex.(randn(5)) .+ 1im, complex.(randn(4), randn(4)))  # complex case
+    b = ones(size(Sr, 1))
+
+    F = ldlt(Sr)
+    @test F\b == F'\b  # real case: F and F' must give exactly the same solution
+
+    F = ldlt(Sc)
+    @test copy(Sc')\b == F'\b  # F' must solve the same system as the materialized Sc'
+end
+
+@testset "symmetric and hermitian tridiagonals" begin
+    A = [im 0; 0 -im]  # dense diagonal matrix with purely imaginary entries
+    @test issymmetric(A)
+    @test !ishermitian(A)
+
+    # real
+    A = SymTridiagonal(randn(5), randn(4))
+    @test issymmetric(A)
+    @test ishermitian(A)
+
+    A = Tridiagonal(A.ev, A.dv, A.ev .+ 1)  # upper diagonal differs from the lower one by 1
+    @test !issymmetric(A)
+    @test !ishermitian(A)
+
+    # complex
+    # https://github.com/JuliaLang/julia/pull/41037#discussion_r645524081
+    S = SymTridiagonal(randn(5) .+ 0im, randn(5) .+ 0im)  # ev has 5 entries here, the same length as dv
+    S.ev[end] = im  # only the trailing ev entry is made non-real; the tests below expect it not to matter
+    @test issymmetric(S)
+    @test ishermitian(S)
+
+    S = SymTridiagonal(randn(5) .+ 1im, randn(4) .+ 1im)  # every entry has imaginary part 1
+    @test issymmetric(S)
+    @test !ishermitian(S)
+
+    S = Tridiagonal(S.ev, S.dv, adjoint.(S.ev))  # upper diagonal is the elementwise conjugate of the lower
+    @test !issymmetric(S)
+    @test !ishermitian(S)
+
+    S = Tridiagonal(S.dl, real.(S.d) .+ 0im, S.du)  # same off-diagonals, imaginary part of the diagonal dropped
+    @test !issymmetric(S)
+    @test ishermitian(S)
+end
+
+isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl"))
+using .Main.ImmutableArrays
+
+@testset "Conversion to AbstractArray" begin  # eltype conversion must keep ImmutableArray storage
+    # tests corresponding to #34995
+    v1 = ImmutableArray([1, 2])
+    v2 = ImmutableArray([3, 4, 5])
+    v3 = ImmutableArray([6, 7])
+    T = Tridiagonal(v1, v2, v3)  # Int entries held in ImmutableArray vectors
+    Tsym = SymTridiagonal(v2, v1)
+
+    @test convert(AbstractArray{Float64}, T)::Tridiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == T  # the type assertion pins both the new eltype and the vector type
+    @test convert(AbstractMatrix{Float64}, T)::Tridiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == T
+    @test convert(AbstractArray{Float64}, Tsym)::SymTridiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Tsym
+    @test convert(AbstractMatrix{Float64}, Tsym)::SymTridiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Tsym
+end
+
+@testset "dot(x,A,y) for A::Tridiagonal or SymTridiagonal" begin  # empty-input case of the three-argument dot
+    for elty in (Float32, Float64, ComplexF32, ComplexF64, Int)
+        x = fill(convert(elty, 1), 0)  # zero-length vector of element type elty
+        T = Tridiagonal(x, x, x)  # built entirely from empty vectors
+        Tsym = SymTridiagonal(x, x)
+        @test dot(x, T, x) == 0.0  # empty operands must yield zero
+        @test dot(x, Tsym, x) == 0.0
+    end
+end
+
 end # module TestTridiagonal
diff --git a/stdlib/LinearAlgebra/test/uniformscaling.jl b/stdlib/LinearAlgebra/test/uniformscaling.jl
index 27f6641657e81b..c043db07d2effc 100644
--- a/stdlib/LinearAlgebra/test/uniformscaling.jl
+++ b/stdlib/LinearAlgebra/test/uniformscaling.jl
@@ -7,8 +7,10 @@ using Test, LinearAlgebra, Random, SparseArrays
 const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
 isdefined(Main, :Quaternions) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Quaternions.jl"))
 using .Main.Quaternions
+isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl"))
+using .Main.OffsetArrays
 
-Random.seed!(123)
+Random.seed!(1234543)
 
 @testset "basic functions" begin
     @test I === I' # transpose
@@ -45,6 +47,14 @@ end
         (2:3, 1:2),
         (2:-1:1, 1:2),
         (1:2:9, 5:2:13),
+        (1, [1,2,5]),
+        (1, [1,10,5,2]),
+        (10, [10]),
+        ([1], 1),
+        ([15,1,5,2], 6),
+        ([2], [2]),
+        ([2,9,8,2,1], [2,8,4,3,1]),
+        ([8,3,5,3], 2:9),
     ]
         @test I[a,b] == J[a,b]
     end
@@ -333,10 +343,19 @@ end
         B = T(rand(3,3))
         C = T(rand(0,3))
         D = T(rand(2,0))
+        E = T(rand(1,3))
+        F = T(rand(3,1))
+        α = rand()
         @test (hcat(A, 2I))::T == hcat(A, Matrix(2I, 3, 3))
+        @test (hcat(E, α))::T == hcat(E, [α])
+        @test (hcat(E, α, 2I))::T == hcat(E, [α], fill(2, 1, 1))
         @test (vcat(A, 2I))::T == vcat(A, Matrix(2I, 4, 4))
+        @test (vcat(F, α))::T == vcat(F, [α])
+        @test (vcat(F, α, 2I))::T == vcat(F, [α], fill(2, 1, 1))
         @test (hcat(C, 2I))::T == C
+        @test_throws DimensionMismatch hcat(C, α)
         @test (vcat(D, 2I))::T == D
+        @test_throws DimensionMismatch vcat(D, α)
         @test (hcat(I, 3I, A, 2I))::T == hcat(Matrix(I, 3, 3), Matrix(3I, 3, 3), A, Matrix(2I, 3, 3))
         @test (vcat(I, 3I, A, 2I))::T == vcat(Matrix(I, 4, 4), Matrix(3I, 4, 4), A, Matrix(2I, 4, 4))
         @test (hvcat((2,1,2), B, 2I, I, 3I, 4I))::T ==
@@ -351,6 +370,9 @@ end
             hvcat((2,2,2), B, Matrix(2I, 3, 3), C, C, Matrix(3I, 3, 3), Matrix(4I, 3, 3))
         @test hvcat((3,2,1), C, C, I, B ,3I, 2I)::T ==
             hvcat((2,2,1), C, C, B, Matrix(3I,3,3), Matrix(2I,6,6))
+        @test (hvcat((1,2), A, E, α))::T == hvcat((1,2), A, E, [α]) == hvcat((1,2), A, E, α*I)
+        @test (hvcat((2,2), α, E, F, 3I))::T == hvcat((2,2), [α], E, F, Matrix(3I, 3, 3))
+        @test (hvcat((2,2), 3I, F, E, α))::T == hvcat((2,2), Matrix(3I, 3, 3), F, E, [α])
     end
 end
 
@@ -452,6 +474,17 @@ end
     target = J * A * alpha + C * beta
     @test mul!(copy(C), J, A, alpha, beta) ≈ target
     @test mul!(copy(C), A, J, alpha, beta) ≈ target
+
+    a = randn()
+    C = randn(3, 3)
+    target_5mul = a*alpha*J + beta*C
+    @test mul!(copy(C), a, J, alpha, beta) ≈ target_5mul
+    @test mul!(copy(C), J, a, alpha, beta) ≈ target_5mul
+    target_5mul = beta*C # alpha = 0
+    @test mul!(copy(C), a, J, 0, beta) ≈ target_5mul
+    target_5mul = a*alpha*Matrix(J, 3, 3) # beta = 0
+    @test mul!(copy(C), a, J, alpha, 0) ≈ target_5mul
+
 end
 
 @testset "Construct Diagonal from UniformScaling" begin
@@ -460,6 +493,20 @@ end
     @test I(3) == [1 0 0; 0 1 0; 0 0 1]
 end
 
+@testset "dot" begin  # dot between a matrix and a UniformScaling
+    A = randn(3, 3)
+    λ = randn()
+    J = UniformScaling(λ)
+    @test dot(A, J) ≈ dot(J, A)  # real case: argument order does not matter
+    @test dot(A, J) ≈ tr(A' * J)
+
+    A = rand(ComplexF64, 3, 3)
+    λ = randn() + im * randn()
+    J = UniformScaling(λ)
+    @test dot(A, J) ≈ conj(dot(J, A))  # complex case: swapping the arguments conjugates the result
+    @test dot(A, J) ≈ tr(A' * J)
+end
+
 @testset "generalized dot" begin
     x = rand(-10:10, 3)
     y = rand(-10:10, 3)
@@ -473,7 +520,7 @@ end
 
 @testset "Factorization solutions" begin
     J = complex(randn(),randn()) * I
-    qrp = A -> qr(A, Val(true))
+    qrp = A -> qr(A, ColumnNorm())
 
     # thin matrices
     X = randn(3,2)
@@ -504,4 +551,20 @@ end
     end
 end
 
+@testset "offset arrays" begin  # I combined with a matrix whose axes do not start at 1
+    A = OffsetArray(zeros(4,4), -1:2, 0:3)  # all-zero 4×4 with row axis -1:2 and column axis 0:3
+    @test sum(I + A) ≈ 3.0  # the two axes share indices 0, 1, 2, so three unit entries are expected
+    @test sum(A + I) ≈ 3.0
+    @test sum(I - A) ≈ 3.0
+    @test sum(A - I) ≈ -3.0
+end
+
+@testset "type promotion when dividing UniformScaling by matrix" begin
+    A = randn(5,5)
+    cA = complex(A)  # same values with a complex element type
+    J = (5+2im)*I  # complex-valued UniformScaling
+    @test J/A ≈ J/cA  # real A must give the same result as its complex copy
+    @test A\J ≈ cA\J
+end
+
 end # module TestUniformscaling
diff --git a/stdlib/Logging/docs/src/index.md b/stdlib/Logging/docs/src/index.md
index 0bb1a9c5e89ce1..7a6fbbbdd20810 100644
--- a/stdlib/Logging/docs/src/index.md
+++ b/stdlib/Logging/docs/src/index.md
@@ -1,4 +1,4 @@
-# Logging
+# [Logging](@id man-logging)
 
 The [`Logging`](@ref Logging.Logging) module provides a way to record the history and progress of a
 computation as a log of events.  Events are created by inserting a logging
@@ -59,14 +59,14 @@ automatically extracted. Let's examine the user-defined data first:
   filtering. There are several standard levels of type [`LogLevel`](@ref);
   user-defined levels are also possible.
   Each is distinct in purpose:
-  - `Debug` is information intended for the developer of the program.
-    These events are disabled by default.
-  - `Info` is for general information to the user.
+  - [`Logging.Debug`](@ref) (log level -1000) is information intended for the developer of
+    the program. These events are disabled by default.
+  - [`Logging.Info`](@ref) (log level 0) is for general information to the user.
     Think of it as an alternative to using `println` directly.
-  - `Warn` means something is wrong and action is likely required
-    but that for now the program is still working.
-  - `Error` means something is wrong and it is unlikely to be recovered,
-    at least by this part of the code.
+  - [`Logging.Warn`](@ref) (log level 1000) means something is wrong and action is likely
+    required but that for now the program is still working.
+  - [`Logging.Error`](@ref) (log level 2000) means something is wrong and it is unlikely to
+    be recovered, at least by this part of the code.
     Often this log-level is unneeded as throwing an exception can convey
     all the required information.
 
@@ -217,7 +217,12 @@ julia> foo()
 
 ```
 
-## Writing log events to a file
+Use a comma separator to enable debug for multiple
+modules: `JULIA_DEBUG=loading,Main`.
+
+## Examples
+
+### Example: Writing log events to a file
 
 Sometimes it can be useful to write log events to a file. Here is an example
 of how to use a task-local and global logger to write information to a text
@@ -254,6 +259,25 @@ julia> @info("a global log message")
 julia> close(io)
 ```
 
+### Example: Enable debug-level messages
+
+Here is an example of creating a [`ConsoleLogger`](@ref) that lets through any messages
+with log level higher than or equal to [`Logging.Debug`](@ref).
+
+```julia-repl
+julia> using Logging
+
+# Create a ConsoleLogger that prints any log messages with level >= Debug to stderr
+julia> debuglogger = ConsoleLogger(stderr, Logging.Debug)
+
+# Enable debuglogger for a task
+julia> with_logger(debuglogger) do
+           @debug "a context specific log message"
+       end
+
+# Set the global logger
+julia> global_logger(debuglogger)
+```
 
 ## Reference
 
@@ -267,6 +291,10 @@ Logging.Logging
 ```@docs
 Logging.@logmsg
 Logging.LogLevel
+Logging.Debug
+Logging.Info
+Logging.Warn
+Logging.Error
 ```
 
 ### [Processing events with AbstractLogger](@id AbstractLogger-interface)
diff --git a/stdlib/Logging/src/ConsoleLogger.jl b/stdlib/Logging/src/ConsoleLogger.jl
index 2a96b08eb5ed2c..4e32b6b71f6562 100644
--- a/stdlib/Logging/src/ConsoleLogger.jl
+++ b/stdlib/Logging/src/ConsoleLogger.jl
@@ -1,7 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 """
-    ConsoleLogger(stream=stderr, min_level=Info; meta_formatter=default_metafmt,
+    ConsoleLogger([stream,] min_level=Info; meta_formatter=default_metafmt,
                   show_limited=true, right_justify=0)
 
 Logger with formatting optimized for readability in a text console, for example
@@ -30,12 +30,19 @@ struct ConsoleLogger <: AbstractLogger
     right_justify::Int
     message_limits::Dict{Any,Int}
 end
-function ConsoleLogger(stream::IO=stderr, min_level=Info;
+function ConsoleLogger(stream::IO, min_level=Info;
                        meta_formatter=default_metafmt, show_limited=true,
                        right_justify=0)
     ConsoleLogger(stream, min_level, meta_formatter,
                   show_limited, right_justify, Dict{Any,Int}())
 end
+function ConsoleLogger(min_level=Info; # stream-less form: no IO object is supplied by the caller
+                       meta_formatter=default_metafmt, show_limited=true,
+                       right_justify=0)
+    ConsoleLogger(closed_stream, min_level, meta_formatter, # stores `closed_stream`; `handle_message` writes to `stderr` instead whenever the stored stream is not open
+                  show_limited, right_justify, Dict{Any,Int}())
+end
+
 
 shouldlog(logger::ConsoleLogger, level, _module, group, id) =
     get(logger.message_limits, id, 1) > 0
@@ -63,7 +70,7 @@ function default_metafmt(level::LogLevel, _module, group, id, file, line)
     prefix = string(level == Warn ? "Warning" : string(level), ':')
     suffix::String = ""
     Info <= level < Warn && return color, prefix, suffix
-    _module !== nothing && (suffix *= "$(_module)")
+    _module !== nothing && (suffix *= string(_module)::String)
     if file !== nothing
         _module !== nothing && (suffix *= " ")
         suffix *= Base.contractuser(file)::String
@@ -110,12 +117,16 @@ function handle_message(logger::ConsoleLogger, level::LogLevel, message, _module
     # Generate a text representation of the message and all key value pairs,
     # split into lines.
     msglines = [(indent=0, msg=l) for l in split(chomp(string(message)::String), '\n')]
-    dsize = displaysize(logger.stream)::Tuple{Int,Int}
+    stream = logger.stream
+    if !isopen(stream)
+        stream = stderr
+    end
+    dsize = displaysize(stream)::Tuple{Int,Int}
     nkwargs = length(kwargs)::Int
     if nkwargs > hasmaxlog
         valbuf = IOBuffer()
         rows_per_value = max(1, dsize[1] ÷ (nkwargs + 1 - hasmaxlog))
-        valio = IOContext(IOContext(valbuf, logger.stream),
+        valio = IOContext(IOContext(valbuf, stream),
                           :displaysize => (rows_per_value, dsize[2] - 5),
                           :limit => logger.show_limited)
         for (key, val) in kwargs
@@ -136,7 +147,7 @@ function handle_message(logger::ConsoleLogger, level::LogLevel, message, _module
     color, prefix, suffix = logger.meta_formatter(level, _module, group, id, filepath, line)::Tuple{Union{Symbol,Int},String,String}
     minsuffixpad = 2
     buf = IOBuffer()
-    iob = IOContext(buf, logger.stream)
+    iob = IOContext(buf, stream)
     nonpadwidth = 2 + (isempty(prefix) || length(msglines) > 1 ? 0 : length(prefix)+1) +
                   msglines[end].indent + termlength(msglines[end].msg) +
                   (isempty(suffix) ? 0 : length(suffix)+minsuffixpad)
@@ -164,6 +175,6 @@ function handle_message(logger::ConsoleLogger, level::LogLevel, message, _module
         println(iob)
     end
 
-    write(logger.stream, take!(buf))
+    write(stream, take!(buf))
     nothing
 end
diff --git a/stdlib/Logging/src/Logging.jl b/stdlib/Logging/src/Logging.jl
index b44b8ae67473c2..0743c650326cc9 100644
--- a/stdlib/Logging/src/Logging.jl
+++ b/stdlib/Logging/src/Logging.jl
@@ -12,7 +12,7 @@ module Logging
 # Doing it this way (rather than with import) makes these symbols accessible to
 # tab completion.
 for sym in [
-    :LogLevel, :BelowMinLevel, :Debug, :Info, :Warn, :Error, :AboveMaxLevel,
+    :LogLevel, :BelowMinLevel, :AboveMaxLevel,
     :AbstractLogger,
     :NullLogger,
     :handle_message, :shouldlog, :min_enabled_level, :catch_exceptions,
@@ -29,6 +29,35 @@ for sym in [
     @eval const $sym = Base.CoreLogging.$sym
 end
 
+# LogLevel aliases (re-)documented here (JuliaLang/julia#40978)
+"""
+    Debug
+
+Alias for [`LogLevel(-1000)`](@ref LogLevel).
+"""
+const Debug = Base.CoreLogging.Debug
+"""
+    Info
+
+Alias for [`LogLevel(0)`](@ref LogLevel).
+"""
+const Info = Base.CoreLogging.Info
+"""
+    Warn
+
+Alias for [`LogLevel(1000)`](@ref LogLevel).
+"""
+const Warn = Base.CoreLogging.Warn
+"""
+    Error
+
+Alias for [`LogLevel(2000)`](@ref LogLevel).
+"""
+const Error = Base.CoreLogging.Error
+
+using Base.CoreLogging:
+    closed_stream
+
 export
     AbstractLogger,
     LogLevel,
@@ -43,7 +72,13 @@ export
     global_logger,
     disable_logging,
     SimpleLogger,
-    ConsoleLogger
+    ConsoleLogger,
+    BelowMinLevel,
+    Debug,
+    Info,
+    Warn,
+    Error,
+    AboveMaxLevel
 
 include("ConsoleLogger.jl")
 
@@ -56,7 +91,7 @@ include("ConsoleLogger.jl")
 #  handle_message, shouldlog, min_enabled_level, catch_exceptions,
 
 function __init__()
-    global_logger(ConsoleLogger(stderr))
+    global_logger(ConsoleLogger())
 end
 
 end
diff --git a/stdlib/Logging/test/runtests.jl b/stdlib/Logging/test/runtests.jl
index 7168da07e80420..b6b48139645368 100644
--- a/stdlib/Logging/test/runtests.jl
+++ b/stdlib/Logging/test/runtests.jl
@@ -259,4 +259,20 @@ end
 
 end
 
+@testset "exported names" begin
+    m = Module(:ExportedLoggingNames)
+    include_string(m, """
+        using Logging
+        function run()
+            BelowMinLevel === Logging.BelowMinLevel &&
+            Debug === Logging.Debug &&
+            Info === Logging.Info &&
+            Warn === Logging.Warn &&
+            Error === Logging.Error &&
+            AboveMaxLevel === Logging.AboveMaxLevel
+        end
+        """)
+    @test m.run()
+end
+
 end
diff --git a/stdlib/MPFR_jll/Project.toml b/stdlib/MPFR_jll/Project.toml
index d3f435887cc7fe..22aa30d20511b9 100644
--- a/stdlib/MPFR_jll/Project.toml
+++ b/stdlib/MPFR_jll/Project.toml
@@ -9,3 +9,9 @@ Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 
 [compat]
 julia = "1.6"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/Makefile b/stdlib/Makefile
index 718c18842f086f..e782d92eab2b96 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -16,7 +16,7 @@ $(build_datarootdir)/julia/stdlib/$(VERSDIR):
 	mkdir -p $@
 
 JLLS = DSFMT GMP CURL LIBGIT2 LLVM LIBSSH2 LIBUV MBEDTLS MPFR NGHTTP2 \
-       BLASTRAMPOLINE OPENBLAS OPENLIBM P7ZIP PCRE SUITESPARSE ZLIB \
+       BLASTRAMPOLINE OPENBLAS OPENLIBM P7ZIP PCRE LIBSUITESPARSE ZLIB \
        LLVMUNWIND CSL UNWIND
 
 # Initialize this with JLLs that aren't in deps/Versions.make
@@ -44,7 +44,7 @@ STDLIBS = Artifacts Base64 CRC32c Dates DelimitedFiles Distributed FileWatching
           SharedArrays Sockets SparseArrays SuiteSparse Test TOML Unicode UUIDs \
           $(JLL_NAMES)
 
-STDLIBS_EXT = Pkg Statistics LibCURL Downloads ArgTools Tar NetworkOptions
+STDLIBS_EXT = Pkg Statistics LibCURL Downloads ArgTools Tar NetworkOptions SuiteSparse
 PKG_GIT_URL := git://github.com/JuliaLang/Pkg.jl.git
 PKG_TAR_URL = https://api.github.com/repos/JuliaLang/Pkg.jl/tarball/$1
 STATISTICS_GIT_URL := git://github.com/JuliaLang/Statistics.jl.git
@@ -59,6 +59,8 @@ TAR_GIT_URL := git://github.com/JuliaIO/Tar.jl.git
 TAR_TAR_URL = https://api.github.com/repos/JuliaIO/Tar.jl/tarball/$1
 NETWORKOPTIONS_GIT_URL := git://github.com/JuliaLang/NetworkOptions.jl.git
 NETWORKOPTIONS_TAR_URL = https://api.github.com/repos/JuliaLang/NetworkOptions.jl/tarball/$1
+SUITESPARSE_GIT_URL := git://github.com/JuliaLang/SuiteSparse.jl.git
+SUITESPARSE_TAR_URL = https://api.github.com/repos/JuliaLang/SuiteSparse.jl/tarball/$1
 
 $(foreach module, $(STDLIBS_EXT), $(eval $(call stdlib-external,$(module),$(shell echo $(module) | tr a-z A-Z))))
 
@@ -68,10 +70,16 @@ $(foreach module, $(STDLIBS), $(eval $(call symlink_target,$$(JULIAHOME)/stdlib/
 STDLIBS_LINK_TARGETS := $(addprefix $(build_datarootdir)/julia/stdlib/$(VERSDIR)/,$(STDLIBS))
 
 getall get: $(addprefix get-, $(STDLIBS_EXT) $(JLL_NAMES))
-install: $(addprefix install-, $(STDLIBS_EXT) $(JLL_NAMES)) $(STDLIBS_LINK_TARGETS)
-clean: $(addprefix clean-, $(STDLIBS_EXT)) $(CLEAN_TARGETS)
+install: version-check $(addprefix install-, $(STDLIBS_EXT) $(JLL_NAMES)) $(STDLIBS_LINK_TARGETS)
+version-check: $(addprefix version-check-, $(STDLIBS_EXT))
+uninstall: $(addprefix uninstall-, $(STDLIBS_EXT))
+extstdlibclean:
+	for module in $(STDLIBS_EXT) ; do \
+		rm -rf $(JULIAHOME)/stdlib/$${module}-*; \
+	done
+clean: $(addprefix clean-, $(STDLIBS_EXT)) $(CLEAN_TARGETS) extstdlibclean
 distclean: $(addprefix distclean-, $(STDLIBS_EXT)) clean
 checksumall: $(addprefix checksum-, $(STDLIBS_EXT))
 
-DEP_LIBS_STAGED := $(STDLIBS_EXT)
+DEP_LIBS_STAGED_ALL := $(STDLIBS_EXT)
 include $(JULIAHOME)/deps/tools/uninstallers.mk
diff --git a/stdlib/Markdown/src/render/latex.jl b/stdlib/Markdown/src/render/latex.jl
index ee546be555fa45..d18a2e760ef3df 100644
--- a/stdlib/Markdown/src/render/latex.jl
+++ b/stdlib/Markdown/src/render/latex.jl
@@ -33,8 +33,8 @@ function latex(io::IO, header::Header{l}) where l
 end
 
 function latex(io::IO, code::Code)
+    occursin("\\end{verbatim}", code.code) && error("Cannot include \"\\end{verbatim}\" in a latex code block")
     wrapblock(io, "verbatim") do
-        # TODO latex escape
         println(io, code.code)
     end
 end
diff --git a/stdlib/Markdown/src/render/terminal/formatting.jl b/stdlib/Markdown/src/render/terminal/formatting.jl
index bacd82f7ed021d..87022124b9c8a8 100644
--- a/stdlib/Markdown/src/render/terminal/formatting.jl
+++ b/stdlib/Markdown/src/render/terminal/formatting.jl
@@ -9,8 +9,9 @@ end
 words(s) = split(s, " ")
 lines(s) = split(s, "\n")
 
-function wrapped_lines!(lines, io::IO, s::AbstractString, width, i)
+function wrapped_line(io::IO, s::AbstractString, width, i)
     ws = words(s)
+    lines = String[]
     for word in ws
         word_length = ansi_length(word)
         word_length == 0 && continue
@@ -22,19 +23,16 @@ function wrapped_lines!(lines, io::IO, s::AbstractString, width, i)
             lines[end] *= " " * word   # this could be more efficient
         end
     end
-    return i
+    return i, lines
 end
 
 function wrapped_lines(io::IO, s::AbstractString; width = 80, i = 0)
-    lines = AbstractString[]
-    if occursin(r"\n", s)
-        for ss in split(s, "\n")
-            i = wrapped_lines!(lines, io, ss, width, i)
-        end
-    else
-        wrapped_lines!(lines, io, s, width, i)
+    ls = String[]
+    for ss in lines(s)
+        i, line = wrapped_line(io, ss, width, i)
+        append!(ls, line)
     end
-    return lines
+    return ls
 end
 
 wrapped_lines(io::IO, f::Function, args...; width = 80, i = 0) =
diff --git a/stdlib/Markdown/test/runtests.jl b/stdlib/Markdown/test/runtests.jl
index f9983d10089f9d..51f2e44c642cdb 100644
--- a/stdlib/Markdown/test/runtests.jl
+++ b/stdlib/Markdown/test/runtests.jl
@@ -1222,3 +1222,32 @@ end
                """)
 end
 
+@testset "issue #37232: linebreaks" begin
+    s = @md_str """
+       Misc:\\
+       - line\\
+       """
+    @test sprint(show, MIME("text/plain"), s) == "  Misc:\n  - line"
+end
+
+@testset "pullrequest #41552: a code block has \\end{verbatim}" begin
+    s1 = md"""
+         ```tex
+         \begin{document}
+         \end{document}
+         ```
+         """
+    s2 = md"""
+         ```tex
+         \begin{verbatim}
+         \end{verbatim}
+         ```
+         """
+    @test Markdown.latex(s1) == """
+                                \\begin{verbatim}
+                                \\begin{document}
+                                \\end{document}
+                                \\end{verbatim}
+                                """
+    @test_throws ErrorException Markdown.latex(s2)
+end
diff --git a/stdlib/MbedTLS_jll/Project.toml b/stdlib/MbedTLS_jll/Project.toml
index 76ecb8a727725f..9533336ca9f812 100644
--- a/stdlib/MbedTLS_jll/Project.toml
+++ b/stdlib/MbedTLS_jll/Project.toml
@@ -8,3 +8,9 @@ Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 
 [compat]
 julia = "1.6"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/Mmap/src/Mmap.jl b/stdlib/Mmap/src/Mmap.jl
index 9a9d795a5aa17a..629f53e8371edd 100644
--- a/stdlib/Mmap/src/Mmap.jl
+++ b/stdlib/Mmap/src/Mmap.jl
@@ -13,7 +13,7 @@ const PAGESIZE = Int(Sys.isunix() ? ccall(:jl_getpagesize, Clong, ()) : ccall(:j
 
 # for mmaps not backed by files
 mutable struct Anonymous <: IO
-    name::AbstractString
+    name::String
     readonly::Bool
     create::Bool
 end
@@ -191,7 +191,11 @@ function mmap(io::IO,
     isopen(io) || throw(ArgumentError("$io must be open to mmap"))
     isbitstype(T)  || throw(ArgumentError("unable to mmap $T; must satisfy isbitstype(T) == true"))
 
-    len = prod(dims) * sizeof(T)
+    len = sizeof(T) # running byte count; multiplied by each dimension below with an overflow check
+    for l in dims
+        len, overflow = Base.Checked.mul_with_overflow(promote(len, l)...) # promote so both operands share one integer type
+        overflow && throw(ArgumentError("requested size prod($((sizeof(T), dims...))) too large, would overflow $(typeof(len))"))
+    end
     len >= 0 || throw(ArgumentError("requested size must be ≥ 0, got $len"))
     len == 0 && return Array{T}(undef, ntuple(x->0,Val(N)))
     len < typemax(Int) - PAGESIZE || throw(ArgumentError("requested size must be < $(typemax(Int)-PAGESIZE), got $len"))
diff --git a/stdlib/Mmap/test/runtests.jl b/stdlib/Mmap/test/runtests.jl
index 51bf898e94b48f..0b3cb0b9f1a426 100644
--- a/stdlib/Mmap/test/runtests.jl
+++ b/stdlib/Mmap/test/runtests.jl
@@ -133,6 +133,7 @@ c = mmap(s, Vector{UInt8}, (UInt16(11),))
 finalize(c); c=nothing; GC.gc()
 @test_throws ArgumentError mmap(s, Vector{UInt8}, (Int16(-11),))
 @test_throws ArgumentError mmap(s, Vector{UInt8}, (typemax(UInt),))
+@test_throws ArgumentError mmap(s, Matrix{UInt8}, (typemax(Int) - Mmap.PAGESIZE - 1, 2)) # overflow
 close(s)
 s = open(file, "r+")
 @test isreadonly(s) == false
diff --git a/stdlib/NetworkOptions.version b/stdlib/NetworkOptions.version
index b8e8b3d371c381..6a9bf2182fc99a 100644
--- a/stdlib/NetworkOptions.version
+++ b/stdlib/NetworkOptions.version
@@ -1,2 +1,2 @@
 NETWORKOPTIONS_BRANCH = master
-NETWORKOPTIONS_SHA1 = a251de1e1c8ce4edc351d0f05233ba7fe7d2c27a
+NETWORKOPTIONS_SHA1 = 42a0b5fcb7edb8ed5b0ae699f15ca6aedc0098ca
diff --git a/stdlib/OpenBLAS_jll/Project.toml b/stdlib/OpenBLAS_jll/Project.toml
index e732272c12e09d..c7d59bb0328974 100644
--- a/stdlib/OpenBLAS_jll/Project.toml
+++ b/stdlib/OpenBLAS_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "OpenBLAS_jll"
 uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
-version = "0.3.13+3"
+version = "0.3.17+0"
 
 [deps]
 CompilerSupportLibraries_jll = "e66e0078-7015-5450-92f7-15fbd957f2ae"
@@ -9,3 +9,9 @@ Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 
 [compat]
 julia = "1.7"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/OpenLibm_jll/Project.toml b/stdlib/OpenLibm_jll/Project.toml
index 9afded576bd7a5..1423324139eab9 100644
--- a/stdlib/OpenLibm_jll/Project.toml
+++ b/stdlib/OpenLibm_jll/Project.toml
@@ -8,3 +8,9 @@ Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 
 [compat]
 julia = "1.0"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/PCRE2_jll/Project.toml b/stdlib/PCRE2_jll/Project.toml
index 07fec75d3aa471..b7718fcf79f480 100644
--- a/stdlib/PCRE2_jll/Project.toml
+++ b/stdlib/PCRE2_jll/Project.toml
@@ -8,3 +8,9 @@ Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 
 [compat]
 julia = "1.6"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/Pkg.version b/stdlib/Pkg.version
index 5d9153eefe8535..4ed54226e67e77 100644
--- a/stdlib/Pkg.version
+++ b/stdlib/Pkg.version
@@ -1,2 +1,2 @@
 PKG_BRANCH = master
-PKG_SHA1 = af7e41cd9d9529bfc8e8fecd7e24c7392c73cdbc
+PKG_SHA1 = 252e895056b17490bfeabd81f52743bad947e997
diff --git a/stdlib/Printf/docs/src/index.md b/stdlib/Printf/docs/src/index.md
index 828e527ed0cadf..48e38e2b2ce5b4 100644
--- a/stdlib/Printf/docs/src/index.md
+++ b/stdlib/Printf/docs/src/index.md
@@ -1,4 +1,4 @@
-# Printf
+# [Printf](@id man-printf)
 
 ```@docs
 Printf.@printf
diff --git a/stdlib/Printf/src/Printf.jl b/stdlib/Printf/src/Printf.jl
index ac57f95ec6bb96..8dacee5a1dc1b3 100644
--- a/stdlib/Printf/src/Printf.jl
+++ b/stdlib/Printf/src/Printf.jl
@@ -13,6 +13,7 @@ const Chars = Union{Val{'c'}, Val{'C'}}
 const Strings = Union{Val{'s'}, Val{'S'}}
 const Pointer = Val{'p'}
 const HexBases = Union{Val{'x'}, Val{'X'}, Val{'a'}, Val{'A'}}
+const PositionCounter = Val{'n'}
 
 """
 Typed representation of a format specifier.
@@ -219,15 +220,17 @@ end
 
 @inline function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Chars}
     leftalign, width = spec.leftalign, spec.width
-    if !leftalign && width > 1
-        for _ = 1:(width - 1)
+    c = Char(first(arg))
+    w = textwidth(c)
+    if !leftalign && width > w
+        for _ = 1:(width - w)
             buf[pos] = UInt8(' ')
             pos += 1
         end
     end
-    pos = writechar(buf, pos, arg isa String ? arg[1] : Char(arg))
-    if leftalign && width > 1
-        for _ = 1:(width - 1)
+    pos = writechar(buf, pos, c)
+    if leftalign && width > w
+        for _ = 1:(width - w)
             buf[pos] = UInt8(' ')
             pos += 1
         end
@@ -239,7 +242,7 @@ end
 @inline function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Strings}
     leftalign, hash, width, prec = spec.leftalign, spec.hash, spec.width, spec.precision
     str = string(arg)
-    slen = length(str) + (hash ? arg isa AbstractString ? 2 : 1 : 0)
+    slen = textwidth(str) + (hash ? arg isa AbstractString ? 2 : 1 : 0)
     op = p = prec == -1 ? slen : min(slen, prec)
     if !leftalign && width > p
         for _ = 1:(width - p)
@@ -259,9 +262,9 @@ end
         end
     end
     for c in str
-        p == 0 && break
+        p -= textwidth(c)
+        p < 0 && break
         pos = writechar(buf, pos, c)
-        p -= 1
     end
     if hash && arg isa AbstractString && p > 0
         buf[pos] = UInt8('"')
@@ -421,11 +424,34 @@ const __BIG_FLOAT_MAX__ = 8192
     elseif T == Val{'f'} || T == Val{'F'}
         newpos = Ryu.writefixed(buf, pos, x, prec, plus, space, hash, UInt8('.'))
     elseif T == Val{'g'} || T == Val{'G'}
-        prec = prec == 0 ? 1 : prec
-        x = round(x, sigdigits=prec)
-        newpos = Ryu.writeshortest(buf, pos, x, plus, space, hash, prec, T == Val{'g'} ? UInt8('e') : UInt8('E'), true, UInt8('.'))
+        if isinf(x) || isnan(x)
+            newpos = Ryu.writeshortest(buf, pos, x, plus, space)
+        else
+            # C11-compliant general format
+            prec = prec == 0 ? 1 : prec
+            # format the value in scientific notation and parse the exponent part
+            exp = let p = Ryu.writeexp(buf, pos, x, prec)
+                b1, b2, b3, b4 = buf[p-4], buf[p-3], buf[p-2], buf[p-1]
+                Z = UInt8('0')
+                if b1 == UInt8('e')
+                    # two-digit exponent
+                    sign = b2 == UInt8('+') ? 1 : -1
+                    exp = 10 * (b3 - Z) + (b4 - Z)
+                else
+                    # three-digit exponent
+                    sign = b1 == UInt8('+') ? 1 : -1
+                    exp = 100 * (b2 - Z) + 10 * (b3 - Z) + (b4 - Z)
+                end
+                flipsign(exp, sign)
+            end
+            if -4 ≤ exp < prec
+                newpos = Ryu.writefixed(buf, pos, x, prec - (exp + 1), plus, space, hash, UInt8('.'), !hash)
+            else
+                newpos = Ryu.writeexp(buf, pos, x, prec - 1, plus, space, hash, T == Val{'g'} ? UInt8('e') : UInt8('E'), UInt8('.'), !hash)
+            end
+        end
     elseif T == Val{'a'} || T == Val{'A'}
-        x, neg = x < 0 ? (-x, true) : (x, false)
+        x, neg = x < 0 || x === -Base.zero(x) ? (-x, true) : (x, false)
         newpos = pos
         if neg
             buf[newpos] = UInt8('-')
@@ -456,6 +482,8 @@ const __BIG_FLOAT_MAX__ = 8192
                 buf[newpos] = UInt8('0')
                 newpos += 1
                 if prec > 0
+                    buf[newpos] = UInt8('.')
+                    newpos += 1
                     while prec > 0
                         buf[newpos] = UInt8('0')
                         newpos += 1
@@ -465,6 +493,7 @@ const __BIG_FLOAT_MAX__ = 8192
                 buf[newpos] = T <: Val{'a'} ? UInt8('p') : UInt8('P')
                 buf[newpos + 1] = UInt8('+')
                 buf[newpos + 2] = UInt8('0')
+                newpos += 3
             else
                 if prec > -1
                     s, p = frexp(x)
@@ -547,7 +576,13 @@ const __BIG_FLOAT_MAX__ = 8192
 end
 
 # pointers
-fmt(buf, pos, arg, spec::Spec{Pointer}) = fmt(buf, pos, Int(arg), ptrfmt(spec, arg))
+fmt(buf, pos, arg, spec::Spec{Pointer}) = fmt(buf, pos, UInt64(arg), ptrfmt(spec, arg))
+
+# position counters
+function fmt(buf, pos, arg::Ref{<:Integer}, ::Spec{PositionCounter}) # `%n`: writes nothing to `buf`, only records the current position in `arg`
+    arg[] = pos - 1 # `pos` is the next index to be written in `buf`, so `pos - 1` positions precede it
+    pos # returned unchanged because no output was produced
+end
 
 # old Printf compat
 function fix_dec end
@@ -726,13 +761,18 @@ const UNROLL_UPTO = 16
     return pos
 end
 
-plength(f::Spec{T}, x) where {T <: Chars} = max(f.width, 1) + (ncodeunits(x isa AbstractString ? x[1] : Char(x)) - 1)
+function plength(f::Spec{T}, x) where {T <: Chars} # length estimate for a `%c` argument
+    c = Char(first(x)) # same normalisation as the `Chars` method of `fmt`: first element of `x`, converted to `Char`
+    w = textwidth(c) # display columns occupied by the character
+    return max(f.width, w) + (ncodeunits(c) - w) # field width in columns, adjusted by the difference between the character's code units and its column width
+end
 plength(f::Spec{Pointer}, x) = max(f.width, 2 * sizeof(x) + 2)
 
 function plength(f::Spec{T}, x) where {T <: Strings}
     str = string(x)
-    p = f.precision == -1 ? (length(str) + (f.hash ? (x isa Symbol ? 1 : 2) : 0)) : f.precision
-    return max(f.width, p) + (sizeof(str) - length(str))
+    sw = textwidth(str)
+    p = f.precision == -1 ? (sw + (f.hash ? (x isa Symbol ? 1 : 2) : 0)) : f.precision
+    return max(f.width, p) + (sizeof(str) - sw)
 end
 
 function plength(f::Spec{T}, x) where {T <: Ints}
@@ -744,6 +784,7 @@ plength(f::Spec{T}, x::AbstractFloat) where {T <: Ints} =
     max(f.width, 0 + 309 + 17 + f.hash + 5)
 plength(f::Spec{T}, x) where {T <: Floats} =
     max(f.width, f.precision + 309 + 17 + f.hash + 5)
+plength(::Spec{PositionCounter}, x) = 0
 
 @inline function computelen(substringranges, formats, args)
     len = sum(length, substringranges)
@@ -821,7 +862,7 @@ Use shorter of decimal or scientific 1.23 1.23e+07
 ```
 
 For a systematic specification of the format, see [here](https://www.cplusplus.com/reference/cstdio/printf/).
-See also: [`@sprintf`](@ref).
+See also [`@sprintf`](@ref).
 
 # Caveats
 `Inf` and `NaN` are printed consistently as `Inf` and `NaN` for flags `%a`, `%A`,
@@ -837,6 +878,12 @@ Inf Inf NaN NaN
 julia> @printf "%.0f %.1f %f" 0.5 0.025 -0.0078125
 0 0.0 -0.007812
 ```
+
+!!! compat "Julia 1.7"
+    Starting in Julia 1.7, `%s` (string) and `%c` (character) widths are computed
+    using [`textwidth`](@ref), which e.g. ignores zero-width characters
+    (such as combining characters for diacritical marks) and treats certain
+    "wide" characters (e.g. emoji) as width `2`.
 """
 macro printf(io_or_fmt, args...)
     if io_or_fmt isa String
diff --git a/stdlib/Printf/test/runtests.jl b/stdlib/Printf/test/runtests.jl
index d38c90734d4f17..3d7929c42e9e05 100644
--- a/stdlib/Printf/test/runtests.jl
+++ b/stdlib/Printf/test/runtests.jl
@@ -19,11 +19,17 @@ using Test, Printf
         @test (@sprintf "%-20p" C_NULL) == "0x00000000          "
     end
 
+    #40318
+    @test @sprintf("%p", 0xfffffffffffe0000) == "0xfffffffffffe0000"
+
 end
 
 @testset "%a" begin
 
     # hex float
+    @test (Printf.@sprintf "%a" 0.0) == "0x0p+0"
+    @test (Printf.@sprintf "%a" -0.0) == "-0x0p+0"
+    @test (Printf.@sprintf "%.3a" 0.0) == "0x0.000p+0"
     @test (Printf.@sprintf "%a" 1.5) == "0x1.8p+0"
     @test (Printf.@sprintf "%a" 1.5f0) == "0x1.8p+0"
     @test (Printf.@sprintf "%a" big"1.5") == "0x1.8p+0"
@@ -88,6 +94,15 @@ end
     @test Printf.@sprintf("%g", 123456.7) == "123457"
     @test Printf.@sprintf("%g", 1234567.8) == "1.23457e+06"
 
+    # %g regression gh #41631
+    for (val, res) in ((Inf, "Inf"),
+                       (-Inf, "-Inf"),
+                       (NaN, "NaN"),
+                       (-NaN, "NaN"))
+        @test Printf.@sprintf("%g", val) == res
+        @test Printf.@sprintf("%G", val) == res
+    end
+
     # zeros
     @test Printf.@sprintf("%.15g", 0) == "0"
     @test Printf.@sprintf("%#.15g", 0) == "0.00000000000000"
@@ -104,9 +119,9 @@ end
     @test (Printf.@sprintf "%f" -Inf) == "-Inf"
     @test (Printf.@sprintf "%+f" -Inf) == "-Inf"
     @test (Printf.@sprintf "%f" NaN) == "NaN"
-    @test (Printf.@sprintf "%+f" NaN) == "NaN"
-    @test (Printf.@sprintf "% f" NaN) == "NaN"
-    @test (Printf.@sprintf "% #f" NaN) == "NaN"
+    @test (Printf.@sprintf "%+f" NaN) == "+NaN"
+    @test (Printf.@sprintf "% f" NaN) == " NaN"
+    @test (Printf.@sprintf "% #f" NaN) == " NaN"
     @test (Printf.@sprintf "%e" big"Inf") == "Inf"
     @test (Printf.@sprintf "%e" big"NaN") == "NaN"
 
@@ -141,6 +156,10 @@ end
     @test Printf.@sprintf("%+ 09.1f", 1.234) == "+000001.2"
     @test Printf.@sprintf("%+ 09.0f", 1.234) == "+00000001"
     @test Printf.@sprintf("%+ #09.0f", 1.234) == "+0000001."
+
+    #40303
+    @test Printf.@sprintf("%+7.1f", 9.96) == "  +10.0"
+    @test Printf.@sprintf("% 7.1f", 9.96) == "   10.0"
 end
 
 @testset "%e" begin
@@ -153,9 +172,9 @@ end
     @test (Printf.@sprintf "%e" -Inf) == "-Inf"
     @test (Printf.@sprintf "%+e" -Inf) == "-Inf"
     @test (Printf.@sprintf "%e" NaN) == "NaN"
-    @test (Printf.@sprintf "%+e" NaN) == "NaN"
-    @test (Printf.@sprintf "% e" NaN) == "NaN"
-    @test (Printf.@sprintf "% #e" NaN) == "NaN"
+    @test (Printf.@sprintf "%+e" NaN) == "+NaN"
+    @test (Printf.@sprintf "% e" NaN) == " NaN"
+    @test (Printf.@sprintf "% #e" NaN) == " NaN"
     @test (Printf.@sprintf "%e" big"Inf") == "Inf"
     @test (Printf.@sprintf "%e" big"NaN") == "NaN"
 
@@ -202,6 +221,10 @@ end
     @test Printf.@sprintf("%+ 09.1e", 1.234) == "+01.2e+00"
     @test Printf.@sprintf("%+ 09.0e", 1.234) == "+0001e+00"
     @test Printf.@sprintf("%+ #09.0e", 1.234) == "+001.e+00"
+
+    #40303
+    @test Printf.@sprintf("%+9.1e", 9.96) == " +1.0e+01"
+    @test Printf.@sprintf("% 9.1e", 9.96) == "  1.0e+01"
 end
 
 @testset "strings" begin
@@ -247,6 +270,12 @@ end
     @test (Printf.@sprintf "%-.3s" "test") == "tes"
     @test (Printf.@sprintf "%#-.3s" "test") == "\"te"
 
+    # issue #41068
+    @test Printf.@sprintf("%.2s", "föó") == "fö"
+    @test Printf.@sprintf("%5s", "föó") == "  föó"
+    @test Printf.@sprintf("%6s", "😍🍕") == "  😍🍕"
+    @test Printf.@sprintf("%2c", '🍕') == "🍕"
+    @test Printf.@sprintf("%3c", '🍕') == " 🍕"
 end
 
 @testset "chars" begin
@@ -435,6 +464,14 @@ end
     @test Printf.@sprintf("%e", 1) == "1.000000e+00"
     @test Printf.@sprintf("%g", 1) == "1"
 
+    # issue #39748
+    @test Printf.@sprintf("%.16g", 194.4778127560983) == "194.4778127560983"
+    @test Printf.@sprintf("%.17g", 194.4778127560983) == "194.4778127560983"
+    @test Printf.@sprintf("%.18g", 194.4778127560983) == "194.477812756098302"
+    @test Printf.@sprintf("%.1g", 1.7976931348623157e308) == "2e+308"
+    @test Printf.@sprintf("%.2g", 1.7976931348623157e308) == "1.8e+308"
+    @test Printf.@sprintf("%.3g", 1.7976931348623157e308) == "1.8e+308"
+
     # escaped '%'
     @test_throws ArgumentError @sprintf("%s%%%s", "a")
     @test @sprintf("%s%%%s", "a", "b") == "a%b"
@@ -727,4 +764,11 @@ end
 
 end
 
+@testset "%n" begin
+    x = Ref{Int}()
+    @test (Printf.@sprintf("%d4%n", 123, x); x[] == 4)
+    @test (Printf.@sprintf("%s%n", "😉", x); x[] == 4)
+    @test (Printf.@sprintf("%s%n", "1234", x); x[] == 4)
+end
+
 end # @testset "Printf"
diff --git a/stdlib/Profile/Project.toml b/stdlib/Profile/Project.toml
index 6aca0601439e33..1d13dad22233a3 100644
--- a/stdlib/Profile/Project.toml
+++ b/stdlib/Profile/Project.toml
@@ -5,8 +5,9 @@ uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 
 [extras]
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Test", "Serialization"]
+test = ["Logging", "Serialization", "Test"]
diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl
index f297ad12f80a16..48ee07de684238 100644
--- a/stdlib/Profile/src/Profile.jl
+++ b/stdlib/Profile/src/Profile.jl
@@ -7,6 +7,8 @@ module Profile
 
 import Base.StackTraces: lookup, UNKNOWN, show_spec_linfo, StackFrame
 
+const nmeta = 4 # number of metadata fields per block (threadid, taskid, cpu_cycle_clock, thread_sleeping)
+
 # deprecated functions: use `getdict` instead
 lookup(ip::UInt) = lookup(convert(Ptr{Cvoid}, ip))
 
@@ -39,11 +41,15 @@ end
 """
     init(; n::Integer, delay::Real))
 
-Configure the `delay` between backtraces (measured in seconds), and the number `n` of
-instruction pointers that may be stored. Each instruction pointer corresponds to a single
-line of code; backtraces generally consist of a long list of instruction pointers. Current
-settings can be obtained by calling this function with no arguments, and each can be set
-independently using keywords or in the order `(n, delay)`.
+Configure the `delay` between backtraces (measured in seconds), and the number `n` of instruction pointers that may be
+stored per thread. Each instruction pointer corresponds to a single line of code; backtraces generally consist of a long
+list of instruction pointers. Note that 6 spaces for instruction pointers per backtrace are used to store metadata and two
+NULL end markers. Current settings can be obtained by calling this function with no arguments, and each can be set independently
+using keywords or in the order `(n, delay)`.
+
+!!! compat "Julia 1.8"
+    As of Julia 1.8, this function allocates space for `n` instruction pointers per thread being profiled.
+    Previously this was `n` total.
 """
 function init(; n::Union{Nothing,Integer} = nothing, delay::Union{Nothing,Real} = nothing)
     n_cur = ccall(:jl_profile_maxlen_data, Csize_t, ())
@@ -57,9 +63,20 @@ function init(; n::Union{Nothing,Integer} = nothing, delay::Union{Nothing,Real}
 end
 
 function init(n::Integer, delay::Real)
-    status = ccall(:jl_profile_init, Cint, (Csize_t, UInt64), n, round(UInt64,10^9*delay))
+    nthreads = Sys.iswindows() ? 1 : Threads.nthreads() # windows only profiles the main thread
+    sample_size_bytes = sizeof(Ptr) # == Sys.WORD_SIZE / 8
+    buffer_samples = n * nthreads
+    buffer_size_bytes = buffer_samples * sample_size_bytes
+    if buffer_size_bytes > 2^29 && Sys.WORD_SIZE == 32
+        buffer_size_bytes_per_thread = floor(Int, 2^29 / nthreads)
+        buffer_samples_per_thread = floor(Int, buffer_size_bytes_per_thread / sample_size_bytes)
+        buffer_samples = buffer_samples_per_thread * nthreads
+        buffer_size_bytes = buffer_samples * sample_size_bytes
+        @warn "Requested profile buffer limited to 512MB (n = $buffer_samples_per_thread per thread) given that this system is 32-bit"
+    end
+    status = ccall(:jl_profile_init, Cint, (Csize_t, UInt64), buffer_samples, round(UInt64,10^9*delay))
     if status == -1
-        error("could not allocate space for ", n, " instruction pointers")
+        error("could not allocate space for ", n, " instruction pointers per thread being profiled ($nthreads threads, $(Base.format_bytes(buffer_size_bytes)) total)")
     end
 end
 
@@ -126,6 +143,9 @@ The keyword arguments can be any combination of:
     line, `:count` sorts in order of number of collected samples, and `:overhead` sorts by the number of samples
     incurred by each function by itself.
 
+ - `groupby` -- Controls grouping over tasks and threads, or no grouping. Options are `:none` (default), `:thread`, `:task`,
+    `[:thread, :task]`, or `[:task, :thread]` where the last two provide nested grouping.
+
  - `noisefloor` -- Limits frames that exceed the heuristic noise floor of the sample (only applies to format `:tree`).
     A suggested value to try for this is 2.0 (the default is 0). This parameter hides samples for which `n <= noisefloor * √N`,
     where `n` is the number of samples on this line, and `N` is the number of samples for the callee.
@@ -135,9 +155,15 @@ The keyword arguments can be any combination of:
  - `recur` -- Controls the recursion handling in `:tree` format. `:off` (default) prints the tree as normal. `:flat` instead
     compresses any recursion (by ip), showing the approximate effect of converting any self-recursion into an iterator.
     `:flatc` does the same but also includes collapsing of C frames (may do odd things around `jl_apply`).
+
+ - `threads::Union{Int,AbstractVector{Int}}` -- Specify which threads to include snapshots from in the report. Note that
+    this does not control which threads samples are collected on.
+
+ - `tasks::Union{UInt,AbstractVector{UInt}}` -- Specify which tasks to include snapshots from in the report. Note that this
+    does not control which tasks samples are collected within.
 """
 function print(io::IO,
-        data::Vector{<:Unsigned} = fetch(),
+        data::Vector{<:Unsigned} = fetch(include_meta = true),
         lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data)
         ;
         format = :tree,
@@ -147,32 +173,128 @@ function print(io::IO,
         mincount::Int = 0,
         noisefloor = 0,
         sortedby::Symbol = :filefuncline,
-        recur::Symbol = :off)
-    print(io, data, lidict, ProfileFormat(
-            C = C,
-            combine = combine,
-            maxdepth = maxdepth,
-            mincount = mincount,
-            noisefloor = noisefloor,
-            sortedby = sortedby,
-            recur = recur),
-        format)
-end
-
-function print(io::IO, data::Vector{<:Unsigned}, lidict::Union{LineInfoDict, LineInfoFlatDict}, fmt::ProfileFormat, format::Symbol)
+        groupby::Union{Symbol,AbstractVector{Symbol}} = :none,
+        recur::Symbol = :off,
+        threads::Union{Int,AbstractVector{Int}} = 1:Threads.nthreads(),
+        tasks::Union{UInt,AbstractVector{UInt}} = typemin(UInt):typemax(UInt))
+
+    pf = ProfileFormat(;C, combine, maxdepth, mincount, noisefloor, sortedby, recur)
+    if groupby == :none
+        print(io, data, lidict, pf, format, threads, tasks, false)
+    else
+        if !in(groupby, [:thread, :task, [:task, :thread], [:thread, :task]])
+            error(ArgumentError("Unrecognized groupby option: $groupby. Options are :none (default), :task, :thread, [:task, :thread], or [:thread, :task]"))
+        elseif Sys.iswindows() && in(groupby, [:thread, [:task, :thread], [:thread, :task]])
+            @warn "Profiling on windows is limited to the main thread. Other threads have not been sampled and will not show in the report"
+        end
+        any_nosamples = false
+        println(io, "Overhead ╎ [+additional indent] Count File:Line; Function")
+        println(io, "=========================================================")
+        if groupby == [:task, :thread]
+            for taskid in intersect(get_task_ids(data), tasks)
+                threadids = intersect(get_thread_ids(data, taskid), threads)
+                if length(threadids) == 0
+                    any_nosamples = true
+                else
+                    nl = length(threadids) > 1 ? "\n" : ""
+                    printstyled(io, "Task $(Base.repr(taskid))$nl"; bold=true, color=Base.debug_color())
+                    for threadid in threadids
+                        printstyled(io, " Thread $threadid\n"; bold=true, color=Base.info_color())
+                        nosamples = print(io, data, lidict, pf, format, threadid, taskid, true)
+                        nosamples && (any_nosamples = true)
+                        println(io)
+                    end
+                end
+            end
+        elseif groupby == [:thread, :task]
+            for threadid in intersect(get_thread_ids(data), threads)
+                taskids = intersect(get_task_ids(data, threadid), tasks)
+                if length(taskids) == 0
+                    any_nosamples = true
+                else
+                    nl = length(taskids) > 1 ? "\n" : ""
+                    printstyled(io, "Thread $threadid$nl"; bold=true, color=Base.info_color())
+                    for taskid in taskids
+                        printstyled(io, " Task $(Base.repr(taskid))\n"; bold=true, color=Base.debug_color())
+                        nosamples = print(io, data, lidict, pf, format, threadid, taskid, true)
+                        nosamples && (any_nosamples = true)
+                        println(io)
+                    end
+                end
+            end
+        elseif groupby == :task
+            threads = 1:typemax(Int)
+            for taskid in intersect(get_task_ids(data), tasks)
+                printstyled(io, "Task $(Base.repr(taskid))\n"; bold=true, color=Base.debug_color())
+                nosamples = print(io, data, lidict, pf, format, threads, taskid, true)
+                nosamples && (any_nosamples = true)
+                println(io)
+            end
+        elseif groupby == :thread
+            tasks = 1:typemax(UInt)
+            for threadid in intersect(get_thread_ids(data), threads)
+                printstyled(io, "Thread $threadid\n"; bold=true, color=Base.info_color())
+                nosamples = print(io, data, lidict, pf, format, threadid, tasks, true)
+                nosamples && (any_nosamples = true)
+                println(io)
+            end
+        end
+        any_nosamples && warning_empty(summary = true)
+    end
+    return
+end
+
+function print(io::IO, data::Vector{<:Unsigned}, lidict::Union{LineInfoDict, LineInfoFlatDict}, fmt::ProfileFormat,
+                format::Symbol, threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}},
+                is_subsection::Bool = false)
     cols::Int = Base.displaysize(io)[2]
     data = convert(Vector{UInt64}, data)
     fmt.recur ∈ (:off, :flat, :flatc) || throw(ArgumentError("recur value not recognized"))
     if format === :tree
-        tree(io, data, lidict, cols, fmt)
+        nosamples = tree(io, data, lidict, cols, fmt, threads, tasks, is_subsection)
+        return nosamples
     elseif format === :flat
         fmt.recur === :off || throw(ArgumentError("format flat only implements recur=:off"))
-        flat(io, data, lidict, cols, fmt)
+        nosamples = flat(io, data, lidict, cols, fmt, threads, tasks, is_subsection)
+        return nosamples
     else
         throw(ArgumentError("output format $(repr(format)) not recognized"))
     end
 end
 
+# Collect the distinct task ids recorded in the block metadata of `data`, walking from the last entry to the first.
+function get_task_ids(data::Vector{<:Unsigned}, threadid = nothing)
+    seen = UInt[]
+    for idx in reverse(eachindex(data))
+        is_block_end(data, idx) || continue
+        # relative to a block end, data[idx - 5] is the thread id and data[idx - 4] the task id
+        (threadid === nothing || data[idx - 5] == threadid) || continue
+        tid = data[idx - 4]
+        tid in seen || push!(seen, tid)
+    end
+    return seen
+end
+
+# Collect the sorted, distinct thread ids recorded in the block metadata of `data` (optionally for one `taskid` only).
+function get_thread_ids(data::Vector{<:Unsigned}, taskid = nothing)
+    seen = Int[]
+    for idx in reverse(eachindex(data))
+        is_block_end(data, idx) || continue
+        # relative to a block end, data[idx - 4] is the task id and data[idx - 5] the thread id
+        (taskid === nothing || data[idx - 4] == taskid) || continue
+        tid = data[idx - 5]
+        tid in seen || push!(seen, tid)
+    end
+    return sort(seen)
+end
+
+function is_block_end(data, i)
+    i < nmeta + 2 && return false # a complete block holds nmeta fields plus two NULLs, and callers read data[i - (nmeta + 1)]
+    # 32-bit linux has been seen to have rogue NULL ips, so we use two to indicate block end, where the 2nd is the
+    # actual end index
+    return data[i] == 0 && data[i - 1] == 0
+end
+
 """
     print([io::IO = stdout,] data::Vector, lidict::LineInfoDict; kwargs...)
 
@@ -182,27 +304,31 @@ a dictionary `lidict` of line information.
 
 See `Profile.print([io], data)` for an explanation of the valid keyword arguments.
 """
-print(data::Vector{<:Unsigned} = fetch(), lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data); kwargs...) =
+print(data::Vector{<:Unsigned} = fetch(include_meta = true), lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data); kwargs...) =
     print(stdout, data, lidict; kwargs...)
 
 """
-    retrieve() -> data, lidict
+    retrieve(; kwargs...) -> data, lidict
 
 "Exports" profiling results in a portable format, returning the set of all backtraces
 (`data`) and a dictionary that maps the (session-specific) instruction pointers in `data` to
 `LineInfo` values that store the file name, function name, and line number. This function
 allows you to save profiling results for future analysis.
 """
-function retrieve()
-    data = fetch()
+function retrieve(; kwargs...)
+    data = fetch(; kwargs...)
     return (data, getdict(data))
 end
 
 function getdict(data::Vector{UInt})
-    # Lookup is expensive, so do it only once per ip.
-    udata = unique(data)
     dict = LineInfoDict()
-    for ip in udata
+    return getdict!(dict, data)
+end
+
+function getdict!(dict::LineInfoDict, data::Vector{UInt})
+    for ip in data
+        # Lookup is expensive, so do it only once per ip.
+        haskey(dict, UInt64(ip)) && continue
         st = lookup(convert(Ptr{Cvoid}, ip))
         # To correct line numbers for moving code, put it in the form expected by
         # Base.update_stackframes_callback[]
@@ -369,14 +495,15 @@ error_codes = Dict(
 
 
 """
-    fetch() -> data
+    fetch(;include_meta = false) -> data
 
 Returns a copy of the buffer of profile backtraces. Note that the
 values in `data` have meaning only on this machine in the current session, because it
 depends on the exact memory addresses used in JIT-compiling. This function is primarily for
 internal use; [`retrieve`](@ref) may be a better choice for most users.
+By default metadata such as threadid and taskid will be stripped. Set `include_meta` to `true` to include metadata.
 """
-function fetch()
+function fetch(;include_meta = false)
     maxlen = maxlen_data()
     len = len_data()
     if is_buffer_full()
@@ -386,14 +513,37 @@ function fetch()
     end
     data = Vector{UInt}(undef, len)
     GC.@preserve data unsafe_copyto!(pointer(data), get_data_pointer(), len)
-    return data
+    if include_meta || isempty(data)
+        return data
+    else
+        nblocks = 0
+        for i = 2:length(data)
+            if is_block_end(data, i) # detect block ends and count them
+                nblocks += 1
+            end
+        end
+        data_stripped = Vector{UInt}(undef, length(data) - (nblocks * (nmeta + 1)))
+        j = length(data_stripped)
+        i = length(data)
+        while i > 0 && j > 0
+            data_stripped[j] = data[i]
+            if is_block_end(data, i)
+                i -= (nmeta + 1) # metadata fields and the extra NULL IP
+            end
+            i -= 1
+            j -= 1
+        end
+        @assert i == j == 0 "metadata stripping failed i=$i j=$j data[1:i]=$(data[1:i])"
+        return data_stripped
+    end
 end
 
 
 ## Print as a flat list
 # Counts the number of times each line appears, at any nesting level and at the topmost level
 # Merging multiple equivalent entries and recursive calls
-function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfoFlatDict}, C::Bool) where {T}
+function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfoFlatDict}, C::Bool,
+                    threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}) where {T}
     lilist = StackFrame[]
     n = Int[]
     m = Int[]
@@ -401,44 +551,71 @@ function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict,
     recursive = Set{T}()
     first = true
     totalshots = 0
-    for ip in data
-        if ip == 0
+    startframe = length(data)
+    skip = false
+    nsleeping = 0
+    for i in startframe:-1:1
+        startframe - 1 <= i <= startframe - (nmeta + 1) && continue # skip metadata (it's read ahead below) and extra block-end NULL IP
+        ip = data[i]
+        if is_block_end(data, i)
+            # read metadata
+            thread_sleeping = data[i - 2] - 1 # subtract 1 as state is incremented to avoid being equal to 0
+            # cpu_cycle_clock = data[i - 3]
+            taskid = data[i - 4]
+            threadid = data[i - 5]
+            if !in(threadid, threads) || !in(taskid, tasks)
+                skip = true
+                continue
+            end
+            if thread_sleeping == 1
+                nsleeping += 1
+            end
+            skip = false
             totalshots += 1
             empty!(recursive)
             first = true
-            continue
-        end
-        frames = lidict[ip]
-        nframes = (frames isa Vector ? length(frames) : 1)
-        for i = 1:nframes
-            frame = (frames isa Vector ? frames[i] : frames)
-            !C && frame.from_c && continue
-            key = (T === UInt64 ? ip : frame)
-            idx = get!(lilist_idx, key, length(lilist) + 1)
-            if idx > length(lilist)
-                push!(recursive, key)
-                push!(lilist, frame)
-                push!(n, 1)
-                push!(m, 0)
-            elseif !(key in recursive)
-                push!(recursive, key)
-                n[idx] += 1
-            end
-            if first
-                m[idx] += 1
-                first = false
+            startframe = i
+        elseif !skip
+            frames = lidict[ip]
+            nframes = (frames isa Vector ? length(frames) : 1)
+            for j = 1:nframes
+                frame = (frames isa Vector ? frames[j] : frames)
+                !C && frame.from_c && continue
+                key = (T === UInt64 ? ip : frame)
+                idx = get!(lilist_idx, key, length(lilist) + 1)
+                if idx > length(lilist)
+                    push!(recursive, key)
+                    push!(lilist, frame)
+                    push!(n, 1)
+                    push!(m, 0)
+                elseif !(key in recursive)
+                    push!(recursive, key)
+                    n[idx] += 1
+                end
+                if first
+                    m[idx] += 1
+                    first = false
+                end
             end
         end
     end
     @assert length(lilist) == length(n) == length(m) == length(lilist_idx)
-    return (lilist, n, m, totalshots)
+    return (lilist, n, m, totalshots, nsleeping)
 end
 
-function flat(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfoFlatDict}, cols::Int, fmt::ProfileFormat)
-    lilist, n, m, totalshots = parse_flat(fmt.combine ? StackFrame : UInt64, data, lidict, fmt.C)
+function flat(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfoFlatDict}, cols::Int, fmt::ProfileFormat,
+                threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}, is_subsection::Bool)
+    lilist, n, m, totalshots, nsleeping = parse_flat(fmt.combine ? StackFrame : UInt64, data, lidict, fmt.C, threads, tasks)
+    util_perc = (1 - (nsleeping / totalshots)) * 100
     if isempty(lilist)
-        warning_empty()
-        return
+        if is_subsection
+            Base.print(io, "Total snapshots: ")
+            printstyled(io, "$(totalshots)", color=Base.warn_color())
+            Base.println(io, " (", round(Int, util_perc), "% utilization)")
+        else
+            warning_empty()
+        end
+        return true
     end
     if false # optional: drop the "non-interpretable" ones
         keep = map(frame -> frame != UNKNOWN && frame.line != 0, lilist)
@@ -448,8 +625,13 @@ function flat(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfo
     end
     filenamemap = Dict{Symbol,String}()
     print_flat(io, lilist, n, m, cols, filenamemap, fmt)
-    Base.println(io, "Total snapshots: ", totalshots)
-    nothing
+    Base.print(io, "Total snapshots: ", totalshots, " (", round(Int, util_perc), "% utilization")
+    if is_subsection
+        println(io, ")")
+    else
+        println(io, " across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task)")
+    end
+    return false
 end
 
 function print_flat(io::IO, lilist::Vector{StackFrame},
@@ -612,14 +794,31 @@ function tree_format(frames::Vector{<:StackFrameTree}, level::Int, cols::Int, ma
 end
 
 # turn a list of backtraces into a tree (implicitly separated by NULL markers)
-function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineInfoFlatDict, LineInfoDict}, C::Bool, recur::Symbol) where {T}
+function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineInfoFlatDict, LineInfoDict}, C::Bool, recur::Symbol,
+                threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}) where {T}
     parent = root
     tops = Vector{StackFrameTree{T}}()
     build = Vector{StackFrameTree{T}}()
     startframe = length(all)
+    skip = false
+    nsleeping = 0
     for i in startframe:-1:1
+        startframe - 1 <= i <= startframe - (nmeta + 1) && continue # skip metadata (its read ahead below) and extra block end NULL IP
         ip = all[i]
-        if ip == 0
+        if is_block_end(all, i)
+            # read metadata
+            thread_sleeping = all[i - 2] - 1 # subtract 1 as state is incremented to avoid being equal to 0
+            # cpu_cycle_clock = all[i - 3]
+            taskid = all[i - 4]
+            threadid = all[i - 5]
+            if !in(threadid, threads) || !in(taskid, tasks)
+                skip = true
+                continue
+            end
+            if thread_sleeping == 1
+                nsleeping += 1
+            end
+            skip = false
             # sentinel value indicates the start of a new backtrace
             empty!(build)
             root.recur = 0
@@ -646,7 +845,7 @@ function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineI
             parent = root
             root.count += 1
             startframe = i
-        else
+        elseif !skip
             pushfirst!(build, parent)
             if recur === :flat || recur === :flatc
                 # Rewind the `parent` tree back, if this exact ip was already present *higher* in the current tree
@@ -687,6 +886,7 @@ function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineI
                 parent = this
                 continue
             end
+
             frames = lidict[ip]
             nframes = (frames isa Vector ? length(frames) : 1)
             this = parent
@@ -721,7 +921,7 @@ function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineI
         nothing
     end
     cleanup!(root)
-    return root
+    return root, nsleeping
 end
 
 function maxstats(root::StackFrameTree)
@@ -743,12 +943,14 @@ end
 
 # Print the stack frame tree starting at a particular root. Uses a worklist to
 # avoid stack overflows.
-function print_tree(io::IO, bt::StackFrameTree{T}, cols::Int, fmt::ProfileFormat) where T
+function print_tree(io::IO, bt::StackFrameTree{T}, cols::Int, fmt::ProfileFormat, is_subsection::Bool) where T
     maxes = maxstats(bt)
     filenamemap = Dict{Symbol,String}()
     worklist = [(bt, 0, 0, "")]
-    println(io, "Overhead ╎ [+additional indent] Count File:Line; Function")
-    println(io, "=========================================================")
+    if !is_subsection
+        println(io, "Overhead ╎ [+additional indent] Count File:Line; Function")
+        println(io, "=========================================================")
+    end
     while !isempty(worklist)
         (bt, level, noisefloor, str) = popfirst!(worklist)
         isempty(str) || println(io, str)
@@ -782,21 +984,35 @@ function print_tree(io::IO, bt::StackFrameTree{T}, cols::Int, fmt::ProfileFormat
             pushfirst!(worklist, (down, level + 1, noisefloor_down, str))
         end
     end
+    return
 end
 
-function tree(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoFlatDict, LineInfoDict}, cols::Int, fmt::ProfileFormat)
+function tree(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoFlatDict, LineInfoDict}, cols::Int, fmt::ProfileFormat,
+                threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}, is_subsection::Bool)
     if fmt.combine
-        root = tree!(StackFrameTree{StackFrame}(), data, lidict, fmt.C, fmt.recur)
+        root, nsleeping = tree!(StackFrameTree{StackFrame}(), data, lidict, fmt.C, fmt.recur, threads, tasks)
     else
-        root = tree!(StackFrameTree{UInt64}(), data, lidict, fmt.C, fmt.recur)
+        root, nsleeping = tree!(StackFrameTree{UInt64}(), data, lidict, fmt.C, fmt.recur, threads, tasks)
     end
+    util_perc = (1 - (nsleeping / root.count)) * 100
     if isempty(root.down)
-        warning_empty()
-        return
+        if is_subsection
+            Base.print(io, "Total snapshots: ")
+            printstyled(io, "$(root.count)", color=Base.warn_color())
+            Base.println(io, " (", round(Int, util_perc), "% utilization)")
+        else
+            warning_empty()
+        end
+        return true
     end
-    print_tree(io, root, cols, fmt)
-    Base.println(io, "Total snapshots: ", root.count)
-    nothing
+    print_tree(io, root, cols, fmt, is_subsection)
+    Base.print(io, "Total snapshots: ", root.count, " (", round(Int, util_perc), "% utilization")
+    if is_subsection
+        println(io, ")")
+    else
+        println(io, " across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task)")
+    end
+    return false
 end
 
 function callersf(matchfunc::Function, bt::Vector, lidict::LineInfoFlatDict)
@@ -860,9 +1076,19 @@ function liperm(lilist::Vector{StackFrame})
     return sortperm(lilist, lt = lt)
 end
 
-warning_empty() = @warn """
-            There were no samples collected. Run your program longer (perhaps by
-            running it multiple times), or adjust the delay between samples with
-            `Profile.init()`."""
+# Emit the warning shown when profiling produced no samples to report.
+# With `summary = true` the wording covers grouped reports, where one or more
+# (but not necessarily all) of the groups came back empty.
+function warning_empty(;summary = false)
+    summary || return @warn """
+        There were no samples collected.
+        Run your program longer (perhaps by running it multiple times),
+        or adjust the delay between samples with `Profile.init()`."""
+    return @warn """
+        There were no samples collected in one or more groups.
+        This may be due to idle threads, or you may need to run your
+        program longer (perhaps by running it multiple times),
+        or adjust the delay between samples with `Profile.init()`."""
+end
 
 end # module
diff --git a/stdlib/Profile/test/runtests.jl b/stdlib/Profile/test/runtests.jl
index 76f8a3a1b8ca86..777122e571aed4 100644
--- a/stdlib/Profile/test/runtests.jl
+++ b/stdlib/Profile/test/runtests.jl
@@ -1,6 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-using Test, Profile, Serialization
+using Test, Profile, Serialization, Logging
 
 Profile.clear()
 Profile.init()
@@ -59,6 +59,28 @@ let iobuf = IOBuffer()
     truncate(iobuf, 0)
 end
 
+@testset "Profile.print() groupby options" begin
+    iobuf = IOBuffer()
+    with_logger(NullLogger()) do # discard log output (e.g. warnings raised by Profile.print) while printing
+        @testset for format in [:flat, :tree]
+            @testset for threads in [1:Threads.nthreads(), 1, 1:1, 1:2, [1,2]]
+                @testset for groupby in [:none, :thread, :task, [:thread, :task], [:task, :thread]]
+                    Profile.print(iobuf; groupby, threads, format)
+                    @test !isempty(String(take!(iobuf))) # every combination must print something; take! also resets the buffer
+                end
+            end
+        end
+    end
+end
+
+@testset "Profile.fetch() with and without meta" begin
+    stripped = Profile.fetch() # default call, i.e. include_meta = false
+    full = Profile.fetch(include_meta = true)
+    @test first(stripped) == first(full)
+    @test last(stripped) == last(full)
+    @test length(full) > length(stripped)
+end
+
 Profile.clear()
 @test isempty(Profile.fetch())
 
diff --git a/stdlib/REPL/docs/src/index.md b/stdlib/REPL/docs/src/index.md
index 168d3e963b589b..552bb6246e3844 100644
--- a/stdlib/REPL/docs/src/index.md
+++ b/stdlib/REPL/docs/src/index.md
@@ -209,6 +209,10 @@ Just as `^R` is a reverse search, `^S` is a forward search, with the prompt ```(
  The two may be used in conjunction with each other to move through the previous or next matching
 results, respectively.
 
+All executed commands in the Julia REPL are logged into `~/.julia/logs/repl_history.jl`, each along with a timestamp of when it was executed
+and the REPL mode you were in at the time. Search mode queries this log file in order to find the commands which you previously ran.
+This can be disabled at startup by passing the `--history-file=no` flag to Julia.
+
 ## Key bindings
 
 The Julia REPL makes great use of key bindings. Several control-key bindings were already introduced
@@ -307,6 +311,27 @@ Users should refer to `LineEdit.jl` to discover the available actions on key inp
 In both the Julian and help modes of the REPL, one can enter the first few characters of a function
 or type and then press the tab key to get a list all matches:
 
+```julia-repl
+julia> x[TAB]
+julia> xor
+```
+
+In some cases it only completes part of the name, up to the next ambiguity:
+
+```julia-repl
+julia> mapf[TAB]
+julia> mapfold
+```
+
+If you hit tab again, then you get the list of things that might complete this:
+
+```julia-repl
+julia> mapfold[TAB]
+mapfoldl mapfoldr
+```
+
+Like other components of the REPL, the search is case-sensitive:
+
 ```julia-repl
 julia> stri[TAB]
 stride     strides     string      strip
@@ -365,6 +390,46 @@ shell> /[TAB]
 .dockerinit bin/         dev/         home/        lib64/       mnt/         proc/        run/         srv/         tmp/         var/
 ```
 
+Dictionary keys can also be tab completed:
+
+```julia-repl
+julia> foo = Dict("qwer1"=>1, "qwer2"=>2, "asdf"=>3)
+Dict{String,Int64} with 3 entries:
+  "qwer2" => 2
+  "asdf"  => 3
+  "qwer1" => 1
+
+julia> foo["q[TAB]
+
+"qwer1" "qwer2"
+julia> foo["qwer
+```
+
+Tab completion can also help completing fields:
+
+```julia-repl
+julia> x = 3 + 4im;
+
+julia> x.[TAB][TAB]
+im re
+
+julia> import UUIDs
+
+julia> UUIDs.uuid[TAB][TAB]
+uuid1        uuid4         uuid5        uuid_version
+```
+
+Fields for output from functions can also be completed:
+
+```julia-repl
+julia> split("","")[1].[TAB]
+lastindex  offset  string
+```
+
+The completion of fields for output from functions uses type inference, and it can only suggest
+fields if the function is type stable.
+
+
 Tab completion can help with investigation of the available methods matching the input arguments:
 
 ```julia-repl
@@ -392,38 +457,54 @@ The completion of the methods uses type inference and can therefore see if the a
 even if the arguments are output from functions. The function needs to be type stable for the
 completion to be able to remove non-matching methods.
 
-Tab completion can also help completing fields:
+If you wonder which methods can be used with particular argument types, use `?` as the function name.
+This shows an example of looking for functions in InteractiveUtils that accept a single string:
 
 ```julia-repl
-julia> import UUIDs
-
-julia> UUIDs.uuid[TAB]
-uuid1        uuid4         uuid_version
+julia> InteractiveUtils.?("somefile")[TAB]
+edit(path::AbstractString) in InteractiveUtils at InteractiveUtils/src/editless.jl:197
+less(file::AbstractString) in InteractiveUtils at InteractiveUtils/src/editless.jl:266
 ```
 
-Fields for output from functions can also be completed:
+This lists the methods in the `InteractiveUtils` module that can be called on a string.
+By default, this excludes methods where all arguments are typed as `Any`,
+but you can see those too by pressing SHIFT-TAB instead of TAB:
 
 ```julia-repl
-julia> split("","")[1].[TAB]
-lastindex  offset  string
+julia> InteractiveUtils.?("somefile")[SHIFT-TAB]
+apropos(string) in REPL at REPL/src/docview.jl:796
+clipboard(x) in InteractiveUtils at InteractiveUtils/src/clipboard.jl:64
+code_llvm(f) in InteractiveUtils at InteractiveUtils/src/codeview.jl:221
+code_native(f) in InteractiveUtils at InteractiveUtils/src/codeview.jl:243
+edit(path::AbstractString) in InteractiveUtils at InteractiveUtils/src/editless.jl:197
+edit(f) in InteractiveUtils at InteractiveUtils/src/editless.jl:225
+eval(x) in InteractiveUtils at InteractiveUtils/src/InteractiveUtils.jl:3
+include(x) in InteractiveUtils at InteractiveUtils/src/InteractiveUtils.jl:3
+less(file::AbstractString) in InteractiveUtils at InteractiveUtils/src/editless.jl:266
+less(f) in InteractiveUtils at InteractiveUtils/src/editless.jl:274
+report_bug(kind) in InteractiveUtils at InteractiveUtils/src/InteractiveUtils.jl:391
+separate_kwargs(args...; kwargs...) in InteractiveUtils at InteractiveUtils/src/macros.jl:7
 ```
 
-The completion of fields for output from functions uses type inference, and it can only suggest
-fields if the function is type stable.
+You can also use ` ?("somefile")[TAB]` to look across all modules, but the method lists can be long.
 
-Dictionary keys can also be tab completed:
+By omitting the closing parenthesis, you can include functions that might require additional arguments:
 
 ```julia-repl
-julia> foo = Dict("qwer1"=>1, "qwer2"=>2, "asdf"=>3)
-Dict{String,Int64} with 3 entries:
-  "qwer2" => 2
-  "asdf"  => 3
-  "qwer1" => 1
-
-julia> foo["q[TAB]
-
-"qwer1" "qwer2"
-julia> foo["qwer
+julia> using Mmap
+
+help?> Mmap.?("file",[TAB]
+Mmap.Anonymous(name::String, readonly::Bool, create::Bool) in Mmap at Mmap/src/Mmap.jl:16
+mmap(file::AbstractString) in Mmap at Mmap/src/Mmap.jl:245
+mmap(file::AbstractString, ::Type{T}) where T<:Array in Mmap at Mmap/src/Mmap.jl:245
+mmap(file::AbstractString, ::Type{T}, dims::Tuple{Vararg{Integer, N}}) where {T<:Array, N} in Mmap at Mmap/src/Mmap.jl:245
+mmap(file::AbstractString, ::Type{T}, dims::Tuple{Vararg{Integer, N}}, offset::Integer; grow, shared) where {T<:Array, N} in Mmap at Mmap/src/Mmap.jl:245
+mmap(file::AbstractString, ::Type{T}, len::Integer) where T<:Array in Mmap at Mmap/src/Mmap.jl:251
+mmap(file::AbstractString, ::Type{T}, len::Integer, offset::Integer; grow, shared) where T<:Array in Mmap at Mmap/src/Mmap.jl:251
+mmap(file::AbstractString, ::Type{T}, dims::Tuple{Vararg{Integer, N}}) where {T<:BitArray, N} in Mmap at Mmap/src/Mmap.jl:316
+mmap(file::AbstractString, ::Type{T}, dims::Tuple{Vararg{Integer, N}}, offset::Integer; grow, shared) where {T<:BitArray, N} in Mmap at Mmap/src/Mmap.jl:316
+mmap(file::AbstractString, ::Type{T}, len::Integer) where T<:BitArray in Mmap at Mmap/src/Mmap.jl:322
+mmap(file::AbstractString, ::Type{T}, len::Integer, offset::Integer; grow, shared) where T<:BitArray in Mmap at Mmap/src/Mmap.jl:322
 ```
 
 ## Customizing Colors
diff --git a/stdlib/REPL/src/LineEdit.jl b/stdlib/REPL/src/LineEdit.jl
index 718cd02e6eb98b..89f9a4cb992089 100644
--- a/stdlib/REPL/src/LineEdit.jl
+++ b/stdlib/REPL/src/LineEdit.jl
@@ -106,6 +106,11 @@ mutable struct PromptState <: ModeState
     refresh_wait::Union{Timer,Nothing}
 end
 
+struct Modifiers # modifier-key state handed to completion providers via `setmodifiers!`
+    shift::Bool # true when the completion key was pressed together with Shift
+end
+Modifiers() = Modifiers(false) # default state: no modifier active
+
 options(s::PromptState) =
     if isdefined(s.p, :repl) && isdefined(s.p.repl, :options)
         # we can't test isa(s.p.repl, LineEditREPL) as LineEditREPL is defined
@@ -182,7 +187,7 @@ function beep(s::PromptState, duration::Real=options(s).beep_duration,
     isinteractive() || return # some tests fail on some platforms
     s.beeping = min(s.beeping + duration, maxduration)
     let colors = Base.copymutable(colors)
-        @async begin
+        errormonitor(@async begin
             trylock(s.refresh_lock) || return
             try
                 orig_prefix = s.p.prompt_prefix
@@ -198,12 +203,10 @@ function beep(s::PromptState, duration::Real=options(s).beep_duration,
                 s.p.prompt_prefix = orig_prefix
                 refresh_multi_line(s, beeping=true)
                 s.beeping = 0.0
-            catch e
-                Base.showerror(stdout, e, catch_backtrace())
             finally
                 unlock(s.refresh_lock)
             end
-        end
+        end)
     end
     nothing
 end
@@ -788,23 +791,32 @@ function edit_insert(s::PromptState, c::StringLike)
         after = options(s).auto_refresh_time_delay
         termbuf = terminal(s)
         w = width(termbuf)
-        delayup = !eof(buf) || old_wait
         offset = s.ias.curs_row == 1 || s.indent < 0 ?
             sizeof(prompt_string(s.p.prompt)::String) : s.indent
         offset += position(buf) - beginofline(buf) # size of current line
-        if offset + textwidth(str) <= w
+        spinner = '\0'
+        delayup = !eof(buf) || old_wait
+        if offset + textwidth(str) <= w && !(after == 0 && delayup)
             # Avoid full update when appending characters to the end
             # and an update of curs_row isn't necessary (conservatively estimated)
             write(termbuf, str)
+            spinner = ' ' # temporarily clear under the cursor
         elseif after == 0
             refresh_line(s)
             delayup = false
-        else
+        else # render a spinner for each key press
+            if old_wait || length(str) != 1
+                spinner = spin_seq[mod1(position(buf) - w, length(spin_seq))]
+            else
+                spinner = str[end]
+            end
             delayup = true
         end
         if delayup
-            write(termbuf, spin_seq[mod1(position(buf) - w, length(spin_seq))])
-            cmove_left(termbuf)
+            if spinner != '\0'
+                write(termbuf, spinner)
+                cmove_left(termbuf)
+            end
             s.refresh_wait = Timer(after) do t
                 s.refresh_wait === t || return
                 s.refresh_wait = nothing
@@ -1900,6 +1912,10 @@ mode(s::PromptState) = s.p          # ::Prompt
 mode(s::SearchState) = @assert false
 mode(s::PrefixSearchState) = s.histprompt.parent_prompt   # ::Prompt
 
+setmodifiers!(s::MIState, m::Modifiers) = setmodifiers!(mode(s), m) # forward to the active mode
+setmodifiers!(p::Prompt, m::Modifiers) = setmodifiers!(p.complete, m) # forward to its completion provider
+setmodifiers!(c, m::Modifiers=Modifiers()) = nothing # fallback: modes/providers without modifier support ignore them
+
 # Search Mode completions
 function complete_line(s::SearchState, repeats)
     completions, partial, should_complete = complete_line(s.histprompt.complete, s)
@@ -2167,6 +2183,11 @@ function edit_tab(s::MIState, jump_spaces::Bool=false, delete_trailing::Bool=jum
     return refresh_line(s)
 end
 
+function shift_tab_completion(s::MIState) # handler bound to Shift-Tab ("\e[Z") in `default_keymap`
+    setmodifiers!(s, Modifiers(true)) # tell the completion provider that Shift is active
+    return complete_line(s) # then run the regular line completion
+end
+
 # return true iff the content of the buffer is modified
 # return false when only the position changed
 function edit_insert_tab(buf::IOBuffer, jump_spaces::Bool=false, delete_trailing::Bool=jump_spaces)
@@ -2202,6 +2223,8 @@ const default_keymap =
 AnyDict(
     # Tab
     '\t' => (s::MIState,o...)->edit_tab(s, true),
+    # Shift-tab
+    "\e[Z" => (s::MIState,o...)->shift_tab_completion(s),
     # Enter
     '\r' => (s::MIState,o...)->begin
         if on_enter(s) || (eof(buffer(s)) && s.key_repeats > 1)
diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl
index 68f157322facc1..9e67ad9c2d8ab9 100644
--- a/stdlib/REPL/src/REPL.jl
+++ b/stdlib/REPL/src/REPL.jl
@@ -29,8 +29,7 @@ import Base:
     display,
     show,
     AnyDict,
-    ==,
-    catch_stack
+    ==
 
 _displaysize(io::IO) = displaysize(io)::Tuple{Int,Int}
 
@@ -56,6 +55,7 @@ import ..LineEdit:
     history_last,
     history_search,
     accept_result,
+    setmodifiers!,
     terminal,
     MIState,
     PromptState,
@@ -76,6 +76,9 @@ end
 answer_color(::AbstractREPL) = ""
 
 const JULIA_PROMPT = "julia> "
+const PKG_PROMPT = "pkg> "
+const SHELL_PROMPT = "shell> "
+const HELP_PROMPT = "help?> "
 
 mutable struct REPLBackend
     "channel for AST"
@@ -123,6 +126,12 @@ const softscope! = softscope
 
 const repl_ast_transforms = Any[softscope] # defaults for new REPL backends
 
+# Allows an external package to add hooks into the code loading,
+# e.g. to install missing packages on demand.
+# Each hook should take a Vector{Symbol} of package names and
+# return true if all packages could be installed, false if not.
+const install_packages_hooks = Any[]
+
 function eval_user_input(@nospecialize(ast), backend::REPLBackend)
     lasterr = nothing
     Base.sigatomic_begin()
@@ -133,6 +142,9 @@ function eval_user_input(@nospecialize(ast), backend::REPLBackend)
                 put!(backend.response_channel, Pair{Any, Bool}(lasterr, true))
             else
                 backend.in_eval = true
+                if !isempty(install_packages_hooks)
+                    check_for_missing_packages_and_run_hooks(ast)
+                end
                 for xf in backend.ast_transforms
                     ast = Base.invokelatest(xf, ast)
                 end
@@ -148,13 +160,46 @@ function eval_user_input(@nospecialize(ast), backend::REPLBackend)
                 println("SYSTEM ERROR: Failed to report error to REPL frontend")
                 println(err)
             end
-            lasterr = catch_stack()
+            lasterr = current_exceptions()
         end
     end
     Base.sigatomic_end()
     nothing
 end
 
+function check_for_missing_packages_and_run_hooks(ast) # offer unresolved packages to the install hooks
+    ast isa Expr || return
+    missing_mods = modules_to_be_loaded(ast)
+    # keep only the modules that cannot be resolved to a known package
+    filter!(m -> Base.identify_package(String(m)) === nothing, missing_mods)
+    isempty(missing_mods) && return
+    for hook in install_packages_hooks
+        Base.invokelatest(hook, missing_mods) && return # stop once a hook reports success
+    end
+end
+
+function modules_to_be_loaded(ast::Expr, mods::Vector{Symbol} = Symbol[])
+    ast.head === :quote && return mods # don't search if it's not going to be run during this eval
+    if ast.head === :using || ast.head === :import
+        for arg in ast.args
+            arg = arg::Expr
+            arg1 = first(arg.args)
+            # `Foo` yields the Symbol directly; `Foo: bar` wraps the module path in another Expr
+            name = arg1 isa Symbol ? arg1 : first((arg1::Expr).args)
+            # a leading `.` marks a local import (`.Foo`, `.Foo: bar`), which is never a package
+            if name !== :.
+                push!(mods, name)
+            end
+        end
+    end
+    # recurse into nested expressions, which may contain further `using`/`import` statements
+    for arg in ast.args
+        arg isa Expr && modules_to_be_loaded(arg, mods)
+    end
+    filter!(mod -> !in(String(mod), ["Base", "Main", "Core"]), mods) # Exclude special non-package modules
+    return unique(mods)
+end
+
 """
     start_repl_backend(repl_channel::Channel, response_channel::Channel)
 
@@ -260,7 +305,7 @@ function print_response(errio::IO, response, show_value::Bool, have_color::Bool,
                 println(errio) # an error during printing is likely to leave us mid-line
                 println(errio, "SYSTEM (REPL): showing an error caused an error")
                 try
-                    Base.invokelatest(Base.display_error, errio, catch_stack())
+                    Base.invokelatest(Base.display_error, errio, current_exceptions())
                 catch e
                     # at this point, only print the name of the type as a Symbol to
                     # minimize the possibility of further errors.
@@ -270,7 +315,7 @@ function print_response(errio::IO, response, show_value::Bool, have_color::Bool,
                 end
                 break
             end
-            val = catch_stack()
+            val = current_exceptions()
             iserr = true
         end
     end
@@ -313,10 +358,12 @@ function run_repl(repl::AbstractREPL, @nospecialize(consumer = x -> nothing); ba
         end
     if backend_on_current_task
         t = @async run_frontend(repl, backend_ref)
+        errormonitor(t)
         Base._wait2(t, cleanup)
         start_repl_backend(backend, consumer)
     else
         t = @async start_repl_backend(backend, consumer)
+        errormonitor(t)
         Base._wait2(t, cleanup)
         run_frontend(repl, backend_ref)
     end
@@ -427,16 +474,30 @@ LineEditREPL(t::TextTerminal, hascolor::Bool, envcolors::Bool=false) =
         false, false, false, envcolors
     )
 
-mutable struct REPLCompletionProvider <: CompletionProvider end
+mutable struct REPLCompletionProvider <: CompletionProvider
+    modifiers::LineEdit.Modifiers
+end
+REPLCompletionProvider() = REPLCompletionProvider(LineEdit.Modifiers())
 mutable struct ShellCompletionProvider <: CompletionProvider end
 struct LatexCompletions <: CompletionProvider end
 
+setmodifiers!(c::REPLCompletionProvider, m::LineEdit.Modifiers) = c.modifiers = m
+
 beforecursor(buf::IOBuffer) = String(buf.data[1:buf.ptr-1])
 
 function complete_line(c::REPLCompletionProvider, s::PromptState)
     partial = beforecursor(s.input_buffer)
     full = LineEdit.input_string(s)
     ret, range, should_complete = completions(full, lastindex(partial))
+    if !c.modifiers.shift # Shift not flagged: hide the catch-all methods
+        # Filter out methods where all arguments are `Any`
+        filter!(ret) do c # NOTE: inside this block `c` is a completion and shadows the provider `c`
+            isa(c, REPLCompletions.MethodCompletion) || return true # non-method completions are always kept
+            sig = Base.unwrap_unionall(c.method.sig)::DataType
+            return !all(T -> T === Any || T === Vararg{Any}, sig.parameters[2:end]) # parameters[1] is skipped (the callee's own type)
+        end
+    end
+    c.modifiers = LineEdit.Modifiers() # reset, so the Shift flag only affects this one request
     return unique!(map(completion_text, ret)), partial[range], should_complete
 end
 
@@ -792,7 +853,7 @@ function respond(f, repl, main; pass_empty::Bool = false, suppress_on_semicolon:
                 ast = Base.invokelatest(f, line)
                 response = eval_with_backend(ast, backend(repl))
             catch
-                response = Pair{Any, Bool}(catch_stack(), true)
+                response = Pair{Any, Bool}(current_exceptions(), true)
             end
             hide_output = suppress_on_semicolon && ends_with_semicolon(line)
             print_response(repl, response, !hide_output, hascolor(repl))
@@ -895,7 +956,7 @@ function setup_interface(
         on_enter = return_callback)
 
     # Setup help mode
-    help_mode = Prompt("help?> ",
+    help_mode = Prompt(HELP_PROMPT,
         prompt_prefix = hascolor ? repl.help_color : "",
         prompt_suffix = hascolor ?
             (repl.envcolors ? Base.input_color : repl.input_color) : "",
@@ -907,7 +968,7 @@ function setup_interface(
 
 
     # Set up shell mode
-    shell_mode = Prompt("shell> ";
+    shell_mode = Prompt(SHELL_PROMPT;
         prompt_prefix = hascolor ? repl.shell_color : "",
         prompt_suffix = hascolor ?
             (repl.envcolors ? Base.input_color : repl.input_color) : "",
@@ -944,7 +1005,7 @@ function setup_interface(
             hist_from_file(hp, hist_path)
         catch
             # use REPL.hascolor to avoid using the local variable with the same name
-            print_response(repl, Pair{Any, Bool}(catch_stack(), true), true, REPL.hascolor(repl))
+            print_response(repl, Pair{Any, Bool}(current_exceptions(), true), true, REPL.hascolor(repl))
             println(outstream(repl))
             @info "Disabling history file for this session"
             repl.history_file = false
@@ -961,6 +1022,12 @@ function setup_interface(
     search_prompt, skeymap = LineEdit.setup_search_keymap(hp)
     search_prompt.complete = LatexCompletions()
 
+    jl_prompt_len = length(JULIA_PROMPT)
+    pkg_prompt_len = length(PKG_PROMPT)
+    shell_prompt_len = length(SHELL_PROMPT)
+    help_prompt_len = length(HELP_PROMPT)
+    pkg_prompt_regex = r"^(?:\(.+\) )?pkg> "
+
     # Canonicalize user keymap input
     if isa(extra_repl_keymap, Dict)
         extra_repl_keymap = AnyDict[extra_repl_keymap]
@@ -1013,12 +1080,15 @@ function setup_interface(
             oldpos = firstindex(input)
             firstline = true
             isprompt_paste = false
-            jl_prompt_len = 7 # "julia> "
+            curr_prompt_len = 0
+            pasting_help = false
+
             while oldpos <= lastindex(input) # loop until all lines have been executed
                 if JL_PROMPT_PASTE[]
-                    # Check if the next statement starts with "julia> ", in that case
-                    # skip it. But first skip whitespace
-                    while input[oldpos] in ('\n', ' ', '\t')
+                    # Check if the next statement starts with a prompt i.e. "julia> ", in that case
+                    # skip it. But first skip whitespace unless pasting in a docstring which may have
+                    # indented prompt examples that we don't want to execute
+                    while input[oldpos] in (pasting_help ? ('\n') : ('\n', ' ', '\t'))
                         oldpos = nextind(input, oldpos)
                         oldpos >= sizeof(input) && return
                     end
@@ -1026,7 +1096,32 @@ function setup_interface(
                     if (firstline || isprompt_paste) && startswith(SubString(input, oldpos), JULIA_PROMPT)
                         isprompt_paste = true
                         oldpos += jl_prompt_len
-                    # If we are prompt pasting and current statement does not begin with julia> , skip to next line
+                        curr_prompt_len = jl_prompt_len
+                        transition(s, julia_prompt)
+                        pasting_help = false
+                    # Check if input line starts with "pkg> " or "(...) pkg> ", remove it if we are in prompt paste mode and switch mode
+                    elseif (firstline || isprompt_paste) && startswith(SubString(input, oldpos), pkg_prompt_regex)
+                        detected_pkg_prompt = match(pkg_prompt_regex, SubString(input, oldpos)).match
+                        isprompt_paste = true
+                        curr_prompt_len = sizeof(detected_pkg_prompt)
+                        oldpos += curr_prompt_len
+                        Base.active_repl.interface.modes[1].keymap_dict[']'](s, o...)
+                        pasting_help = false
+                    # Check if input line starts with "shell> ", remove it if we are in prompt paste mode and switch mode
+                    elseif (firstline || isprompt_paste) && startswith(SubString(input, oldpos), SHELL_PROMPT)
+                        isprompt_paste = true
+                        oldpos += shell_prompt_len
+                        curr_prompt_len = shell_prompt_len
+                        transition(s, shell_mode)
+                        pasting_help = false
+                    # Check if input line starts with "help?> ", remove it if we are in prompt paste mode and switch mode
+                    elseif (firstline || isprompt_paste) && startswith(SubString(input, oldpos), HELP_PROMPT)
+                        isprompt_paste = true
+                        oldpos += help_prompt_len
+                        curr_prompt_len = help_prompt_len
+                        transition(s, help_mode)
+                        pasting_help = true
+                    # If we are prompt pasting and current statement does not begin with a mode prefix, skip to next line
                     elseif isprompt_paste
                         while input[oldpos] != '\n'
                             oldpos = nextind(input, oldpos)
@@ -1035,11 +1130,35 @@ function setup_interface(
                         continue
                     end
                 end
-                ast, pos = Meta.parse(input, oldpos, raise=false, depwarn=false)
-                if (isa(ast, Expr) && (ast.head === :error || ast.head === :incomplete)) ||
-                        (pos > ncodeunits(input) && !endswith(input, '\n'))
-                    # remaining text is incomplete (an error, or parser ran to the end but didn't stop with a newline):
-                    # Insert all the remaining text as one line (might be empty)
+                dump_tail = false
+                nl_pos = findfirst('\n', input[oldpos:end])
+                if s.current_mode == julia_prompt
+                    ast, pos = Meta.parse(input, oldpos, raise=false, depwarn=false)
+                    if (isa(ast, Expr) && (ast.head === :error || ast.head === :incomplete)) ||
+                            (pos > ncodeunits(input) && !endswith(input, '\n'))
+                        # remaining text is incomplete (an error, or parser ran to the end but didn't stop with a newline):
+                        # Insert all the remaining text as one line (might be empty)
+                        dump_tail = true
+                    end
+                elseif isnothing(nl_pos) # no newline at end, so just dump the tail into the prompt and don't execute
+                    dump_tail = true
+                elseif s.current_mode == shell_mode # handle multiline shell commands
+                    lines = split(input[oldpos:end], '\n')
+                    pos = oldpos + sizeof(lines[1]) + 1
+                    if length(lines) > 1
+                        for line in lines[2:end]
+                            # to be recognized as a multiline shell command, the lines must be indented to the
+                            # same prompt position
+                            if !startswith(line, ' '^curr_prompt_len)
+                                break
+                            end
+                            pos += sizeof(line) + 1
+                        end
+                    end
+                else
+                    pos = oldpos + nl_pos
+                end
+                if dump_tail
                     tail = input[oldpos:end]
                     if !firstline
                         # strip leading whitespace, but only if it was the result of executing something
@@ -1047,7 +1166,7 @@ function setup_interface(
                         tail = lstrip(tail)
                     end
                     if isprompt_paste # remove indentation spaces corresponding to the prompt
-                        tail = replace(tail, r"^"m * ' '^jl_prompt_len => "")
+                        tail = replace(tail, r"^"m * ' '^curr_prompt_len => "")
                     end
                     LineEdit.replace_line(s, tail, true)
                     LineEdit.refresh_line(s)
@@ -1057,7 +1176,7 @@ function setup_interface(
                 line = strip(input[oldpos:prevind(input, pos)])
                 if !isempty(line)
                     if isprompt_paste # remove indentation spaces corresponding to the prompt
-                        line = replace(line, r"^"m * ' '^jl_prompt_len => "")
+                        line = replace(line, r"^"m * ' '^curr_prompt_len => "")
                     end
                     # put the line on the screen and history
                     LineEdit.replace_line(s, line)
diff --git a/stdlib/REPL/src/REPLCompletions.jl b/stdlib/REPL/src/REPLCompletions.jl
index fa5fd8434bec70..6845f1bc284fd4 100644
--- a/stdlib/REPL/src/REPLCompletions.jl
+++ b/stdlib/REPL/src/REPLCompletions.jl
@@ -144,7 +144,7 @@ function complete_symbol(sym::String, ffunc, context_module::Module=Main)
             if isa(b, Module)
                 mod = b
                 lookup_module = true
-            elseif Base.isstructtype(typeof(b))
+            else
                 lookup_module = false
                 t = typeof(b)
             end
@@ -384,16 +384,27 @@ function get_value(sym::Expr, fn)
 end
 get_value(sym::Symbol, fn) = isdefined(fn, sym) ? (getfield(fn, sym), true) : (nothing, false)
 get_value(sym::QuoteNode, fn) = isdefined(fn, sym.value) ? (getfield(fn, sym.value), true) : (nothing, false)
+get_value(sym::GlobalRef, fn) = get_value(sym.name, sym.mod) # look the name up in the GlobalRef's own module; `fn` is not used
 get_value(sym, fn) = (sym, true)
 
-# Return the value of a getfield call expression
-function get_value_getfield(ex::Expr, fn)
-    # Example :((top(getfield))(Base,:max))
-    val, found = get_value_getfield(ex.args[2],fn) #Look up Base in Main and returns the module
-    (found && length(ex.args) >= 3) || return (nothing, false)
-    return get_value_getfield(ex.args[3], val) #Look up max in Base and returns the function if found.
+# Return the type of a getfield call expression
+function get_type_getfield(ex::Expr, fn::Module)
+    unknown = (Any, false) # result whenever the field type cannot be determined
+    length(ex.args) == 3 || return unknown # should never happen, but just for safety
+    objexpr, fldexpr = ex.args[2], ex.args[3]
+    objtype, found = get_type(objexpr, fn)
+    (found && objtype isa DataType) || return unknown
+    # only a literal field name can be resolved; the value of a variable is not known here
+    fldname = if fldexpr isa QuoteNode
+        fldexpr.value
+    elseif isexpr(fldexpr, :quote) || isexpr(fldexpr, :inert)
+        fldexpr.args[1]
+    else
+        nothing
+    end
+    (fldname isa Symbol && hasfield(objtype, fldname)) || return unknown
+    return fieldtype(objtype, fldname), true
+end
-get_value_getfield(sym, fn) = get_value(sym, fn)
 
 # Determines the return type with Base.return_types of a function call using the type information of the arguments.
 function get_type_call(expr::Expr)
@@ -423,7 +434,7 @@ function get_type_call(expr::Expr)
     return (return_type, true)
 end
 
-# Returns the return type. example: get_type(:(Base.strip("", ' ')), Main) returns (String, true)
+# Returns the return type. example: get_type(:(Base.strip("", ' ')), Main) returns (SubString{String}, true)
 function try_get_type(sym::Expr, fn::Module)
     val, found = get_value(sym, fn)
     found && return Core.Typeof(val), found
@@ -431,10 +442,8 @@ function try_get_type(sym::Expr, fn::Module)
         # getfield call is special cased as the evaluation of getfield provides good type information,
         # is inexpensive and it is also performed in the complete_symbol function.
         a1 = sym.args[1]
-        if isa(a1,GlobalRef) && isconst(a1.mod,a1.name) && isdefined(a1.mod,a1.name) &&
-            eval(a1) === Core.getfield
-            val, found = get_value_getfield(sym, Main)
-            return found ? Core.Typeof(val) : Any, found
+        if a1 === :getfield || a1 === GlobalRef(Core, :getfield)
+            return get_type_getfield(sym, fn)
         end
         return get_type_call(sym)
     elseif sym.head === :thunk
@@ -456,6 +465,11 @@ function get_type(sym::Expr, fn::Module)
     # try to analyze nests of calls. if this fails, try using the expanded form.
     val, found = try_get_type(sym, fn)
     found && return val, found
+    # https://github.com/JuliaLang/julia/issues/27184
+    if isexpr(sym, :macrocall)
+        _, found = get_type(first(sym.args), fn)
+        found || return Any, false
+    end
     return try_get_type(Meta.lower(fn, sym), fn)
 end
 
@@ -464,17 +478,59 @@ function get_type(sym, fn::Module)
     return found ? Core.Typeof(val) : Any, found
 end
 
+function get_type(T, found::Bool, default_any::Bool)
+    found && return T # lookup succeeded: pass the type through unchanged
+    return default_any ? Any : throw(ArgumentError("argument not found"))
+end
+
 # Method completion on function call expression that look like :(max(1))
 function complete_methods(ex_org::Expr, context_module::Module=Main)
     func, found = get_value(ex_org.args[1], context_module)::Tuple{Any,Bool}
     !found && return Completion[]
 
-    funargs = ex_org.args[2:end]
-    # handle broadcasting, but only handle number of arguments instead of
-    # argument types
+    args_ex, kwargs_ex = complete_methods_args(ex_org.args[2:end], ex_org, context_module, true, true)
+
+    out = Completion[]
+    complete_methods!(out, func, args_ex, kwargs_ex)
+    return out
+end
+
+function complete_any_methods(ex_org::Expr, callee_module::Module, context_module::Module, moreargs::Bool)
+    out = Completion[]
+    args_ex, kwargs_ex = try
+        complete_methods_args(ex_org.args[2:end], ex_org, context_module, false, false)
+    catch
+        return out # the argument types could not be determined: offer no completions
+    end
+
+    for name in names(callee_module; all=true)
+        if !Base.isdeprecated(callee_module, name) && isdefined(callee_module, name)
+            func = getfield(callee_module, name)
+            if !isa(func, Module)
+                complete_methods!(out, func, args_ex, kwargs_ex, moreargs)
+            elseif callee_module === Main::Module
+                # when searching from Main, also go through the exported names of each submodule
+                callee_module2 = func
+                for name2 in names(callee_module2)
+                    if isdefined(callee_module2, name2)
+                        # look the name up in the submodule itself, not in `callee_module`
+                        func2 = getfield(callee_module2, name2)
+                        isa(func2, Module) || complete_methods!(out, func2, args_ex, kwargs_ex, moreargs)
+                    end
+                end
+            end
+        end
+    end
+
+    return out
+end
+
+function complete_methods_args(funargs::Vector{Any}, ex_org::Expr, context_module::Module, default_any::Bool, allow_broadcasting::Bool)
     args_ex = Any[]
     kwargs_ex = Pair{Symbol,Any}[]
-    if ex_org.head === :. && ex_org.args[2] isa Expr
+    if allow_broadcasting && ex_org.head === :. && ex_org.args[2] isa Expr
+        # handle broadcasting, but only handle number of arguments instead of
+        # argument types
         for _ in (ex_org.args[2]::Expr).args
             push!(args_ex, Any)
         end
@@ -483,18 +539,20 @@ function complete_methods(ex_org::Expr, context_module::Module=Main)
             if isexpr(ex, :parameters)
                 for x in ex.args
                     n, v = isexpr(x, :kw) ? (x.args...,) : (x, x)
-                    push!(kwargs_ex, n => first(get_type(v, context_module)))
+                    push!(kwargs_ex, n => get_type(get_type(v, context_module)..., default_any))
                 end
             elseif isexpr(ex, :kw)
                 n, v = (ex.args...,)
-                push!(kwargs_ex, n => first(get_type(v, context_module)))
+                push!(kwargs_ex, n => get_type(get_type(v, context_module)..., default_any))
             else
-                push!(args_ex, first(get_type(ex, context_module)))
+                push!(args_ex, get_type(get_type(ex, context_module)..., default_any))
             end
         end
     end
+    return args_ex, kwargs_ex
+end
 
-    out = Completion[]
+function complete_methods!(out::Vector{Completion}, @nospecialize(func), args_ex::Vector{Any}, kwargs_ex::Vector{Pair{Symbol,Any}}, moreargs::Bool=true)
     ml = methods(func)
     # Input types and number of arguments
     if isempty(kwargs_ex)
@@ -511,6 +569,9 @@ function complete_methods(ex_org::Expr, context_module::Module=Main)
         ml = methods(kwfunc)
         func = kwfunc
     end
+    if !moreargs
+        na = typemax(Int)
+    end
 
     for (method::Method, orig_method) in zip(ml, orig_ml)
         ms = method.sig
@@ -520,7 +581,6 @@ function complete_methods(ex_org::Expr, context_module::Module=Main)
             push!(out, MethodCompletion(func, t_in, method, orig_method))
         end
     end
-    return out
 end
 
 include("latex_symbols.jl")
@@ -638,6 +698,36 @@ function completions(string::String, pos::Int, context_module::Module=Main)
     partial = string[1:pos]
     inc_tag = Base.incomplete_tag(Meta.parse(partial, raise=false, depwarn=false))
 
+    # ?(x, y)TAB lists methods you can call with these objects
+    # ?(x, y TAB lists methods that take these objects as the first two arguments
+    # MyModule.?(x, y)TAB restricts the search to names in MyModule
+    rexm = match(r"(\w+\.|)\?\((.*)$", partial)
+    if rexm !== nothing
+        # Get the module scope
+        if isempty(rexm.captures[1])
+            callee_module = context_module
+        else
+            modname = Symbol(rexm.captures[1][1:end-1])
+            if isdefined(context_module, modname)
+                callee_module = getfield(context_module, modname)
+                if !isa(callee_module, Module)
+                    callee_module = context_module
+                end
+            else
+                callee_module = context_module
+            end
+        end
+        moreargs = !endswith(rexm.captures[2], ')')
+        callstr = "_(" * rexm.captures[2]
+        if moreargs
+            callstr *= ')'
+        end
+        ex_org = Meta.parse(callstr, raise=false, depwarn=false)
+        if isa(ex_org, Expr)
+            return complete_any_methods(ex_org, callee_module::Module, context_module, moreargs), (0:length(rexm.captures[1])+1) .+ rexm.offset, false
+        end
+    end
+
     # if completing a key in a Dict
     identifier, partial_key, loc = dict_identifier_key(partial, inc_tag, context_module)
     if identifier !== nothing
@@ -811,4 +901,22 @@ function shell_completions(string, pos)
     return Completion[], 0:-1, false
 end
 
+# Error hint for `UndefVarError`: print the `help` docs for `help`, suggest `||`/`&&` for `or`/`and`.
+function UndefVarError_hint(io::IO, ex::UndefVarError)
+    if ex.var === :help
+        # `help` or `help()` was typed while `help` is undefined: show a friendly help message
+        println(io)
+        show(io, MIME("text/plain"), Base.Docs.parsedoc(Base.Docs.keywords[:help]))
+    elseif ex.var === :or
+        print(io, "\nsuggestion: Use `||` for short-circuiting boolean OR.")
+    elseif ex.var === :and
+        print(io, "\nsuggestion: Use `&&` for short-circuiting boolean AND.")
+    end
+end
+
+function __init__()  # module initializer: registers `UndefVarError_hint` as an error hint
+    Base.Experimental.register_error_hint(UndefVarError_hint, UndefVarError)
+    return nothing
+end
+
 end # module
diff --git a/stdlib/REPL/src/TerminalMenus/Pager.jl b/stdlib/REPL/src/TerminalMenus/Pager.jl
new file mode 100644
index 00000000000000..af49c3aa63440d
--- /dev/null
+++ b/stdlib/REPL/src/TerminalMenus/Pager.jl
@@ -0,0 +1,40 @@
+mutable struct Pager{C} <: _ConfiguredMenu{C}  # menu whose entries are lines of text
+    lines::Vector{String}  # the text, one entry per line
+    pagesize::Int          # presumably the number of lines shown per page (see `header`) -- confirm
+    pageoffset::Int        # the text constructor starts this at 0
+    selected::Nothing      # can only ever hold `nothing`
+    config::C              # menu configuration (a `Config` when built from text)
+end
+
+# Build a pager over `text`, split into lines; extra keyword arguments are forwarded to `Config`.
+function Pager(text::AbstractString; pagesize::Int=10, kwargs...)
+    return Pager(readlines(IOBuffer(text)), pagesize, 0, nothing, Config(; kwargs...))
+end
+
+function header(p::Pager)  # status line: "(last shown line / total lines) percent%"
+    total = length(p.lines)
+    current = min(p.pageoffset + p.pagesize, total)  # never report past the last line
+    percent = total == 0 ? 100 : round(Int, (current / total) * 100)  # empty text: 0/0 is NaN and round(Int, NaN) throws
+    return "($(lpad(current, ndigits(total))) / $total) $(lpad(percent, 3))%"
+end
+
+options(p::Pager) = p.lines  # the menu entries are the text lines themselves
+
+cancel(::Pager) = nothing  # cancelling does nothing for a pager
+
+pick(::Pager, ::Int) = true  # always returns true, whichever index is picked
+
+function writeline(buf::IOBuffer, pager::Pager{Config}, idx::Int, iscursor::Bool)  # `iscursor` is unused
+    print(buf, pager.lines[idx])  # line `idx` is written verbatim
+end
+
+function pager(terminal, object)
+    rows, cols = displaysize(terminal)::Tuple{Int,Int}
+    # Render `object` as text/plain into a buffer, 3 columns narrower than the terminal.
+    buf = IOBuffer()
+    ctx = IOContext(buf, :color => REPL.Terminals.hascolor(terminal), :displaysize => (rows, cols - 3))
+    show(ctx, "text/plain", object)
+    menu = Pager(String(take!(buf)); pagesize = rows ÷ 2)  # each page is half the terminal height
+    return request(terminal, menu)
+end
+pager(object) = pager(terminal, object)  # `terminal` is not defined in this file; presumably a module-level binding (confirm)
diff --git a/stdlib/REPL/src/TerminalMenus/RadioMenu.jl b/stdlib/REPL/src/TerminalMenus/RadioMenu.jl
index c8bdc557377b99..2060af2e146230 100644
--- a/stdlib/REPL/src/TerminalMenus/RadioMenu.jl
+++ b/stdlib/REPL/src/TerminalMenus/RadioMenu.jl
@@ -21,6 +21,7 @@ Your favorite fruit is blueberry!
 """
 mutable struct RadioMenu{C} <: _ConfiguredMenu{C}
     options::Array{String,1}
+    keybindings::Vector{Char}
     pagesize::Int
     pageoffset::Int
     selected::Int
@@ -43,8 +44,9 @@ user.
 
 Any additional keyword arguments will be passed to [`TerminalMenus.Config`](@ref).
 """
-function RadioMenu(options::Array{String,1}; pagesize::Int=10, warn::Bool=true, kwargs...)
+function RadioMenu(options::Array{String,1}; pagesize::Int=10, warn::Bool=true, keybindings::Vector{Char}=Char[], kwargs...)
     length(options) < 1 && error("RadioMenu must have at least one option")
+    length(keybindings) in [0, length(options)] || error("RadioMenu must have either no keybindings, or one per option")
 
     # if pagesize is -1, use automatic paging
     pagesize = pagesize == -1 ? length(options) : pagesize
@@ -57,10 +59,10 @@ function RadioMenu(options::Array{String,1}; pagesize::Int=10, warn::Bool=true,
     selected = -1 # none
 
     if !isempty(kwargs)
-        RadioMenu(options, pagesize, pageoffset, selected, Config(; kwargs...))
+        RadioMenu(options, keybindings, pagesize, pageoffset, selected, Config(; kwargs...))
     else
         warn && Base.depwarn("Legacy `RadioMenu` interface is deprecated, set a configuration option such as `RadioMenu(options; charset=:ascii)` to trigger the new interface.", :RadioMenu)
-        RadioMenu(options, pagesize, pageoffset, selected, CONFIG)
+        RadioMenu(options, keybindings, pagesize, pageoffset, selected, CONFIG)
     end
 end
 
@@ -83,6 +85,14 @@ function writeline(buf::IOBuffer, menu::RadioMenu{Config}, idx::Int, iscursor::B
     print(buf, replace(menu.options[idx], "\n" => "\\n"))
 end
 
+function keypress(m::RadioMenu, i::UInt32)
+    # Compare key code `i` with each binding's code point, without rebinding `i` or allocating.
+    idx = findfirst(c -> UInt32(c) == i, m.keybindings)
+    isnothing(idx) && return false  # no binding for this key (also the case when there are no keybindings)
+    m.selected = idx  # the constructor requires one binding per option, so positions line up
+    return true
+end
+
 # Legacy interface
 function writeLine(buf::IOBuffer, menu::RadioMenu{<:Dict}, idx::Int, cursor::Bool)
     # print a ">" on the selected entry
diff --git a/stdlib/REPL/src/TerminalMenus/TerminalMenus.jl b/stdlib/REPL/src/TerminalMenus/TerminalMenus.jl
index d9d3dc8598f7da..87869e84d98388 100644
--- a/stdlib/REPL/src/TerminalMenus/TerminalMenus.jl
+++ b/stdlib/REPL/src/TerminalMenus/TerminalMenus.jl
@@ -17,10 +17,12 @@ include("config.jl")
 include("AbstractMenu.jl")
 include("RadioMenu.jl")
 include("MultiSelectMenu.jl")
+include("Pager.jl")
 
 export
     RadioMenu,
     MultiSelectMenu,
+    Pager,
     request
 
 # TODO: remove in Julia 2.0
diff --git a/stdlib/REPL/src/docview.jl b/stdlib/REPL/src/docview.jl
index 9f36cadd0b8685..000c1372cbe6a9 100644
--- a/stdlib/REPL/src/docview.jl
+++ b/stdlib/REPL/src/docview.jl
@@ -221,10 +221,10 @@ function lookup_doc(ex)
         str = string(ex)
         isdotted = startswith(str, ".")
         if endswith(str, "=") && Base.operator_precedence(ex) == Base.prec_assignment && ex !== :(:=)
-            op = str[1:end-1]
+            op = chop(str)
             eq = isdotted ? ".=" : "="
             return Markdown.parse("`x $op= y` is a synonym for `x $eq x $op y`")
-        elseif isdotted
+        elseif isdotted && ex !== :(..)
             op = str[2:end]
             return Markdown.parse("`x $ex y` is akin to `broadcast($op, x, y)`. See [`broadcast`](@ref).")
         end
@@ -243,10 +243,12 @@ end
 
 function summarize(binding::Binding, sig)
     io = IOBuffer()
-    println(io, "No documentation found.\n")
     if defined(binding)
-        summarize(io, resolve(binding), binding)
+        binding_res = resolve(binding)
+        !isa(binding_res, Module) && println(io, "No documentation found.\n")
+        summarize(io, binding_res, binding)
     else
+        println(io, "No documentation found.\n")
         quot = any(isspace, sprint(print, binding)) ? "'" : ""
         println(io, "Binding ", quot, "`", binding, "`", quot, " does not exist.")
     end
@@ -270,14 +272,14 @@ function summarize(io::IO, TT::Type, binding::Binding)
     if T isa DataType
         println(io, "```")
         print(io,
-            T.abstract ? "abstract type " :
-            T.mutable  ? "mutable struct " :
+            Base.isabstracttype(T) ? "abstract type " :
+            Base.ismutabletype(T)  ? "mutable struct " :
             Base.isstructtype(T) ? "struct " :
             "primitive type ")
         supert = supertype(T)
         println(io, T)
         println(io, "```")
-        if !T.abstract && T.name !== Tuple.name && !isempty(fieldnames(T))
+        if !Base.isabstracttype(T) && T.name !== Tuple.name && !isempty(fieldnames(T))
             println(io, "# Fields")
             println(io, "```")
             pad = maximum(length(string(f)) for f in fieldnames(T))
@@ -291,7 +293,7 @@ function summarize(io::IO, TT::Type, binding::Binding)
             println(io, "# Subtypes")
             println(io, "```")
             for t in subt
-                println(io, t)
+                println(io, Base.unwrap_unionall(t))
             end
             println(io, "```")
         end
@@ -313,16 +315,46 @@ function summarize(io::IO, TT::Type, binding::Binding)
     end
 end
 
-function summarize(io::IO, m::Module, binding::Binding)
-    println(io, "No docstring found for module `", m, "`.\n")
+# Find a readme for module `m`: look in the directory holding its source file, then in each
+# parent directory up to and including `pkgdir(m)`. Returns the file path, or `nothing`.
+function find_readme(m::Module)::Union{String, Nothing}
+    srcfile = pathof(m)
+    # `!isfile` covers modules in the sysimage, where src files are omitted
+    (isnothing(srcfile) || !isfile(srcfile)) && return nothing
+    dir, stop_dir = dirname(srcfile), pkgdir(m)
+    while true
+        entries = readdir(dir; join=true, sort=true)
+        hit = findfirst(f -> isfile(f) && basename(lowercase(f)) in ["readme.md", "readme"], entries)
+        isnothing(hit) || return entries[hit]
+        dir == stop_dir && break # go no further than pkgdir
+        dir = dirname(dir) # work up through nested modules
+    end
+    return nothing
+end
+function summarize(io::IO, m::Module, binding::Binding; nlines::Int = 200)  # `nlines`: max readme lines printed
+    readme_path = find_readme(m)  # `nothing` when no readme was found
+    if isnothing(readme_path)
+        println(io, "No docstring or readme file found for module `$m`.\n")
+    else
+        println(io, "No docstring found for module `$m`.")
+    end
     exports = filter!(!=(nameof(m)), names(m))
     if isempty(exports)
         println(io, "Module does not export any names.")
     else
-        println(io, "# Exported names:")
+        println(io, "# Exported names")
         print(io, "  `")
         join(io, exports, "`, `")
-        println(io, "`")
+        println(io, "`\n")
+    end
+    if !isnothing(readme_path)  # append the readme contents, truncated to `nlines` lines
+        readme_lines = readlines(readme_path)  # reads the whole file even though at most `nlines` lines are printed
+        isempty(readme_lines) && return  # don't say we are going to print empty file
+        println(io, "# Displaying contents of readme found at `$(readme_path)`")
+        for line in first(readme_lines, nlines)
+            println(io, line)
+        end
+        length(readme_lines) > nlines && println(io, "\n[output truncated to first $nlines lines]")
     end
 end
 
@@ -370,10 +402,18 @@ function symbol_latex(s::String)
 
     return get(symbols_latex, s, "")
 end
-function repl_latex(io::IO, s::String)
-    # decompose NFC-normalized identifier to match tab-completion input
-    s = normalize(s, :NFD)
-    latex = symbol_latex(s)
+function repl_latex(io::IO, s0::String)
+    # This has rampant `Core.Box` problems (#15276). Use the tricks of
+    # https://docs.julialang.org/en/v1/manual/performance-tips/#man-performance-captured
+    # We're changing some of the values so the `let` trick isn't applicable.
+    s::String = s0
+    latex::String = symbol_latex(s)
+    if isempty(latex)
+        # Decompose NFC-normalized identifier to match tab-completion
+        # input if the first search came up empty.
+        s = normalize(s, :NFD)
+        latex = symbol_latex(s)
+    end
     if !isempty(latex)
         print(io, "\"")
         printstyled(io, s, color=:cyan)
@@ -384,7 +424,7 @@ function repl_latex(io::IO, s::String)
         print(io, "\"")
         printstyled(io, s, color=:cyan)
         print(io, "\" can be typed by ")
-        state = '\0'
+        state::Char = '\0'
         with_output_color(:cyan, io) do io
             for c in s
                 cstr = string(c)
@@ -550,8 +590,10 @@ function matchinds(needle, haystack; acronym::Bool = false)
     is = Int[]
     lastc = '\0'
     for (i, char) in enumerate(haystack)
+        while !isempty(chars) && isspace(first(chars))
+            popfirst!(chars) # skip spaces
+        end
         isempty(chars) && break
-        while chars[1] == ' ' popfirst!(chars) end # skip spaces
         if lowercase(char) == lowercase(chars[1]) &&
            (!acronym || !isletter(lastc))
             push!(is, i)
diff --git a/stdlib/REPL/src/emoji_symbols.jl b/stdlib/REPL/src/emoji_symbols.jl
index 40f943cf246ddf..49a55c97f6564c 100644
--- a/stdlib/REPL/src/emoji_symbols.jl
+++ b/stdlib/REPL/src/emoji_symbols.jl
@@ -1,21 +1,37 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-#=
+#==
+using Pkg: @pkg_str
+pkg"activate --temp"
+pkg"add JSON@0.21"
+
 import JSON
-emojis = JSON.parsefile(download("https://raw.githubusercontent.com/iamcal/emoji-data/0f0cf4ea8845eb52d26df2a48c3c31c3b8cad14e/emoji_pretty.json"))
 
-result = Dict()
-for emj in emojis
-    name = "\\:" * emj["short_name"] * ":"
-    unicode = emj["unified"]
-    if '-' in unicode
-        continue
+function emoji_data(url)
+    emojis = JSON.parsefile(download(url))
+    result = Dict()
+    for emj in emojis
+        name = "\\:" * emj["short_name"] * ":"
+        unicode = emj["unified"]
+        if '-' in unicode
+            continue
+        end
+        result[name] = "$(Char(parse(UInt32, unicode, base = 16)))"
     end
-    result[name] = "$(Char(parse(UInt32, unicode, base = 16)))"
+    return result
 end
 
-skeys = sort(collect(keys(result)))
+# We combine multiple versions as the data changes, and not only by growing.
+result = mapfoldr(emoji_data, merge, [
+    # Newer versions must be appended at the bottom of this list: later entries overwrite
+    # earlier ones when a name's emoji changed, while names that were since removed are kept
+    "https://raw.githubusercontent.com/iamcal/emoji-data/0f0cf4ea8845eb52d26df2a48c3c31c3b8cad14e/emoji_pretty.json",
+    "https://raw.githubusercontent.com/iamcal/emoji-data/e512953312c012f6bd00e3f2ef6bf152ca3710f8/emoji_pretty.json",
+    ];
+    init=Dict()
+)
 
+skeys = sort(collect(keys(result)))
 open("emoji_symbols.jl", "w") do fh
     println(fh, "const emoji_symbols = Dict(")
     for key in skeys
@@ -34,14 +50,22 @@ const emoji_symbols = Dict(
     "\\:8ball:" => "🎱",
     "\\:a:" => "🅰",
     "\\:ab:" => "🆎",
+    "\\:abacus:" => "🧮",
     "\\:abc:" => "🔤",
     "\\:abcd:" => "🔡",
     "\\:accept:" => "🉑",
+    "\\:accordion:" => "🪗",
+    "\\:adhesive_bandage:" => "🩹",
+    "\\:adult:" => "🧑",
     "\\:aerial_tramway:" => "🚡",
     "\\:airplane:" => "✈",
+    "\\:airplane_arriving:" => "🛬",
+    "\\:airplane_departure:" => "🛫",
     "\\:alarm_clock:" => "⏰",
     "\\:alien:" => "👽",
     "\\:ambulance:" => "🚑",
+    "\\:amphora:" => "🏺",
+    "\\:anatomical_heart:" => "🫀",
     "\\:anchor:" => "⚓",
     "\\:angel:" => "👼",
     "\\:anger:" => "💢",
@@ -76,40 +100,59 @@ const emoji_symbols = Dict(
     "\\:astonished:" => "😲",
     "\\:athletic_shoe:" => "👟",
     "\\:atm:" => "🏧",
+    "\\:auto_rickshaw:" => "🛺",
+    "\\:avocado:" => "🥑",
+    "\\:axe:" => "🪓",
     "\\:b:" => "🅱",
     "\\:baby:" => "👶",
     "\\:baby_bottle:" => "🍼",
     "\\:baby_chick:" => "🐤",
     "\\:baby_symbol:" => "🚼",
     "\\:back:" => "🔙",
+    "\\:bacon:" => "🥓",
+    "\\:badger:" => "🦡",
+    "\\:badminton_racquet_and_shuttlecock:" => "🏸",
+    "\\:bagel:" => "🥯",
     "\\:baggage_claim:" => "🛄",
+    "\\:baguette_bread:" => "🥖",
+    "\\:ballet_shoes:" => "🩰",
     "\\:balloon:" => "🎈",
     "\\:ballot_box_with_check:" => "☑",
     "\\:bamboo:" => "🎍",
     "\\:banana:" => "🍌",
     "\\:bangbang:" => "‼",
+    "\\:banjo:" => "🪕",
     "\\:bank:" => "🏦",
     "\\:bar_chart:" => "📊",
     "\\:barber:" => "💈",
     "\\:baseball:" => "⚾",
+    "\\:basket:" => "🧺",
     "\\:basketball:" => "🏀",
+    "\\:bat:" => "🦇",
     "\\:bath:" => "🛀",
     "\\:bathtub:" => "🛁",
     "\\:battery:" => "🔋",
     "\\:bear:" => "🐻",
+    "\\:bearded_person:" => "🧔",
+    "\\:beaver:" => "🦫",
     "\\:bee:" => "🐝",
     "\\:beer:" => "🍺",
     "\\:beers:" => "🍻",
-    "\\:beetle:" => "🐞",
+    "\\:beetle:" => "🪲",
     "\\:beginner:" => "🔰",
     "\\:bell:" => "🔔",
+    "\\:bell_pepper:" => "🫑",
     "\\:bento:" => "🍱",
+    "\\:beverage_box:" => "🧃",
     "\\:bicyclist:" => "🚴",
     "\\:bike:" => "🚲",
     "\\:bikini:" => "👙",
+    "\\:billed_cap:" => "🧢",
     "\\:bird:" => "🐦",
     "\\:birthday:" => "🎂",
+    "\\:bison:" => "🦬",
     "\\:black_circle:" => "⚫",
+    "\\:black_heart:" => "🖤",
     "\\:black_joker:" => "🃏",
     "\\:black_large_square:" => "⬛",
     "\\:black_medium_small_square:" => "◾",
@@ -122,59 +165,88 @@ const emoji_symbols = Dict(
     "\\:blue_book:" => "📘",
     "\\:blue_car:" => "🚙",
     "\\:blue_heart:" => "💙",
+    "\\:blueberries:" => "🫐",
     "\\:blush:" => "😊",
     "\\:boar:" => "🐗",
     "\\:boat:" => "⛵",
     "\\:bomb:" => "💣",
+    "\\:bone:" => "🦴",
     "\\:book:" => "📖",
     "\\:bookmark:" => "🔖",
     "\\:bookmark_tabs:" => "📑",
     "\\:books:" => "📚",
     "\\:boom:" => "💥",
+    "\\:boomerang:" => "🪃",
     "\\:boot:" => "👢",
     "\\:bouquet:" => "💐",
     "\\:bow:" => "🙇",
+    "\\:bow_and_arrow:" => "🏹",
+    "\\:bowl_with_spoon:" => "🥣",
     "\\:bowling:" => "🎳",
+    "\\:boxing_glove:" => "🥊",
     "\\:boy:" => "👦",
+    "\\:brain:" => "🧠",
     "\\:bread:" => "🍞",
+    "\\:breast-feeding:" => "🤱",
+    "\\:bricks:" => "🧱",
     "\\:bride_with_veil:" => "👰",
     "\\:bridge_at_night:" => "🌉",
     "\\:briefcase:" => "💼",
+    "\\:briefs:" => "🩲",
+    "\\:broccoli:" => "🥦",
     "\\:broken_heart:" => "💔",
+    "\\:broom:" => "🧹",
+    "\\:brown_heart:" => "🤎",
+    "\\:bubble_tea:" => "🧋",
+    "\\:bucket:" => "🪣",
     "\\:bug:" => "🐛",
     "\\:bulb:" => "💡",
     "\\:bullettrain_front:" => "🚅",
     "\\:bullettrain_side:" => "🚄",
+    "\\:burrito:" => "🌯",
     "\\:bus:" => "🚌",
     "\\:busstop:" => "🚏",
     "\\:bust_in_silhouette:" => "👤",
     "\\:busts_in_silhouette:" => "👥",
+    "\\:butter:" => "🧈",
+    "\\:butterfly:" => "🦋",
     "\\:cactus:" => "🌵",
     "\\:cake:" => "🍰",
     "\\:calendar:" => "📆",
+    "\\:call_me_hand:" => "🤙",
     "\\:calling:" => "📲",
     "\\:camel:" => "🐫",
     "\\:camera:" => "📷",
+    "\\:camera_with_flash:" => "📸",
     "\\:cancer:" => "♋",
     "\\:candy:" => "🍬",
+    "\\:canned_food:" => "🥫",
+    "\\:canoe:" => "🛶",
     "\\:capital_abcd:" => "🔠",
     "\\:capricorn:" => "♑",
     "\\:car:" => "🚗",
     "\\:card_index:" => "📇",
     "\\:carousel_horse:" => "🎠",
+    "\\:carpentry_saw:" => "🪚",
+    "\\:carrot:" => "🥕",
     "\\:cat2:" => "🐈",
     "\\:cat:" => "🐱",
     "\\:cd:" => "💿",
+    "\\:chair:" => "🪑",
+    "\\:champagne:" => "🍾",
     "\\:chart:" => "💹",
     "\\:chart_with_downwards_trend:" => "📉",
     "\\:chart_with_upwards_trend:" => "📈",
     "\\:checkered_flag:" => "🏁",
+    "\\:cheese_wedge:" => "🧀",
     "\\:cherries:" => "🍒",
     "\\:cherry_blossom:" => "🌸",
     "\\:chestnut:" => "🌰",
     "\\:chicken:" => "🐔",
+    "\\:child:" => "🧒",
     "\\:children_crossing:" => "🚸",
     "\\:chocolate_bar:" => "🍫",
+    "\\:chopsticks:" => "🥢",
     "\\:christmas_tree:" => "🎄",
     "\\:church:" => "⛪",
     "\\:cinema:" => "🎦",
@@ -184,6 +256,7 @@ const emoji_symbols = Dict(
     "\\:cl:" => "🆑",
     "\\:clap:" => "👏",
     "\\:clapper:" => "🎬",
+    "\\:clinking_glasses:" => "🥂",
     "\\:clipboard:" => "📋",
     "\\:clock1030:" => "🕥",
     "\\:clock10:" => "🕙",
@@ -213,10 +286,17 @@ const emoji_symbols = Dict(
     "\\:closed_lock_with_key:" => "🔐",
     "\\:closed_umbrella:" => "🌂",
     "\\:cloud:" => "☁",
+    "\\:clown_face:" => "🤡",
     "\\:clubs:" => "♣",
+    "\\:coat:" => "🧥",
+    "\\:cockroach:" => "🪳",
     "\\:cocktail:" => "🍸",
+    "\\:coconut:" => "🥥",
     "\\:coffee:" => "☕",
+    "\\:coin:" => "🪙",
+    "\\:cold_face:" => "🥶",
     "\\:cold_sweat:" => "😰",
+    "\\:compass:" => "🧭",
     "\\:computer:" => "💻",
     "\\:confetti_ball:" => "🎊",
     "\\:confounded:" => "😖",
@@ -235,20 +315,30 @@ const emoji_symbols = Dict(
     "\\:couplekiss:" => "💏",
     "\\:cow2:" => "🐄",
     "\\:cow:" => "🐮",
+    "\\:crab:" => "🦀",
     "\\:credit_card:" => "💳",
     "\\:crescent_moon:" => "🌙",
+    "\\:cricket:" => "🦗",
+    "\\:cricket_bat_and_ball:" => "🏏",
     "\\:crocodile:" => "🐊",
+    "\\:croissant:" => "🥐",
+    "\\:crossed_fingers:" => "🤞",
     "\\:crossed_flags:" => "🎌",
     "\\:crown:" => "👑",
     "\\:cry:" => "😢",
     "\\:crying_cat_face:" => "😿",
     "\\:crystal_ball:" => "🔮",
+    "\\:cucumber:" => "🥒",
+    "\\:cup_with_straw:" => "🥤",
+    "\\:cupcake:" => "🧁",
     "\\:cupid:" => "💘",
+    "\\:curling_stone:" => "🥌",
     "\\:curly_loop:" => "➰",
     "\\:currency_exchange:" => "💱",
     "\\:curry:" => "🍛",
     "\\:custard:" => "🍮",
     "\\:customs:" => "🛃",
+    "\\:cut_of_meat:" => "🥩",
     "\\:cyclone:" => "🌀",
     "\\:dancer:" => "💃",
     "\\:dancers:" => "👯",
@@ -256,15 +346,22 @@ const emoji_symbols = Dict(
     "\\:dart:" => "🎯",
     "\\:dash:" => "💨",
     "\\:date:" => "📅",
+    "\\:deaf_person:" => "🧏",
     "\\:deciduous_tree:" => "🌳",
+    "\\:deer:" => "🦌",
     "\\:department_store:" => "🏬",
     "\\:diamond_shape_with_a_dot_inside:" => "💠",
     "\\:diamonds:" => "♦",
     "\\:disappointed:" => "😞",
     "\\:disappointed_relieved:" => "😥",
+    "\\:disguised_face:" => "🥸",
+    "\\:diving_mask:" => "🤿",
+    "\\:diya_lamp:" => "🪔",
     "\\:dizzy:" => "💫",
     "\\:dizzy_face:" => "😵",
+    "\\:dna:" => "🧬",
     "\\:do_not_litter:" => "🚯",
+    "\\:dodo:" => "🦤",
     "\\:dog2:" => "🐕",
     "\\:dog:" => "🐶",
     "\\:dollar:" => "💵",
@@ -276,20 +373,29 @@ const emoji_symbols = Dict(
     "\\:dragon_face:" => "🐲",
     "\\:dress:" => "👗",
     "\\:dromedary_camel:" => "🐪",
+    "\\:drooling_face:" => "🤤",
+    "\\:drop_of_blood:" => "🩸",
     "\\:droplet:" => "💧",
+    "\\:drum_with_drumsticks:" => "🥁",
+    "\\:duck:" => "🦆",
+    "\\:dumpling:" => "🥟",
     "\\:dvd:" => "📀",
     "\\:e-mail:" => "📧",
+    "\\:eagle:" => "🦅",
     "\\:ear:" => "👂",
     "\\:ear_of_rice:" => "🌾",
+    "\\:ear_with_hearing_aid:" => "🦻",
     "\\:earth_africa:" => "🌍",
     "\\:earth_americas:" => "🌎",
     "\\:earth_asia:" => "🌏",
-    "\\:egg:" => "🍳",
+    "\\:egg:" => "🥚",
     "\\:eggplant:" => "🍆",
     "\\:eight_pointed_black_star:" => "✴",
     "\\:eight_spoked_asterisk:" => "✳",
     "\\:electric_plug:" => "🔌",
     "\\:elephant:" => "🐘",
+    "\\:elevator:" => "🛗",
+    "\\:elf:" => "🧝",
     "\\:email:" => "✉",
     "\\:end:" => "🔚",
     "\\:envelope_with_arrow:" => "📩",
@@ -298,22 +404,41 @@ const emoji_symbols = Dict(
     "\\:european_post_office:" => "🏤",
     "\\:evergreen_tree:" => "🌲",
     "\\:exclamation:" => "❗",
+    "\\:exploding_head:" => "🤯",
     "\\:expressionless:" => "😑",
     "\\:eyeglasses:" => "👓",
     "\\:eyes:" => "👀",
+    "\\:face_palm:" => "🤦",
+    "\\:face_vomiting:" => "🤮",
+    "\\:face_with_cowboy_hat:" => "🤠",
+    "\\:face_with_hand_over_mouth:" => "🤭",
+    "\\:face_with_head_bandage:" => "🤕",
+    "\\:face_with_monocle:" => "🧐",
+    "\\:face_with_raised_eyebrow:" => "🤨",
+    "\\:face_with_rolling_eyes:" => "🙄",
+    "\\:face_with_symbols_on_mouth:" => "🤬",
+    "\\:face_with_thermometer:" => "🤒",
     "\\:facepunch:" => "👊",
     "\\:factory:" => "🏭",
+    "\\:fairy:" => "🧚",
+    "\\:falafel:" => "🧆",
     "\\:fallen_leaf:" => "🍂",
     "\\:family:" => "👪",
     "\\:fast_forward:" => "⏩",
     "\\:fax:" => "📠",
     "\\:fearful:" => "😨",
+    "\\:feather:" => "🪶",
     "\\:feet:" => "🐾",
+    "\\:fencer:" => "🤺",
     "\\:ferris_wheel:" => "🎡",
+    "\\:field_hockey_stick_and_ball:" => "🏑",
     "\\:file_folder:" => "📁",
     "\\:fire:" => "🔥",
     "\\:fire_engine:" => "🚒",
+    "\\:fire_extinguisher:" => "🧯",
+    "\\:firecracker:" => "🧨",
     "\\:fireworks:" => "🎆",
+    "\\:first_place_medal:" => "🥇",
     "\\:first_quarter_moon:" => "🌓",
     "\\:first_quarter_moon_with_face:" => "🌛",
     "\\:fish:" => "🐟",
@@ -321,17 +446,27 @@ const emoji_symbols = Dict(
     "\\:fishing_pole_and_fish:" => "🎣",
     "\\:fist:" => "✊",
     "\\:flags:" => "🎏",
+    "\\:flamingo:" => "🦩",
     "\\:flashlight:" => "🔦",
+    "\\:flatbread:" => "🫓",
     "\\:floppy_disk:" => "💾",
     "\\:flower_playing_cards:" => "🎴",
     "\\:flushed:" => "😳",
+    "\\:fly:" => "🪰",
+    "\\:flying_disc:" => "🥏",
+    "\\:flying_saucer:" => "🛸",
     "\\:foggy:" => "🌁",
+    "\\:fondue:" => "🫕",
+    "\\:foot:" => "🦶",
     "\\:football:" => "🏈",
     "\\:footprints:" => "👣",
     "\\:fork_and_knife:" => "🍴",
+    "\\:fortune_cookie:" => "🥠",
     "\\:fountain:" => "⛲",
     "\\:four_leaf_clover:" => "🍀",
+    "\\:fox_face:" => "🦊",
     "\\:free:" => "🆓",
+    "\\:fried_egg:" => "🍳",
     "\\:fried_shrimp:" => "🍤",
     "\\:fries:" => "🍟",
     "\\:frog:" => "🐸",
@@ -340,25 +475,35 @@ const emoji_symbols = Dict(
     "\\:full_moon:" => "🌕",
     "\\:full_moon_with_face:" => "🌝",
     "\\:game_die:" => "🎲",
+    "\\:garlic:" => "🧄",
     "\\:gem:" => "💎",
     "\\:gemini:" => "♊",
+    "\\:genie:" => "🧞",
     "\\:ghost:" => "👻",
     "\\:gift:" => "🎁",
     "\\:gift_heart:" => "💝",
+    "\\:giraffe_face:" => "🦒",
     "\\:girl:" => "👧",
+    "\\:glass_of_milk:" => "🥛",
     "\\:globe_with_meridians:" => "🌐",
+    "\\:gloves:" => "🧤",
+    "\\:goal_net:" => "🥅",
     "\\:goat:" => "🐐",
+    "\\:goggles:" => "🥽",
     "\\:golf:" => "⛳",
+    "\\:gorilla:" => "🦍",
     "\\:grapes:" => "🍇",
     "\\:green_apple:" => "🍏",
     "\\:green_book:" => "📗",
     "\\:green_heart:" => "💚",
+    "\\:green_salad:" => "🥗",
     "\\:grey_exclamation:" => "❕",
     "\\:grey_question:" => "❔",
     "\\:grimacing:" => "😬",
     "\\:grin:" => "😁",
     "\\:grinning:" => "😀",
     "\\:guardsman:" => "💂",
+    "\\:guide_dog:" => "🦮",
     "\\:guitar:" => "🎸",
     "\\:gun:" => "🔫",
     "\\:haircut:" => "💇",
@@ -367,10 +512,13 @@ const emoji_symbols = Dict(
     "\\:hamster:" => "🐹",
     "\\:hand:" => "✋",
     "\\:handbag:" => "👜",
+    "\\:handball:" => "🤾",
+    "\\:handshake:" => "🤝",
     "\\:hankey:" => "💩",
     "\\:hatched_chick:" => "🐥",
     "\\:hatching_chick:" => "🐣",
     "\\:headphones:" => "🎧",
+    "\\:headstone:" => "🪦",
     "\\:hear_no_evil:" => "🙉",
     "\\:heart:" => "❤",
     "\\:heart_decoration:" => "💟",
@@ -385,24 +533,36 @@ const emoji_symbols = Dict(
     "\\:heavy_minus_sign:" => "➖",
     "\\:heavy_multiplication_x:" => "✖",
     "\\:heavy_plus_sign:" => "➕",
+    "\\:hedgehog:" => "🦔",
     "\\:helicopter:" => "🚁",
     "\\:herb:" => "🌿",
     "\\:hibiscus:" => "🌺",
     "\\:high_brightness:" => "🔆",
     "\\:high_heel:" => "👠",
+    "\\:hiking_boot:" => "🥾",
+    "\\:hindu_temple:" => "🛕",
+    "\\:hippopotamus:" => "🦛",
     "\\:hocho:" => "🔪",
     "\\:honey_pot:" => "🍯",
+    "\\:hook:" => "🪝",
     "\\:horse:" => "🐴",
     "\\:horse_racing:" => "🏇",
     "\\:hospital:" => "🏥",
+    "\\:hot_face:" => "🥵",
+    "\\:hotdog:" => "🌭",
     "\\:hotel:" => "🏨",
     "\\:hotsprings:" => "♨",
     "\\:hourglass:" => "⌛",
     "\\:hourglass_flowing_sand:" => "⏳",
     "\\:house:" => "🏠",
     "\\:house_with_garden:" => "🏡",
+    "\\:hugging_face:" => "🤗",
     "\\:hushed:" => "😯",
+    "\\:hut:" => "🛖",
+    "\\:i_love_you_hand_sign:" => "🤟",
     "\\:ice_cream:" => "🍨",
+    "\\:ice_cube:" => "🧊",
+    "\\:ice_hockey_stick_and_puck:" => "🏒",
     "\\:icecream:" => "🍦",
     "\\:id:" => "🆔",
     "\\:ideograph_advantage:" => "🉐",
@@ -421,8 +581,12 @@ const emoji_symbols = Dict(
     "\\:japanese_goblin:" => "👺",
     "\\:japanese_ogre:" => "👹",
     "\\:jeans:" => "👖",
+    "\\:jigsaw:" => "🧩",
     "\\:joy:" => "😂",
     "\\:joy_cat:" => "😹",
+    "\\:juggling:" => "🤹",
+    "\\:kaaba:" => "🕋",
+    "\\:kangaroo:" => "🦘",
     "\\:key:" => "🔑",
     "\\:keycap_ten:" => "🔟",
     "\\:kimono:" => "👘",
@@ -432,78 +596,136 @@ const emoji_symbols = Dict(
     "\\:kissing_closed_eyes:" => "😚",
     "\\:kissing_heart:" => "😘",
     "\\:kissing_smiling_eyes:" => "😙",
+    "\\:kite:" => "🪁",
+    "\\:kiwifruit:" => "🥝",
+    "\\:kneeling_person:" => "🧎",
+    "\\:knot:" => "🪢",
     "\\:koala:" => "🐨",
     "\\:koko:" => "🈁",
+    "\\:lab_coat:" => "🥼",
+    "\\:lacrosse:" => "🥍",
+    "\\:ladder:" => "🪜",
+    "\\:ladybug:" => "🐞",
     "\\:large_blue_circle:" => "🔵",
     "\\:large_blue_diamond:" => "🔷",
+    "\\:large_blue_square:" => "🟦",
+    "\\:large_brown_circle:" => "🟤",
+    "\\:large_brown_square:" => "🟫",
+    "\\:large_green_circle:" => "🟢",
+    "\\:large_green_square:" => "🟩",
+    "\\:large_orange_circle:" => "🟠",
     "\\:large_orange_diamond:" => "🔶",
+    "\\:large_orange_square:" => "🟧",
+    "\\:large_purple_circle:" => "🟣",
+    "\\:large_purple_square:" => "🟪",
+    "\\:large_red_square:" => "🟥",
+    "\\:large_yellow_circle:" => "🟡",
+    "\\:large_yellow_square:" => "🟨",
     "\\:last_quarter_moon:" => "🌗",
     "\\:last_quarter_moon_with_face:" => "🌜",
     "\\:laughing:" => "😆",
+    "\\:leafy_green:" => "🥬",
     "\\:leaves:" => "🍃",
     "\\:ledger:" => "📒",
+    "\\:left-facing_fist:" => "🤛",
     "\\:left_luggage:" => "🛅",
     "\\:left_right_arrow:" => "↔",
     "\\:leftwards_arrow_with_hook:" => "↩",
+    "\\:leg:" => "🦵",
     "\\:lemon:" => "🍋",
     "\\:leo:" => "♌",
     "\\:leopard:" => "🐆",
     "\\:libra:" => "♎",
     "\\:light_rail:" => "🚈",
     "\\:link:" => "🔗",
+    "\\:lion_face:" => "🦁",
     "\\:lips:" => "👄",
     "\\:lipstick:" => "💄",
+    "\\:lizard:" => "🦎",
+    "\\:llama:" => "🦙",
+    "\\:lobster:" => "🦞",
     "\\:lock:" => "🔒",
     "\\:lock_with_ink_pen:" => "🔏",
     "\\:lollipop:" => "🍭",
+    "\\:long_drum:" => "🪘",
     "\\:loop:" => "➿",
+    "\\:lotion_bottle:" => "🧴",
     "\\:loud_sound:" => "🔊",
     "\\:loudspeaker:" => "📢",
     "\\:love_hotel:" => "🏩",
     "\\:love_letter:" => "💌",
     "\\:low_brightness:" => "🔅",
+    "\\:luggage:" => "🧳",
+    "\\:lungs:" => "🫁",
+    "\\:lying_face:" => "🤥",
     "\\:m:" => "Ⓜ",
     "\\:mag:" => "🔍",
     "\\:mag_right:" => "🔎",
+    "\\:mage:" => "🧙",
+    "\\:magic_wand:" => "🪄",
+    "\\:magnet:" => "🧲",
     "\\:mahjong:" => "🀄",
     "\\:mailbox:" => "📫",
     "\\:mailbox_closed:" => "📪",
     "\\:mailbox_with_mail:" => "📬",
     "\\:mailbox_with_no_mail:" => "📭",
+    "\\:mammoth:" => "🦣",
     "\\:man:" => "👨",
+    "\\:man_and_woman_holding_hands:" => "👫",
+    "\\:man_dancing:" => "🕺",
     "\\:man_with_gua_pi_mao:" => "👲",
     "\\:man_with_turban:" => "👳",
+    "\\:mango:" => "🥭",
     "\\:mans_shoe:" => "👞",
+    "\\:manual_wheelchair:" => "🦽",
     "\\:maple_leaf:" => "🍁",
+    "\\:martial_arts_uniform:" => "🥋",
     "\\:mask:" => "😷",
     "\\:massage:" => "💆",
+    "\\:mate_drink:" => "🧉",
     "\\:meat_on_bone:" => "🍖",
+    "\\:mechanical_arm:" => "🦾",
+    "\\:mechanical_leg:" => "🦿",
     "\\:mega:" => "📣",
     "\\:melon:" => "🍈",
     "\\:memo:" => "📝",
+    "\\:menorah_with_nine_branches:" => "🕎",
     "\\:mens:" => "🚹",
+    "\\:merperson:" => "🧜",
     "\\:metro:" => "🚇",
+    "\\:microbe:" => "🦠",
     "\\:microphone:" => "🎤",
     "\\:microscope:" => "🔬",
+    "\\:middle_finger:" => "🖕",
+    "\\:military_helmet:" => "🪖",
     "\\:milky_way:" => "🌌",
     "\\:minibus:" => "🚐",
     "\\:minidisc:" => "💽",
+    "\\:mirror:" => "🪞",
     "\\:mobile_phone_off:" => "📴",
+    "\\:money_mouth_face:" => "🤑",
     "\\:money_with_wings:" => "💸",
     "\\:moneybag:" => "💰",
     "\\:monkey:" => "🐒",
     "\\:monkey_face:" => "🐵",
     "\\:monorail:" => "🚝",
     "\\:moon:" => "🌔",
+    "\\:moon_cake:" => "🥮",
     "\\:mortar_board:" => "🎓",
+    "\\:mosque:" => "🕌",
+    "\\:mosquito:" => "🦟",
+    "\\:motor_scooter:" => "🛵",
+    "\\:motorized_wheelchair:" => "🦼",
     "\\:mount_fuji:" => "🗻",
     "\\:mountain_bicyclist:" => "🚵",
     "\\:mountain_cableway:" => "🚠",
     "\\:mountain_railway:" => "🚞",
     "\\:mouse2:" => "🐁",
     "\\:mouse:" => "🐭",
+    "\\:mouse_trap:" => "🪤",
     "\\:movie_camera:" => "🎥",
     "\\:moyai:" => "🗿",
+    "\\:mrs_claus:" => "🤶",
     "\\:muscle:" => "💪",
     "\\:mushroom:" => "🍄",
     "\\:musical_keyboard:" => "🎹",
@@ -512,8 +734,12 @@ const emoji_symbols = Dict(
     "\\:mute:" => "🔇",
     "\\:nail_care:" => "💅",
     "\\:name_badge:" => "📛",
+    "\\:nauseated_face:" => "🤢",
+    "\\:nazar_amulet:" => "🧿",
     "\\:necktie:" => "👔",
     "\\:negative_squared_cross_mark:" => "❎",
+    "\\:nerd_face:" => "🤓",
+    "\\:nesting_dolls:" => "🪆",
     "\\:neutral_face:" => "😐",
     "\\:new:" => "🆕",
     "\\:new_moon:" => "🌑",
@@ -521,6 +747,7 @@ const emoji_symbols = Dict(
     "\\:newspaper:" => "📰",
     "\\:ng:" => "🆖",
     "\\:night_with_stars:" => "🌃",
+    "\\:ninja:" => "🥷",
     "\\:no_bell:" => "🔕",
     "\\:no_bicycles:" => "🚳",
     "\\:no_entry:" => "⛔",
@@ -539,55 +766,89 @@ const emoji_symbols = Dict(
     "\\:o2:" => "🅾",
     "\\:o:" => "⭕",
     "\\:ocean:" => "🌊",
+    "\\:octagonal_sign:" => "🛑",
     "\\:octopus:" => "🐙",
     "\\:oden:" => "🍢",
     "\\:office:" => "🏢",
     "\\:ok:" => "🆗",
     "\\:ok_hand:" => "👌",
     "\\:ok_woman:" => "🙆",
+    "\\:older_adult:" => "🧓",
     "\\:older_man:" => "👴",
     "\\:older_woman:" => "👵",
+    "\\:olive:" => "🫒",
     "\\:on:" => "🔛",
     "\\:oncoming_automobile:" => "🚘",
     "\\:oncoming_bus:" => "🚍",
     "\\:oncoming_police_car:" => "🚔",
     "\\:oncoming_taxi:" => "🚖",
+    "\\:one-piece_swimsuit:" => "🩱",
+    "\\:onion:" => "🧅",
     "\\:open_file_folder:" => "📂",
     "\\:open_hands:" => "👐",
     "\\:open_mouth:" => "😮",
     "\\:ophiuchus:" => "⛎",
     "\\:orange_book:" => "📙",
+    "\\:orange_heart:" => "🧡",
+    "\\:orangutan:" => "🦧",
+    "\\:otter:" => "🦦",
     "\\:outbox_tray:" => "📤",
+    "\\:owl:" => "🦉",
     "\\:ox:" => "🐂",
+    "\\:oyster:" => "🦪",
     "\\:package:" => "📦",
     "\\:page_facing_up:" => "📄",
     "\\:page_with_curl:" => "📃",
     "\\:pager:" => "📟",
     "\\:palm_tree:" => "🌴",
+    "\\:palms_up_together:" => "🤲",
+    "\\:pancakes:" => "🥞",
     "\\:panda_face:" => "🐼",
     "\\:paperclip:" => "📎",
+    "\\:parachute:" => "🪂",
     "\\:parking:" => "🅿",
+    "\\:parrot:" => "🦜",
     "\\:part_alternation_mark:" => "〽",
     "\\:partly_sunny:" => "⛅",
+    "\\:partying_face:" => "🥳",
     "\\:passport_control:" => "🛂",
     "\\:peach:" => "🍑",
+    "\\:peacock:" => "🦚",
+    "\\:peanuts:" => "🥜",
     "\\:pear:" => "🍐",
     "\\:pencil2:" => "✏",
     "\\:penguin:" => "🐧",
     "\\:pensive:" => "😔",
+    "\\:people_hugging:" => "🫂",
     "\\:performing_arts:" => "🎭",
     "\\:persevere:" => "😣",
+    "\\:person_climbing:" => "🧗",
+    "\\:person_doing_cartwheel:" => "🤸",
     "\\:person_frowning:" => "🙍",
+    "\\:person_in_lotus_position:" => "🧘",
+    "\\:person_in_steamy_room:" => "🧖",
+    "\\:person_in_tuxedo:" => "🤵",
     "\\:person_with_blond_hair:" => "👱",
+    "\\:person_with_headscarf:" => "🧕",
     "\\:person_with_pouting_face:" => "🙎",
+    "\\:petri_dish:" => "🧫",
     "\\:phone:" => "☎",
+    "\\:pickup_truck:" => "🛻",
+    "\\:pie:" => "🥧",
     "\\:pig2:" => "🐖",
     "\\:pig:" => "🐷",
     "\\:pig_nose:" => "🐽",
     "\\:pill:" => "💊",
+    "\\:pinata:" => "🪅",
+    "\\:pinched_fingers:" => "🤌",
+    "\\:pinching_hand:" => "🤏",
     "\\:pineapple:" => "🍍",
     "\\:pisces:" => "♓",
     "\\:pizza:" => "🍕",
+    "\\:placard:" => "🪧",
+    "\\:place_of_worship:" => "🛐",
+    "\\:pleading_face:" => "🥺",
+    "\\:plunger:" => "🪠",
     "\\:point_down:" => "👇",
     "\\:point_left:" => "👈",
     "\\:point_right:" => "👉",
@@ -595,16 +856,24 @@ const emoji_symbols = Dict(
     "\\:point_up_2:" => "👆",
     "\\:police_car:" => "🚓",
     "\\:poodle:" => "🐩",
+    "\\:popcorn:" => "🍿",
     "\\:post_office:" => "🏣",
     "\\:postal_horn:" => "📯",
     "\\:postbox:" => "📮",
     "\\:potable_water:" => "🚰",
+    "\\:potato:" => "🥔",
+    "\\:potted_plant:" => "🪴",
     "\\:pouch:" => "👝",
     "\\:poultry_leg:" => "🍗",
     "\\:pound:" => "💷",
     "\\:pouting_cat:" => "😾",
     "\\:pray:" => "🙏",
+    "\\:prayer_beads:" => "📿",
+    "\\:pregnant_woman:" => "🤰",
+    "\\:pretzel:" => "🥨",
+    "\\:prince:" => "🤴",
     "\\:princess:" => "👸",
+    "\\:probing_cane:" => "🦯",
     "\\:purple_heart:" => "💜",
     "\\:purse:" => "👛",
     "\\:pushpin:" => "📌",
@@ -612,19 +881,24 @@ const emoji_symbols = Dict(
     "\\:question:" => "❓",
     "\\:rabbit2:" => "🐇",
     "\\:rabbit:" => "🐰",
+    "\\:raccoon:" => "🦝",
     "\\:racehorse:" => "🐎",
     "\\:radio:" => "📻",
     "\\:radio_button:" => "🔘",
     "\\:rage:" => "😡",
     "\\:railway_car:" => "🚃",
     "\\:rainbow:" => "🌈",
+    "\\:raised_back_of_hand:" => "🤚",
     "\\:raised_hands:" => "🙌",
     "\\:raising_hand:" => "🙋",
     "\\:ram:" => "🐏",
     "\\:ramen:" => "🍜",
     "\\:rat:" => "🐀",
+    "\\:razor:" => "🪒",
+    "\\:receipt:" => "🧾",
     "\\:recycle:" => "♻",
     "\\:red_circle:" => "🔴",
+    "\\:red_envelope:" => "🧧",
     "\\:registered:" => "®",
     "\\:relaxed:" => "☺",
     "\\:relieved:" => "😌",
@@ -633,14 +907,22 @@ const emoji_symbols = Dict(
     "\\:restroom:" => "🚻",
     "\\:revolving_hearts:" => "💞",
     "\\:rewind:" => "⏪",
+    "\\:rhinoceros:" => "🦏",
     "\\:ribbon:" => "🎀",
     "\\:rice:" => "🍚",
     "\\:rice_ball:" => "🍙",
     "\\:rice_cracker:" => "🍘",
     "\\:rice_scene:" => "🎑",
+    "\\:right-facing_fist:" => "🤜",
     "\\:ring:" => "💍",
+    "\\:ringed_planet:" => "🪐",
+    "\\:robot_face:" => "🤖",
+    "\\:rock:" => "🪨",
     "\\:rocket:" => "🚀",
+    "\\:roll_of_paper:" => "🧻",
     "\\:roller_coaster:" => "🎢",
+    "\\:roller_skate:" => "🛼",
+    "\\:rolling_on_the_floor_laughing:" => "🤣",
     "\\:rooster:" => "🐓",
     "\\:rose:" => "🌹",
     "\\:rotating_light:" => "🚨",
@@ -650,41 +932,70 @@ const emoji_symbols = Dict(
     "\\:runner:" => "🏃",
     "\\:running_shirt_with_sash:" => "🎽",
     "\\:sa:" => "🈂",
+    "\\:safety_pin:" => "🧷",
+    "\\:safety_vest:" => "🦺",
     "\\:sagittarius:" => "♐",
     "\\:sake:" => "🍶",
+    "\\:salt:" => "🧂",
     "\\:sandal:" => "👡",
+    "\\:sandwich:" => "🥪",
     "\\:santa:" => "🎅",
+    "\\:sari:" => "🥻",
     "\\:satellite:" => "📡",
+    "\\:satellite_antenna:" => "📡",
+    "\\:sauropod:" => "🦕",
     "\\:saxophone:" => "🎷",
+    "\\:scarf:" => "🧣",
     "\\:school:" => "🏫",
     "\\:school_satchel:" => "🎒",
     "\\:scissors:" => "✂",
+    "\\:scooter:" => "🛴",
+    "\\:scorpion:" => "🦂",
     "\\:scorpius:" => "♏",
     "\\:scream:" => "😱",
     "\\:scream_cat:" => "🙀",
+    "\\:screwdriver:" => "🪛",
     "\\:scroll:" => "📜",
+    "\\:seal:" => "🦭",
     "\\:seat:" => "💺",
+    "\\:second_place_medal:" => "🥈",
     "\\:secret:" => "㊙",
     "\\:see_no_evil:" => "🙈",
     "\\:seedling:" => "🌱",
+    "\\:selfie:" => "🤳",
+    "\\:sewing_needle:" => "🪡",
+    "\\:shallow_pan_of_food:" => "🥘",
+    "\\:shark:" => "🦈",
     "\\:shaved_ice:" => "🍧",
     "\\:sheep:" => "🐑",
     "\\:shell:" => "🐚",
     "\\:ship:" => "🚢",
     "\\:shirt:" => "👕",
+    "\\:shopping_trolley:" => "🛒",
+    "\\:shorts:" => "🩳",
     "\\:shower:" => "🚿",
+    "\\:shrimp:" => "🦐",
+    "\\:shrug:" => "🤷",
+    "\\:shushing_face:" => "🤫",
     "\\:signal_strength:" => "📶",
     "\\:six_pointed_star:" => "🔯",
+    "\\:skateboard:" => "🛹",
     "\\:ski:" => "🎿",
-    "\\:skin-tone-2:" => "\U1f3fb",
-    "\\:skin-tone-3:" => "\U1f3fc",
-    "\\:skin-tone-4:" => "\U1f3fd",
-    "\\:skin-tone-5:" => "\U1f3fe",
-    "\\:skin-tone-6:" => "\U1f3ff",
+    "\\:skin-tone-2:" => "🏻",
+    "\\:skin-tone-3:" => "🏼",
+    "\\:skin-tone-4:" => "🏽",
+    "\\:skin-tone-5:" => "🏾",
+    "\\:skin-tone-6:" => "🏿",
     "\\:skull:" => "💀",
+    "\\:skunk:" => "🦨",
+    "\\:sled:" => "🛷",
     "\\:sleeping:" => "😴",
+    "\\:sleeping_accommodation:" => "🛌",
     "\\:sleepy:" => "😪",
+    "\\:slightly_frowning_face:" => "🙁",
+    "\\:slightly_smiling_face:" => "🙂",
     "\\:slot_machine:" => "🎰",
+    "\\:sloth:" => "🦥",
     "\\:small_blue_diamond:" => "🔹",
     "\\:small_orange_diamond:" => "🔸",
     "\\:small_red_triangle:" => "🔺",
@@ -693,17 +1004,24 @@ const emoji_symbols = Dict(
     "\\:smile_cat:" => "😸",
     "\\:smiley:" => "😃",
     "\\:smiley_cat:" => "😺",
+    "\\:smiling_face_with_3_hearts:" => "🥰",
+    "\\:smiling_face_with_tear:" => "🥲",
     "\\:smiling_imp:" => "😈",
     "\\:smirk:" => "😏",
     "\\:smirk_cat:" => "😼",
     "\\:smoking:" => "🚬",
     "\\:snail:" => "🐌",
     "\\:snake:" => "🐍",
+    "\\:sneezing_face:" => "🤧",
     "\\:snowboarder:" => "🏂",
     "\\:snowflake:" => "❄",
     "\\:snowman:" => "⛄",
+    "\\:snowman_without_snow:" => "⛄",
+    "\\:soap:" => "🧼",
     "\\:sob:" => "😭",
     "\\:soccer:" => "⚽",
+    "\\:socks:" => "🧦",
+    "\\:softball:" => "🥎",
     "\\:soon:" => "🔜",
     "\\:sos:" => "🆘",
     "\\:sound:" => "🔉",
@@ -718,45 +1036,71 @@ const emoji_symbols = Dict(
     "\\:speaker:" => "🔈",
     "\\:speech_balloon:" => "💬",
     "\\:speedboat:" => "🚤",
+    "\\:spock-hand:" => "🖖",
+    "\\:sponge:" => "🧽",
+    "\\:spoon:" => "🥄",
+    "\\:sports_medal:" => "🏅",
+    "\\:squid:" => "🦑",
+    "\\:standing_person:" => "🧍",
+    "\\:star-struck:" => "🤩",
     "\\:star2:" => "🌟",
     "\\:star:" => "⭐",
     "\\:stars:" => "🌠",
     "\\:station:" => "🚉",
     "\\:statue_of_liberty:" => "🗽",
     "\\:steam_locomotive:" => "🚂",
+    "\\:stethoscope:" => "🩺",
     "\\:stew:" => "🍲",
     "\\:straight_ruler:" => "📏",
     "\\:strawberry:" => "🍓",
     "\\:stuck_out_tongue:" => "😛",
     "\\:stuck_out_tongue_closed_eyes:" => "😝",
     "\\:stuck_out_tongue_winking_eye:" => "😜",
+    "\\:stuffed_flatbread:" => "🥙",
     "\\:sun_with_face:" => "🌞",
     "\\:sunflower:" => "🌻",
     "\\:sunglasses:" => "😎",
     "\\:sunny:" => "☀",
     "\\:sunrise:" => "🌅",
     "\\:sunrise_over_mountains:" => "🌄",
+    "\\:superhero:" => "🦸",
+    "\\:supervillain:" => "🦹",
     "\\:surfer:" => "🏄",
     "\\:sushi:" => "🍣",
     "\\:suspension_railway:" => "🚟",
+    "\\:swan:" => "🦢",
     "\\:sweat:" => "😓",
     "\\:sweat_drops:" => "💦",
     "\\:sweat_smile:" => "😅",
     "\\:sweet_potato:" => "🍠",
     "\\:swimmer:" => "🏊",
     "\\:symbols:" => "🔣",
+    "\\:synagogue:" => "🕍",
     "\\:syringe:" => "💉",
+    "\\:t-rex:" => "🦖",
+    "\\:table_tennis_paddle_and_ball:" => "🏓",
+    "\\:taco:" => "🌮",
     "\\:tada:" => "🎉",
+    "\\:takeout_box:" => "🥡",
+    "\\:tamale:" => "🫔",
     "\\:tanabata_tree:" => "🎋",
     "\\:tangerine:" => "🍊",
     "\\:taurus:" => "♉",
     "\\:taxi:" => "🚕",
     "\\:tea:" => "🍵",
+    "\\:teapot:" => "🫖",
+    "\\:teddy_bear:" => "🧸",
     "\\:telephone_receiver:" => "📞",
     "\\:telescope:" => "🔭",
     "\\:tennis:" => "🎾",
     "\\:tent:" => "⛺",
+    "\\:test_tube:" => "🧪",
+    "\\:the_horns:" => "🤘",
+    "\\:thinking_face:" => "🤔",
+    "\\:third_place_medal:" => "🥉",
+    "\\:thong_sandal:" => "🩴",
     "\\:thought_balloon:" => "💭",
+    "\\:thread:" => "🧵",
     "\\:ticket:" => "🎫",
     "\\:tiger2:" => "🐅",
     "\\:tiger:" => "🐯",
@@ -766,6 +1110,9 @@ const emoji_symbols = Dict(
     "\\:tokyo_tower:" => "🗼",
     "\\:tomato:" => "🍅",
     "\\:tongue:" => "👅",
+    "\\:toolbox:" => "🧰",
+    "\\:tooth:" => "🦷",
+    "\\:toothbrush:" => "🪥",
     "\\:top:" => "🔝",
     "\\:tophat:" => "🎩",
     "\\:tractor:" => "🚜",
@@ -784,6 +1131,8 @@ const emoji_symbols = Dict(
     "\\:truck:" => "🚚",
     "\\:trumpet:" => "🎺",
     "\\:tulip:" => "🌷",
+    "\\:tumbler_glass:" => "🥃",
+    "\\:turkey:" => "🦃",
     "\\:turtle:" => "🐢",
     "\\:tv:" => "📺",
     "\\:twisted_rightwards_arrows:" => "🔀",
@@ -802,11 +1151,15 @@ const emoji_symbols = Dict(
     "\\:u7981:" => "🈲",
     "\\:u7a7a:" => "🈳",
     "\\:umbrella:" => "☔",
+    "\\:umbrella_with_rain_drops:" => "☔",
     "\\:unamused:" => "😒",
     "\\:underage:" => "🔞",
+    "\\:unicorn_face:" => "🦄",
     "\\:unlock:" => "🔓",
     "\\:up:" => "🆙",
+    "\\:upside_down_face:" => "🙃",
     "\\:v:" => "✌",
+    "\\:vampire:" => "🧛",
     "\\:vertical_traffic_light:" => "🚦",
     "\\:vhs:" => "📼",
     "\\:vibration_mode:" => "📳",
@@ -815,15 +1168,19 @@ const emoji_symbols = Dict(
     "\\:violin:" => "🎻",
     "\\:virgo:" => "♍",
     "\\:volcano:" => "🌋",
+    "\\:volleyball:" => "🏐",
     "\\:vs:" => "🆚",
+    "\\:waffle:" => "🧇",
     "\\:walking:" => "🚶",
     "\\:waning_crescent_moon:" => "🌘",
     "\\:waning_gibbous_moon:" => "🌖",
     "\\:warning:" => "⚠",
     "\\:watch:" => "⌚",
     "\\:water_buffalo:" => "🐃",
+    "\\:water_polo:" => "🤽",
     "\\:watermelon:" => "🍉",
     "\\:wave:" => "👋",
+    "\\:waving_black_flag:" => "🏴",
     "\\:wavy_dash:" => "〰",
     "\\:waxing_crescent_moon:" => "🌒",
     "\\:wc:" => "🚾",
@@ -835,25 +1192,40 @@ const emoji_symbols = Dict(
     "\\:white_check_mark:" => "✅",
     "\\:white_circle:" => "⚪",
     "\\:white_flower:" => "💮",
+    "\\:white_heart:" => "🤍",
     "\\:white_large_square:" => "⬜",
     "\\:white_medium_small_square:" => "◽",
     "\\:white_medium_square:" => "◻",
     "\\:white_small_square:" => "▫",
     "\\:white_square_button:" => "🔳",
+    "\\:wilted_flower:" => "🥀",
     "\\:wind_chime:" => "🎐",
+    "\\:window:" => "🪟",
     "\\:wine_glass:" => "🍷",
     "\\:wink:" => "😉",
     "\\:wolf:" => "🐺",
     "\\:woman:" => "👩",
     "\\:womans_clothes:" => "👚",
+    "\\:womans_flat_shoe:" => "🥿",
     "\\:womans_hat:" => "👒",
     "\\:womens:" => "🚺",
+    "\\:wood:" => "🪵",
+    "\\:woozy_face:" => "🥴",
+    "\\:worm:" => "🪱",
     "\\:worried:" => "😟",
     "\\:wrench:" => "🔧",
+    "\\:wrestlers:" => "🤼",
     "\\:x:" => "❌",
+    "\\:yarn:" => "🧶",
+    "\\:yawning_face:" => "🥱",
     "\\:yellow_heart:" => "💛",
     "\\:yen:" => "💴",
+    "\\:yo-yo:" => "🪀",
     "\\:yum:" => "😋",
+    "\\:zany_face:" => "🤪",
     "\\:zap:" => "⚡",
+    "\\:zebra_face:" => "🦓",
+    "\\:zipper_mouth_face:" => "🤐",
+    "\\:zombie:" => "🧟",
     "\\:zzz:" => "💤",
 )
diff --git a/stdlib/REPL/src/latex_symbols.jl b/stdlib/REPL/src/latex_symbols.jl
index 57e41ed6705386..237aba92c45c7b 100644
--- a/stdlib/REPL/src/latex_symbols.jl
+++ b/stdlib/REPL/src/latex_symbols.jl
@@ -110,6 +110,8 @@ const latex_symbols = Dict(
     "\\backpprime" => "‶",
     "\\backppprime" => "‷",
     "\\xor" => "⊻",
+    "\\nand" => "⊼",
+    "\\nor" => "⊽",
     "\\iff" => "⟺",
     "\\implies" => "⟹",
     "\\impliedby" => "⟸",
@@ -669,8 +671,13 @@ const latex_symbols = Dict(
     "\\dashv" => "⊣",
     "\\top" => "⊤",
     "\\bot" => "⊥",
+    "\\Top" => "⫪",
+    "\\Bot" => "⫫",
+    "\\indep" => "⫫",
     "\\models" => "⊧",
     "\\vDash" => "⊨",
+    "\\downvDash" => "⫪",
+    "\\upvDash" => "⫫",
     "\\Vdash" => "⊩",
     "\\Vvdash" => "⊪",
     "\\VDash" => "⊫",
@@ -2622,13 +2629,14 @@ const latex_symbols = Dict(
 
 # Canonical reverse mapping for symbols that have several completions (#39148).
 #
-# These duplicate mappings can be investigated with the folllowing commands:
+# These duplicate mappings can be investigated with the following commands:
 #=
 ls = REPL.REPLCompletions.latex_symbols; symbols = values(ls)
 duplicates = [v for v in unique(symbols) if count(==(v), symbols) > 1]
 [(v, REPL.symbol_latex(v)) => findall(==(v), ls) for v in duplicates]
 =#
 const symbols_latex_canonical = Dict(
+    "⫫" => "\\Bot",
     "ð" => "\\dh",
     "…" => "\\ldots",
     "∅" => "\\emptyset",
@@ -2649,6 +2657,9 @@ const symbols_latex_canonical = Dict(
     "√" => "\\sqrt",
     "̶" => "\\sout",
     "→" => "\\to",
+    "⫪" => "\\Top",
     "ε" => "\\varepsilon",
     "⊻" => "\\xor",
+    "⊼" => "\\nand",
+    "⊽" => "\\nor",
 )
diff --git a/stdlib/REPL/src/options.jl b/stdlib/REPL/src/options.jl
index 09e3f4265fabe7..3ce0ab6ff00dc0 100644
--- a/stdlib/REPL/src/options.jl
+++ b/stdlib/REPL/src/options.jl
@@ -46,7 +46,7 @@ Options(;
         auto_indent_tmp_off = false,
         auto_indent_bracketed_paste = false,
         auto_indent_time_threshold = 0.005,
-        auto_refresh_time_delay = 0.05,
+        auto_refresh_time_delay = Sys.iswindows() ? 0.05 : 0.0,
         iocontext = Dict{Symbol,Any}()) =
             Options(hascolor, extra_keymap, tabwidth,
                     kill_ring_max, region_animation_duration,
diff --git a/stdlib/REPL/test/TerminalMenus/pager.jl b/stdlib/REPL/test/TerminalMenus/pager.jl
new file mode 100644
index 00000000000000..1d6579b8f5fc99
--- /dev/null
+++ b/stdlib/REPL/test/TerminalMenus/pager.jl
@@ -0,0 +1,39 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+content =
+    """
+    Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
+    incididunt ut labore et dolore magna aliqua. Arcu non sodales neque sodales.
+    Placerat orci nulla pellentesque dignissim enim sit amet venenatis. Mauris
+    augue neque gravida in fermentum et sollicitudin. Amet venenatis urna cursus
+    eget. Enim praesent elementum facilisis leo vel fringilla est. Vitae sapien
+    pellentesque habitant morbi tristique. Ornare lectus sit amet est placerat in.
+    Leo urna molestie at elementum eu facilisis. Aliquam vestibulum morbi blandit
+    cursus risus at ultrices. Id aliquet lectus proin nibh. Facilisi etiam
+    dignissim diam quis enim lobortis scelerisque fermentum. Pretium lectus quam id
+    leo in vitae turpis massa sed. Elementum facilisis leo vel fringilla est.
+    Vulputate ut pharetra sit amet aliquam. Quis enim lobortis scelerisque
+    fermentum dui faucibus in ornare. Cursus turpis massa tincidunt dui ut.
+
+    A arcu cursus vitae congue mauris rhoncus. Tellus rutrum tellus pellentesque
+    eu. Fringilla phasellus faucibus scelerisque eleifend donec pretium. Aliquam
+    etiam erat velit scelerisque. Volutpat lacus laoreet non curabitur gravida.
+    Felis imperdiet proin fermentum leo vel orci. Viverra tellus in hac habitasse
+    platea dictumst vestibulum rhoncus est. Ullamcorper dignissim cras tincidunt
+    lobortis feugiat vivamus. Sit amet luctus venenatis lectus. Odio facilisis
+    mauris sit amet massa vitae tortor condimentum. Purus sit amet volutpat
+    consequat mauris nunc congue. Enim nunc faucibus a pellentesque sit amet. Purus
+    non enim praesent elementum facilisis leo vel fringilla est.
+    """ |> strip
+
+let p = Pager(content)
+    @test p.pagesize == 10
+    @test length(p.lines) == 22
+    @test startswith(content, p.lines[1])
+    @test endswith(content, p.lines[end])
+    buffer = IOBuffer()
+    TerminalMenus.printmenu(buffer, p, 1)
+    str = String(take!(buffer))
+    @test contains(str, "(10 / 22)  45%")
+    @test endswith(str, "leo in vitae turpis massa sed. Elementum facilisis leo vel fringilla est.")
+end
diff --git a/stdlib/REPL/test/TerminalMenus/runtests.jl b/stdlib/REPL/test/TerminalMenus/runtests.jl
index fab105244d0a15..62a91cc0a12562 100644
--- a/stdlib/REPL/test/TerminalMenus/runtests.jl
+++ b/stdlib/REPL/test/TerminalMenus/runtests.jl
@@ -25,6 +25,7 @@ include("radio_menu.jl")
 include("multiselect_menu.jl")
 include("dynamic_menu.jl")
 include("multiselect_with_skip_menu.jl")
+include("pager.jl")
 
 # Legacy tests
 include("legacytests/old_radio_menu.jl")
diff --git a/stdlib/REPL/test/docview.jl b/stdlib/REPL/test/docview.jl
index 5001a981a35f86..160544eb475d26 100644
--- a/stdlib/REPL/test/docview.jl
+++ b/stdlib/REPL/test/docview.jl
@@ -41,3 +41,13 @@ end
     # https://github.com/JuliaLang/julia/issues/37757
     @test REPL.insert_hlines(IOBuffer(), nothing) === nothing
 end
+
+@testset "fuzzy score" begin
+    # https://github.com/JunoLab/FuzzyCompletions.jl/issues/7
+    # shouldn't throw when there is a space in a middle of query
+    @test (REPL.matchinds("a ", "a file.txt"); true)
+end
+
+@testset "Unicode doc lookup (#41589)" begin
+    @test REPL.lookup_doc(:(÷=)) isa Markdown.MD
+end
diff --git a/stdlib/REPL/test/repl.jl b/stdlib/REPL/test/repl.jl
index 577dfda1a811bf..3fbf6d8825bba8 100644
--- a/stdlib/REPL/test/repl.jl
+++ b/stdlib/REPL/test/repl.jl
@@ -453,7 +453,8 @@ for prompt = ["TestΠ", () -> randstring(rand(1:10))]
         # In the future if we want we can add a test that the right object
         # gets displayed by intercepting the display
         repl.specialdisplay = REPL.REPLDisplay(repl)
-        @async write(devnull, stdout_read) # redirect stdout to devnull so we drain the output pipe
+
+        errormonitor(@async write(devnull, stdout_read)) # redirect stdout to devnull so we drain the output pipe
 
         repl.interface = REPL.setup_interface(repl)
         repl_mode = repl.interface.modes[1]
@@ -749,6 +750,32 @@ fake_repl() do stdin_write, stdout_read, repl
     readuntil(stdout_read, "begin")
     @test readuntil(stdout_read, "end", keep=true) == "\n\r\e[7C    α=1\n\r\e[7C    β=2\n\r\e[7Cend"
 
+    # Test switching repl modes
+    sendrepl2("""\e[200~
+            julia> A = 1
+            1
+
+            shell> echo foo
+            foo
+
+            shell> echo foo
+                   foo
+            foo foo
+
+            help?> Int
+            Dummy docstring
+
+                Some text
+
+                julia> error("If this error throws, the paste handler has failed to ignore this docstring example")
+
+            julia> B = 2
+            2\e[201~
+             """)
+    wait(c)
+    @test Main.A == 1
+    @test Main.B == 2
+
     # Close repl
     write(stdin_write, '\x04')
     Base.wait(repltask)
@@ -850,7 +877,7 @@ mutable struct Error19864 <: Exception; end
 function test19864()
     @eval Base.showerror(io::IO, e::Error19864) = print(io, "correct19864")
     buf = IOBuffer()
-    fake_response = (Any[(Error19864(), Ptr{Cvoid}[])], true)
+    fake_response = (Base.ExceptionStack([(exception=Error19864(),backtrace=Ptr{Cvoid}[])]),true)
     REPL.print_response(buf, fake_response, false, false, nothing)
     return String(take!(buf))
 end
@@ -1095,6 +1122,9 @@ end
 # Issue 39427
 @test occursin("does not exist", sprint(show, help_result(":=")))
 
+# Issue #40563
+@test occursin("does not exist", sprint(show, help_result("..")))
+
 # Issue #25930
 
 # Brief and extended docs (issue #25930)
@@ -1252,7 +1282,7 @@ end
 # AST transformations (softscope, Revise, OhMyREPL, etc.)
 @testset "AST Transformation" begin
     backend = REPL.REPLBackend()
-    @async REPL.start_repl_backend(backend)
+    errormonitor(@async REPL.start_repl_backend(backend))
     put!(backend.repl_channel, (:(1+1), false))
     reply = take!(backend.response_channel)
     @test reply == Pair{Any, Bool}(2, false)
@@ -1291,3 +1321,48 @@ Base.wait(frontend_task)
 macro throw_with_linenumbernode(err)
     Expr(:block, LineNumberNode(42, Symbol("test.jl")), :(() -> throw($err)))
 end
+
+@testset "Install missing packages via hooks" begin
+    @testset "Parse AST for packages" begin
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("using Foo"))
+        @test mods == [:Foo]
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("import Foo"))
+        @test mods == [:Foo]
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("using Foo, Bar"))
+        @test mods == [:Foo, :Bar]
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("import Foo, Bar"))
+        @test mods == [:Foo, :Bar]
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("using Foo.bar, Foo.baz"))
+        @test mods == [:Foo]
+
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("if false using Foo end"))
+        @test mods == [:Foo]
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("if false if false using Foo end end"))
+        @test mods == [:Foo]
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("if false using Foo, Bar end"))
+        @test mods == [:Foo, :Bar]
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("if false using Foo: bar end"))
+        @test mods == [:Foo]
+
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("import Foo.bar as baz"))
+        @test mods == [:Foo]
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("using .Foo"))
+        @test isempty(mods)
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("using Base"))
+        @test isempty(mods)
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("using Base: nope"))
+        @test isempty(mods)
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("using Main"))
+        @test isempty(mods)
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("using Core"))
+        @test isempty(mods)
+
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line(":(using Foo)"))
+        @test isempty(mods)
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("ex = :(using Foo)"))
+        @test isempty(mods)
+
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("Foo"))
+        @test isempty(mods)
+    end
+end
diff --git a/stdlib/REPL/test/replcompletions.jl b/stdlib/REPL/test/replcompletions.jl
index 8a1d2f39a18f42..671afd6c30073e 100644
--- a/stdlib/REPL/test/replcompletions.jl
+++ b/stdlib/REPL/test/replcompletions.jl
@@ -32,6 +32,10 @@ let ex = quote
             :()
         end
 
+        primitive type NonStruct 8 end
+        Base.propertynames(::NonStruct) = (:a, :b, :c)
+        x = reinterpret(NonStruct, 0x00)
+
         # Support non-Dict AbstractDicts, #19441
         mutable struct CustomDict{K, V} <: AbstractDict{K, V}
             mydict::Dict{K, V}
@@ -64,6 +68,8 @@ let ex = quote
         test6()=[a, a]
         test7() = rand(Bool) ? 1 : 1.0
         test8() = Any[1][1]
+        test9(x::Char) = pass
+        test9(x::Char, i::Int) = pass
         kwtest(; x=1, y=2, w...) = pass
         kwtest2(a; x=1, y=2, w...) = pass
 
@@ -97,10 +103,11 @@ function map_completion_text(completions)
     return map(completion_text, c), r, res
 end
 
-test_complete(s) = map_completion_text(@inferred(completions(s,lastindex(s))))
-test_scomplete(s) =  map_completion_text(@inferred(shell_completions(s,lastindex(s))))
-test_bslashcomplete(s) =  map_completion_text(@inferred(bslash_completions(s,lastindex(s)))[2])
-test_complete_context(s) =  map_completion_text(@inferred(completions(s,lastindex(s),Main.CompletionFoo)))
+test_complete(s) = map_completion_text(@inferred(completions(s, lastindex(s))))
+test_scomplete(s) =  map_completion_text(@inferred(shell_completions(s, lastindex(s))))
+test_bslashcomplete(s) =  map_completion_text(@inferred(bslash_completions(s, lastindex(s)))[2])
+test_complete_context(s, m) =  map_completion_text(@inferred(completions(s,lastindex(s), m)))
+test_complete_foo(s) = test_complete_context(s, Main.CompletionFoo)
 
 module M32377 end
 test_complete_32377(s) = map_completion_text(completions(s,lastindex(s), M32377))
@@ -117,6 +124,10 @@ let s = "using REP"
     @test count(isequal("REPL"), c) == 1
     # issue #30234
     @test !Base.isbindingresolved(M32377, :tanh)
+    # check what happens if REPL is already imported
+    M32377.eval(:(using REPL))
+    c, r = test_complete_32377(s)
+    @test count(isequal("REPL"), c) == 1
 end
 
 let s = "Comp"
@@ -293,7 +304,7 @@ end
 
 # test latex symbol completion in getindex expressions (#24705)
 let s = "tuple[\\alpha"
-    c, r, res = test_complete_context(s)
+    c, r, res = test_complete_foo(s)
     @test c[1] == "α"
     @test r == 7:12
     @test length(c) == 1
@@ -511,6 +522,34 @@ for s in ("CompletionFoo.kwtest2(1; x=1,",
     @test occursin("a; x, y, w...", c[1])
 end
 
+#################################################################
+
+# method completion with `?` (arbitrary method with given argument types)
+let s = "CompletionFoo.?([1,2,3], 2.0)"
+    c, r, res = test_complete(s)
+    @test !res
+    @test  any(str->occursin("test(x::AbstractArray{T}, y) where T<:Real", str), c)
+    @test  any(str->occursin("test(args...)", str), c)
+    @test !any(str->occursin("test3(x::AbstractArray{Int", str), c)
+    @test !any(str->occursin("test4", str), c)
+end
+
+let s = "CompletionFoo.?('c')"
+    c, r, res = test_complete(s)
+    @test !res
+    @test  any(str->occursin("test9(x::Char)", str), c)
+    @test !any(str->occursin("test9(x::Char, i::Int", str), c)
+end
+
+let s = "CompletionFoo.?('c'"
+    c, r, res = test_complete(s)
+    @test !res
+    @test  any(str->occursin("test9(x::Char)", str), c)
+    @test  any(str->occursin("test9(x::Char, i::Int", str), c)
+end
+
+#################################################################
+
 # Test of inference based getfield completion
 let s = "(1+2im)."
     c,r = test_complete(s)
@@ -983,13 +1022,13 @@ end
 
 # No CompletionFoo.CompletionFoo
 let s = ""
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test !("CompletionFoo" in c)
 end
 
 # Can see `rand()` after `using Random`
 let s = "r"
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test "rand" in c
     @test r == 1:1
     @test s[r] == "r"
@@ -997,7 +1036,7 @@ end
 
 # Can see `Test.AbstractTestSet` after `import Test`
 let s = "Test.A"
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test "AbstractTestSet" in c
     @test r == 6:6
     @test s[r] == "A"
@@ -1005,21 +1044,21 @@ end
 
 # Can complete relative import
 let s = "import ..M"
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test_broken "Main" in c
     @test r == 10:10
     @test s[r] == "M"
 end
 
 let s = ""
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test "bar" in c
     @test r === 1:0
     @test s[r] == ""
 end
 
 let s = "f"
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test "foo" in c
     @test r == 1:1
     @test s[r] == "f"
@@ -1027,7 +1066,7 @@ let s = "f"
 end
 
 let s = "@f"
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test "@foobar" in c
     @test r == 1:2
     @test s[r] == "@f"
@@ -1035,48 +1074,48 @@ let s = "@f"
 end
 
 let s = "type_test.x"
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test "xx" in c
     @test r == 11:11
     @test s[r] == "x"
 end
 
 let s = "bar.no_val_available"
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test length(c)==0
 end
 
 let s = "type_test.xx.y"
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test "yy" in c
     @test r == 14:14
     @test s[r] == "y"
 end
 
 let s = ":(function foo(::Int) end).args[1].args[2]."
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test c == Any[]
 end
 
 let s = "log(log.(x),"
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test !isempty(c)
 end
 
 let s = "Base.return_types(getin"
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test "getindex" in c
     @test r == 19:23
     @test s[r] == "getin"
 end
 
 let s = "using Test, Random"
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test !("RandomDevice" in c)
 end
 
 let s = "test(1,1, "
-    c, r, res = test_complete_context(s)
+    c, r, res = test_complete_foo(s)
     @test !res
     @test c[1] == string(first(methods(Main.CompletionFoo.test, Tuple{Int, Int})))
     @test length(c) == 3
@@ -1085,7 +1124,7 @@ let s = "test(1,1, "
 end
 
 let s = "test.(1,1, "
-    c, r, res = test_complete_context(s)
+    c, r, res = test_complete_foo(s)
     @test !res
     @test length(c) == 4
     @test r == 1:4
@@ -1093,7 +1132,7 @@ let s = "test.(1,1, "
 end
 
 let s = "prevind(\"θ\",1,"
-    c, r, res = test_complete_context(s)
+    c, r, res = test_complete_foo(s)
     @test c[1] == string(first(methods(prevind, Tuple{String, Int})))
     @test r == 1:7
     @test s[r] == "prevind"
@@ -1101,11 +1140,64 @@ end
 
 # Issue #32840
 let s = "typeof(+)."
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test length(c) == length(fieldnames(DataType))
 end
 
 let s = "test_dict[\"ab"
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test c == Any["\"abc\"", "\"abcd\""]
 end
+
+let s = "CompletionFoo.x."
+    c, r = test_complete(s)
+    @test "a" in c
+end
+
+# https://github.com/JuliaLang/julia/issues/27184
+let
+    (test_complete("@noexist."); @test true)
+    (test_complete("Main.@noexist."); @test true)
+    (test_complete("@Main.noexist."); @test true)
+end
+
+@testset "https://github.com/JuliaLang/julia/issues/40247" begin
+    # getfield type completion can work for complicated expressions
+
+    let
+        m = Module()
+        @eval m begin
+            struct Rs
+                rs::Vector{Regex}
+            end
+            var = nothing
+            function foo()
+                global var = 1
+                return Rs([r"foo"])
+            end
+        end
+
+        c, r = test_complete_context("foo().rs[1].", m)
+        @test m.var ≠ 1 # getfield type completion should never execute `foo()`
+        @test length(c) == fieldcount(Regex)
+    end
+
+    let
+        m = Module()
+        @eval m begin
+            struct R
+                r::Regex
+            end
+            var = nothing
+            function foo()
+                global var = 1
+                return R(r"foo")
+            end
+        end
+
+        c, r = test_complete_context("foo().r.", m)
+        # the current implementation of `REPL.REPLCompletions.completions(::String, ::Int, ::Module)`
+        # cuts off "foo().r." to `.r.`, and the getfield type completion doesn't work for this simpler case
+        @test_broken length(c) == fieldcount(Regex)
+    end
+end
diff --git a/stdlib/Random/Project.toml b/stdlib/Random/Project.toml
index 6aa9f653745399..6958e618d3ea87 100644
--- a/stdlib/Random/Project.toml
+++ b/stdlib/Random/Project.toml
@@ -9,6 +9,7 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Future = "9fa8497b-333b-5362-9e8d-4d0656e87820"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 
 [targets]
-test = ["Test", "SparseArrays", "LinearAlgebra", "Future"]
+test = ["Test", "SparseArrays", "LinearAlgebra", "Future", "Statistics"]
diff --git a/stdlib/Random/docs/src/index.md b/stdlib/Random/docs/src/index.md
index ca86de44ecce4e..f5508781ef27be 100644
--- a/stdlib/Random/docs/src/index.md
+++ b/stdlib/Random/docs/src/index.md
@@ -4,20 +4,22 @@
 DocTestSetup = :(using Random)
 ```
 
-Random number generation in Julia uses the [Mersenne Twister library](http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/#dSFMT)
-via `MersenneTwister` objects. Julia has a global RNG, which is used by default. Other RNG types
-can be plugged in by inheriting the `AbstractRNG` type; they can then be used to have multiple
-streams of random numbers. Besides `MersenneTwister`, Julia also provides the `RandomDevice` RNG
-type, which is a wrapper over the OS provided entropy.
-
-Most functions related to random generation accept an optional `AbstractRNG` object as first argument,
-which defaults to the global one if not provided. Moreover, some of them accept optionally
-dimension specifications `dims...` (which can be given as a tuple) to generate arrays of random
-values.  In a multi-threaded program, you should generally use different RNG objects from different threads
-in order to be thread-safe. However, the default global RNG is thread-safe as of Julia 1.3 (because
-it internally corresponds to a per-thread RNG).
-
-A `MersenneTwister` or `RandomDevice` RNG can generate uniformly random numbers of the following types:
+Random number generation in Julia uses the [Xoshiro256++](https://prng.di.unimi.it/) algorithm
+by default, with per-`Task` state.
+Other RNG types can be plugged in by inheriting the `AbstractRNG` type; they can then be used to
+obtain multiple streams of random numbers.
+Besides the default `TaskLocalRNG` type, the `Random` package also provides `MersenneTwister`,
+`RandomDevice` (which exposes OS-provided entropy), and `Xoshiro` (for explicitly-managed
+Xoshiro256++ streams).
+
+Most functions related to random generation accept an optional `AbstractRNG` object as first argument.
+Some also accept dimension specifications `dims...` (which can be given as a tuple) to generate
+arrays of random values.
+In a multi-threaded program, you should generally use different RNG objects from different threads
+or tasks in order to be thread-safe. However, the default RNG is thread-safe as of Julia 1.3
+(using a per-thread RNG up to version 1.6, and per-task thereafter).
+
+The provided RNGs can generate uniform random numbers of the following types:
 [`Float16`](@ref), [`Float32`](@ref), [`Float64`](@ref), [`BigFloat`](@ref), [`Bool`](@ref),
 [`Int8`](@ref), [`UInt8`](@ref), [`Int16`](@ref), [`UInt16`](@ref), [`Int32`](@ref),
 [`UInt32`](@ref), [`Int64`](@ref), [`UInt64`](@ref), [`Int128`](@ref), [`UInt128`](@ref),
@@ -67,6 +69,8 @@ Random.shuffle!
 ```@docs
 Random.seed!
 Random.AbstractRNG
+Random.TaskLocalRNG
+Random.Xoshiro
 Random.MersenneTwister
 Random.RandomDevice
 ```
@@ -147,20 +151,20 @@ Scalar and array methods for `Die` now work as expected:
 
 ```jldoctest Die; setup = :(Random.seed!(1))
 julia> rand(Die)
-Die(15)
+Die(6)
 
 julia> rand(MersenneTwister(0), Die)
 Die(11)
 
 julia> rand(Die, 3)
 3-element Vector{Die}:
- Die(18)
- Die(5)
+ Die(15)
+ Die(19)
  Die(4)
 
 julia> a = Vector{Die}(undef, 3); rand!(a)
 3-element Vector{Die}:
- Die(5)
+ Die(17)
  Die(20)
  Die(15)
 ```
@@ -175,13 +179,13 @@ In order to define random generation out of objects of type `S`, the following m
 julia> Random.rand(rng::AbstractRNG, d::Random.SamplerTrivial{Die}) = rand(rng, 1:d[].nsides);
 
 julia> rand(Die(4))
-3
+1
 
 julia> rand(Die(4), 3)
 3-element Vector{Any}:
+ 3
  4
  1
- 1
 ```
 
 Given a collection type `S`, it's currently assumed that if `rand(::S)` is defined, an object of type `eltype(S)` will be produced. In the last example, a `Vector{Any}` is produced; the reason is that `eltype(Die) == Any`. The remedy is to define `Base.eltype(::Type{Die}) = Int`.
diff --git a/stdlib/Random/src/RNGs.jl b/stdlib/Random/src/RNGs.jl
index 5c29954f131321..c483296fe3af14 100644
--- a/stdlib/Random/src/RNGs.jl
+++ b/stdlib/Random/src/RNGs.jl
@@ -2,10 +2,6 @@
 
 ## RandomDevice
 
-# SamplerUnion(X, Y, ...}) == Union{SamplerType{X}, SamplerType{Y}, ...}
-SamplerUnion(U...) = Union{Any[SamplerType{T} for T in U]...}
-const SamplerBoolBitInteger = SamplerUnion(Bool, BitInteger_types...)
-
 if Sys.iswindows()
     struct RandomDevice <: AbstractRNG
         buffer::Vector{UInt128}
@@ -181,9 +177,9 @@ function show(io::IO, rng::MersenneTwister)
     seed = from_seed(rng.seed)
     seed_str = seed <= typemax(Int) ? string(seed) : "0x" * string(seed, base=16) # DWIM
     if rng.adv_jump == 0 && rng.adv == 0
-        return print(io, "MersenneTwister($seed_str)")
+        return print(io, MersenneTwister, "(", seed_str, ")")
     end
-    print(io, "MersenneTwister($seed_str, (")
+    print(io, MersenneTwister, "(", seed_str, ", (")
     # state
     adv = Integer[rng.adv_jump, rng.adv]
     if rng.adv_vals != -1 || rng.adv_ints != -1
@@ -359,53 +355,44 @@ function seed!(r::MersenneTwister, seed::Vector{UInt32})
     return r
 end
 
-seed!(r::MersenneTwister=default_rng()) = seed!(r, make_seed())
+seed!(r::MersenneTwister) = seed!(r, make_seed())
 seed!(r::MersenneTwister, n::Integer) = seed!(r, make_seed(n))
-seed!(seed::Union{Integer,Vector{UInt32}}) = seed!(default_rng(), seed)
 
 
 ### Global RNG
 
-const THREAD_RNGs = MersenneTwister[]
-@inline default_rng() = default_rng(Threads.threadid())
-@noinline function default_rng(tid::Int)
-    0 < tid <= length(THREAD_RNGs) || _rng_length_assert()
-    if @inbounds isassigned(THREAD_RNGs, tid)
-        @inbounds MT = THREAD_RNGs[tid]
-    else
-        MT = MersenneTwister()
-        @inbounds THREAD_RNGs[tid] = MT
-    end
-    return MT
-end
-@noinline _rng_length_assert() =  @assert false "0 < tid <= length(THREAD_RNGs)"
-
-function __init__()
-    resize!(empty!(THREAD_RNGs), Threads.nthreads()) # ensures that we didn't save a bad object
-end
-
-
 struct _GLOBAL_RNG <: AbstractRNG
     global const GLOBAL_RNG = _GLOBAL_RNG.instance
 end
 
-# GLOBAL_RNG currently represents a MersenneTwister
-typeof_rng(::_GLOBAL_RNG) = MersenneTwister
+# GLOBAL_RNG currently uses TaskLocalRNG
+typeof_rng(::_GLOBAL_RNG) = TaskLocalRNG
 
-copy!(dst::MersenneTwister, ::_GLOBAL_RNG) = copy!(dst, default_rng())
-copy!(::_GLOBAL_RNG, src::MersenneTwister) = copy!(default_rng(), src)
+@inline default_rng() = TaskLocalRNG()
+@inline default_rng(tid::Int) = TaskLocalRNG()
+
+copy!(dst::Xoshiro, ::_GLOBAL_RNG) = copy!(dst, default_rng())
+copy!(::_GLOBAL_RNG, src::Xoshiro) = copy!(default_rng(), src)
 copy(::_GLOBAL_RNG) = copy(default_rng())
 
-seed!(::_GLOBAL_RNG, seed::Vector{UInt32}) = seed!(default_rng(), seed)
-seed!(::_GLOBAL_RNG, n::Integer) = seed!(default_rng(), n)
-seed!(::_GLOBAL_RNG, ::Nothing) = seed!(default_rng(), nothing)
-seed!(::_GLOBAL_RNG) = seed!(default_rng(), nothing)
+GLOBAL_SEED = 0
+
+function seed!(::_GLOBAL_RNG, seed=rand(RandomDevice(), UInt64, 4))
+    global GLOBAL_SEED = seed
+    seed!(default_rng(), seed)
+end
+
+seed!(rng::_GLOBAL_RNG, ::Nothing) = seed!(rng)  # to resolve ambiguity
+
+seed!(seed::Union{Nothing,Integer,Vector{UInt32},Vector{UInt64},NTuple{4,UInt64}}=nothing) =
+    seed!(GLOBAL_RNG, seed)
 
 rng_native_52(::_GLOBAL_RNG) = rng_native_52(default_rng())
 rand(::_GLOBAL_RNG, sp::SamplerBoolBitInteger) = rand(default_rng(), sp)
 for T in (:(SamplerTrivial{UInt52Raw{UInt64}}),
           :(SamplerTrivial{UInt2x52Raw{UInt128}}),
           :(SamplerTrivial{UInt104Raw{UInt128}}),
+          :(SamplerTrivial{CloseOpen01_64}),
           :(SamplerTrivial{CloseOpen12_64}),
           :(SamplerUnion(Int64, UInt64, Int128, UInt128)),
           :(SamplerUnion(Bool, Int8, UInt8, Int16, UInt16, Int32, UInt32)),
@@ -423,6 +410,10 @@ for T in BitInteger_types
     @eval rand!(::_GLOBAL_RNG, A::Array{$T}, I::SamplerType{$T}) = rand!(default_rng(), A, I)
 end
 
+function __init__()
+    seed!(GLOBAL_RNG)
+end
+
 
 ### generation
 
diff --git a/stdlib/Random/src/Random.jl b/stdlib/Random/src/Random.jl
index 2cdffd60672520..45aa6442eed7e9 100644
--- a/stdlib/Random/src/Random.jl
+++ b/stdlib/Random/src/Random.jl
@@ -27,7 +27,7 @@ export rand!, randn!,
        shuffle, shuffle!,
        randperm, randperm!,
        randcycle, randcycle!,
-       AbstractRNG, MersenneTwister, RandomDevice
+       AbstractRNG, MersenneTwister, RandomDevice, TaskLocalRNG, Xoshiro
 
 ## general definitions
 
@@ -291,11 +291,17 @@ rand(                ::Type{X}, dims::Dims) where {X} = rand(default_rng(), X, d
 rand(r::AbstractRNG, ::Type{X}, d::Integer, dims::Integer...) where {X} = rand(r, X, Dims((d, dims...)))
 rand(                ::Type{X}, d::Integer, dims::Integer...) where {X} = rand(X, Dims((d, dims...)))
 
+# SamplerUnion(X, Y, ...) == Union{SamplerType{X}, SamplerType{Y}, ...}
+SamplerUnion(U...) = Union{Any[SamplerType{T} for T in U]...}
+const SamplerBoolBitInteger = SamplerUnion(Bool, BitInteger_types...)
 
+
+include("Xoshiro.jl")
 include("RNGs.jl")
 include("generation.jl")
 include("normal.jl")
 include("misc.jl")
+include("XoshiroSimd.jl")
 
 ## rand & rand! & seed! docstrings
 
@@ -386,7 +392,7 @@ After the call to `seed!`, `rng` is equivalent to a newly created
 object initialized with the same seed.
 
 If `rng` is not specified, it defaults to seeding the state of the
-shared thread-local generator.
+shared task-local generator.
 
 # Examples
 ```julia-repl
diff --git a/stdlib/Random/src/Xoshiro.jl b/stdlib/Random/src/Xoshiro.jl
new file mode 100644
index 00000000000000..40da3c5ff17224
--- /dev/null
+++ b/stdlib/Random/src/Xoshiro.jl
@@ -0,0 +1,232 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## Xoshiro RNG
+# Much of the implementation is shared with TaskLocalRNG
+
+"""
+    Xoshiro
+
+Xoshiro256++ is a fast pseudorandom number generator described by David Blackman and
+Sebastiano Vigna in "Scrambled Linear Pseudorandom Number Generators",
+ACM Trans. Math. Softw., 2021. The reference implementation is available
+at https://prng.di.unimi.it/
+
+Apart from the high speed, Xoshiro has a small memory footprint, making it suitable for
+applications where many different random states need to be held for a long time.
+
+Julia's Xoshiro implementation has a bulk-generation mode; this seeds new virtual PRNGs
+from the parent, and uses SIMD to generate in parallel (i.e. the bulk stream consists of
+multiple interleaved xoshiro instances).
+The virtual PRNGs are discarded once the bulk request has been serviced (and should cause
+no heap allocations).
+"""
+mutable struct Xoshiro <: AbstractRNG
+    s0::UInt64
+    s1::UInt64
+    s2::UInt64
+    s3::UInt64
+
+    Xoshiro(s0::Integer, s1::Integer, s2::Integer, s3::Integer) = new(s0, s1, s2, s3)
+    Xoshiro(seed=nothing) = seed!(new(), seed)
+end
+
+function setstate!(x::Xoshiro, s0::UInt64, s1::UInt64, s2::UInt64, s3::UInt64)
+    x.s0 = s0
+    x.s1 = s1
+    x.s2 = s2
+    x.s3 = s3
+    x
+end
+
+copy(rng::Xoshiro) = Xoshiro(rng.s0, rng.s1, rng.s2, rng.s3)
+
+function copy!(dst::Xoshiro, src::Xoshiro)
+    dst.s0, dst.s1, dst.s2, dst.s3 = src.s0, src.s1, src.s2, src.s3
+    dst
+end
+
+function ==(a::Xoshiro, b::Xoshiro)
+    a.s0 == b.s0 && a.s1 == b.s1 && a.s2 == b.s2 && a.s3 == b.s3
+end
+
+rng_native_52(::Xoshiro) = UInt64
+
+@inline function rand(rng::Xoshiro, ::SamplerType{UInt64})
+    s0, s1, s2, s3 = rng.s0, rng.s1, rng.s2, rng.s3
+    tmp = s0 + s3
+    res = tmp << 23 | tmp >> 41
+    t = s1 << 17
+    s2 = xor(s2, s0)
+    s3 = xor(s3, s1)
+    s1 = xor(s1, s2)
+    s0 = xor(s0, s3)
+    s2 = xor(s2, t)
+    s3 = s3 << 45 | s3 >> 19
+    rng.s0, rng.s1, rng.s2, rng.s3 = s0, s1, s2, s3
+    res
+end
+
+
+## Task local RNG
+
+"""
+    TaskLocalRNG
+
+The `TaskLocalRNG` has state that is local to its task, not its thread.
+It is seeded upon task creation, from the state of its parent task.
+Therefore, task creation is an event that changes the parent's RNG state.
+
+As an upside, the `TaskLocalRNG` is pretty fast, and permits reproducible
+multithreaded simulations (barring race conditions), independent of scheduler
+decisions. As long as the number of threads is not used to make decisions on
+task creation, simulation results are also independent of the number of available
+threads / CPUs. The random stream should not depend on hardware specifics, up to
+endianness and possibly word size.
+
+Using or seeding the RNG of any other task than the one returned by `current_task()`
+is undefined behavior: it will work most of the time, and may sometimes fail silently.
+"""
+struct TaskLocalRNG <: AbstractRNG end
+TaskLocalRNG(::Nothing) = TaskLocalRNG()
+rng_native_52(::TaskLocalRNG) = UInt64
+
+function setstate!(x::TaskLocalRNG, s0::UInt64, s1::UInt64, s2::UInt64, s3::UInt64)
+    t = current_task()
+    t.rngState0 = s0
+    t.rngState1 = s1
+    t.rngState2 = s2
+    t.rngState3 = s3
+    x
+end
+
+@inline function rand(::TaskLocalRNG, ::SamplerType{UInt64})
+    task = current_task()
+    s0, s1, s2, s3 = task.rngState0, task.rngState1, task.rngState2, task.rngState3
+    tmp = s0 + s3
+    res = tmp << 23 | tmp >> 41
+    t = s1 << 17
+    s2 = xor(s2, s0)
+    s3 = xor(s3, s1)
+    s1 = xor(s1, s2)
+    s0 = xor(s0, s3)
+    s2 = xor(s2, t)
+    s3 = s3 << 45 | s3 >> 19
+    task.rngState0, task.rngState1, task.rngState2, task.rngState3 = s0, s1, s2, s3
+    res
+end
+
+# Shared implementation between Xoshiro and TaskLocalRNG -- seeding
+
+function seed!(x::Union{TaskLocalRNG,Xoshiro})
+    # as we get good randomness from RandomDevice, we can skip hashing
+    parent = RandomDevice()
+    # Constants have nothing up their sleeve, see task.c
+    # 0x02011ce34bce797f == hash(UInt(1))|0x01
+    # 0x5a94851fb48a6e05 == hash(UInt(2))|0x01
+    # 0x3688cf5d48899fa7 == hash(UInt(3))|0x01
+    # 0x867b4bb4c42e5661 == hash(UInt(4))|0x01
+    setstate!(x,
+              0x02011ce34bce797f * rand(parent, UInt64),
+              0x5a94851fb48a6e05 * rand(parent, UInt64),
+              0x3688cf5d48899fa7 * rand(parent, UInt64),
+              0x867b4bb4c42e5661 * rand(parent, UInt64))
+end
+
+function seed!(rng::Union{TaskLocalRNG,Xoshiro}, seed::NTuple{4,UInt64})
+    # TODO: Consider a less ad-hoc construction
+    # We can afford burning a handful of cycles here, and we don't want any
+    # surprises with respect to bad seeds / bad interactions.
+
+    s0 = s  = Base.hash_64_64(seed[1])
+    s1 = s += Base.hash_64_64(seed[2])
+    s2 = s += Base.hash_64_64(seed[3])
+    s3 = s += Base.hash_64_64(seed[4])
+
+    setstate!(rng, s0, s1, s2, s3)
+
+    rand(rng, UInt64)
+    rand(rng, UInt64)
+    rand(rng, UInt64)
+    rand(rng, UInt64)
+    rng
+end
+
+function seed!(rng::Union{TaskLocalRNG, Xoshiro}, seed::UInt128)
+    seed0 = seed % UInt64
+    seed1 = (seed>>>64) % UInt64
+    seed!(rng, (seed0, seed1, zero(UInt64), zero(UInt64)))
+end
+seed!(rng::Union{TaskLocalRNG, Xoshiro}, seed::Integer) = seed!(rng, UInt128(seed))
+
+function seed!(rng::Union{TaskLocalRNG, Xoshiro}, seed::AbstractVector{UInt64})
+    if length(seed) > 4
+        throw(ArgumentError("seed should have no more than 256 bits"))
+    end
+    seed0 = length(seed)>0 ? seed[1] : UInt64(0)
+    seed1 = length(seed)>1 ? seed[2] : UInt64(0)
+    seed2 = length(seed)>2 ? seed[3] : UInt64(0)
+    seed3 = length(seed)>3 ? seed[4] : UInt64(0)
+    seed!(rng, (seed0, seed1, seed2, seed3))
+end
+
+function seed!(rng::Union{TaskLocalRNG, Xoshiro}, seed::AbstractVector{UInt32})
+    if iseven(length(seed))
+        seed!(rng, reinterpret(UInt64, seed))
+    else
+        seed!(rng, UInt64[reinterpret(UInt64, @view(seed[begin:end-1])); seed[end] % UInt64])
+    end
+end
+
+@inline function rand(rng::Union{TaskLocalRNG, Xoshiro}, ::SamplerType{UInt128})
+    first = rand(rng, UInt64)
+    second = rand(rng,UInt64)
+    second + UInt128(first)<<64
+end
+
+@inline rand(rng::Union{TaskLocalRNG, Xoshiro}, ::SamplerType{Int128}) = rand(rng, UInt128) % Int128
+
+@inline function rand(rng::Union{TaskLocalRNG, Xoshiro},
+                      T::SamplerUnion(Bool, Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64))
+    S = T[]
+    # use upper bits
+    (rand(rng, UInt64) >>> (64 - 8*sizeof(S))) % S
+end
+
+function copy(rng::TaskLocalRNG)
+    t = current_task()
+    Xoshiro(t.rngState0, t.rngState1, t.rngState2, t.rngState3)
+end
+
+function copy!(dst::TaskLocalRNG, src::Xoshiro)
+    t = current_task()
+    t.rngState0, t.rngState1, t.rngState2, t.rngState3 = src.s0, src.s1, src.s2, src.s3
+    dst
+end
+
+function copy!(dst::Xoshiro, src::TaskLocalRNG)
+    t = current_task()
+    dst.s0, dst.s1, dst.s2, dst.s3 = t.rngState0, t.rngState1, t.rngState2, t.rngState3
+    dst
+end
+
+function ==(a::Xoshiro, b::TaskLocalRNG)
+    t = current_task()
+    a.s0 == t.rngState0 && a.s1 == t.rngState1 && a.s2 == t.rngState2 && a.s3 == t.rngState3
+end
+
+==(a::TaskLocalRNG, b::Xoshiro) = b == a
+
+# for partial words, use upper bits from Xoshiro
+
+rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{UInt52Raw{UInt64}}) = rand(r, UInt64) >>> 12
+rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{UInt52{UInt64}})    = rand(r, UInt64) >>> 12
+rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{UInt104{UInt128}})  = rand(r, UInt104Raw())
+
+rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{CloseOpen01{Float16}}) =
+    Float16(Float32(rand(r, UInt16) >>> 5) * Float32(0x1.0p-11))
+
+rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{CloseOpen01{Float32}}) =
+    Float32(rand(r, UInt32) >>> 8) * Float32(0x1.0p-24)
+
+rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{CloseOpen01_64}) =
+    Float64(rand(r, UInt64) >>> 11) * 0x1.0p-53
diff --git a/stdlib/Random/src/XoshiroSimd.jl b/stdlib/Random/src/XoshiroSimd.jl
new file mode 100644
index 00000000000000..e115533bb6fefb
--- /dev/null
+++ b/stdlib/Random/src/XoshiroSimd.jl
@@ -0,0 +1,308 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+module XoshiroSimd
+# Getting the xoshiro RNG to reliably vectorize is somewhat of a hassle without SIMD.jl.
+import ..Random: TaskLocalRNG, rand, rand!, Xoshiro, CloseOpen01, UnsafeView,
+                 SamplerType, SamplerTrivial
+using Base: BitInteger_types
+using Core.Intrinsics: llvmcall
+
+# Vector-width. Influences random stream.
+xoshiroWidth() = Val(8)
+# Simd threshold. Influences random stream.
+simdThreshold(::Type{T}) where T = 64
+simdThreshold(::Type{Bool}) = 640
+
+@inline _rotl45(x::UInt64) = (x<<45)|(x>>19)
+@inline _shl17(x::UInt64) = x<<17
+@inline _rotl23(x::UInt64) = (x<<23)|(x>>41)
+@inline _plus(x::UInt64,y::UInt64) = x+y
+@inline _xor(x::UInt64,y::UInt64) = xor(x,y)
+@inline _and(x::UInt64, y::UInt64) = x & y
+@inline _or(x::UInt64, y::UInt64) = x | y
+@inline _lshr(x, y::Int32) = _lshr(x, y % Int64)
+@inline _lshr(x::UInt64, y::Int64) = llvmcall("""
+    %res = lshr i64 %0, %1
+    ret i64 %res
+    """,
+    UInt64,
+    Tuple{UInt64, Int64},
+    x, y)
+
+@inline _bits2float(x::UInt64, ::Type{Float64}) = reinterpret(UInt64, Float64(x >>> 11) * 0x1.0p-53)
+@inline function _bits2float(x::UInt64, ::Type{Float32})
+    #=
+    # this implementation uses more high bits, but is harder to vectorize
+    x = x >>> 16  # discard low 16 bits
+    u = Float32(x >>> 24) * Float32(0x1.0p-24)
+    l = Float32(x & 0x00ffffff) * Float32(0x1.0p-24)
+    =#
+    ui = (x>>>32) % UInt32
+    li = x % UInt32
+    u = Float32(ui >>> 8) * Float32(0x1.0p-24)
+    l = Float32(li >>> 8) * Float32(0x1.0p-24)
+    (UInt64(reinterpret(UInt32, u)) << 32) | UInt64(reinterpret(UInt32, l))
+end
+
+# required operations. These could be written more concisely with `ntuple`, but the compiler
+# sometimes refuses to properly vectorize.
+for N in [4,8,16]
+    let code, s, fshl = "llvm.fshl.v$(N)i64",
+        VT = :(NTuple{$N, VecElement{UInt64}})
+
+        s = ntuple(_->VecElement(UInt64(45)), N)
+        @eval @inline _rotl45(x::$VT) = ccall($fshl, llvmcall, $VT, ($VT, $VT, $VT), x, x, $s)
+
+        s = ntuple(_->VecElement(UInt64(23)), N)
+        @eval @inline _rotl23(x::$VT) = ccall($fshl, llvmcall, $VT, ($VT, $VT, $VT), x, x, $s)
+
+        code = """
+        %lshiftOp = shufflevector <1 x i64> <i64 17>, <1 x i64> undef, <$N x i32> zeroinitializer
+        %res = shl <$N x i64> %0, %lshiftOp
+        ret <$N x i64> %res
+        """
+        @eval @inline _shl17(x::$VT) = llvmcall($code, $VT, Tuple{$VT}, x)
+
+        code = """
+        %res = add <$N x i64> %1, %0
+        ret <$N x i64> %res
+        """
+        @eval @inline _plus(x::$VT, y::$VT) = llvmcall($code, $VT, Tuple{$VT, $VT}, x, y)
+
+        code = """
+        %res = xor <$N x i64> %1, %0
+        ret <$N x i64> %res
+        """
+        @eval @inline _xor(x::$VT, y::$VT) = llvmcall($code, $VT, Tuple{$VT, $VT}, x, y)
+
+        code = """
+        %res = and <$N x i64> %1, %0
+        ret <$N x i64> %res
+        """
+        @eval @inline _and(x::$VT, y::$VT) = llvmcall($code, $VT, Tuple{$VT, $VT}, x, y)
+
+        code = """
+        %res = or <$N x i64> %1, %0
+        ret <$N x i64> %res
+        """
+        @eval @inline _or(x::$VT, y::$VT) = llvmcall($code, $VT, Tuple{$VT, $VT}, x, y)
+
+        code = """
+        %tmp = insertelement <1 x i64> undef, i64 %1, i32 0
+        %shift = shufflevector <1 x i64> %tmp, <1 x i64> %tmp, <$N x i32> zeroinitializer
+        %res = lshr <$N x i64> %0, %shift
+        ret <$N x i64> %res
+        """
+        @eval @inline _lshr(x::$VT, y::Int64) = llvmcall($code, $VT, Tuple{$VT, Int64}, x, y)
+
+        code = """
+        %shiftamt = shufflevector <1 x i64> <i64 11>, <1 x i64> undef, <$N x i32> zeroinitializer
+        %sh = lshr <$N x i64> %0, %shiftamt
+        %f = uitofp <$N x i64> %sh to <$N x double>
+        %scale = shufflevector <1 x double> <double 0x3ca0000000000000>, <1 x double> undef, <$N x i32> zeroinitializer
+        %m = fmul <$N x double> %f, %scale
+        %i = bitcast <$N x double> %m to <$N x i64>
+        ret <$N x i64> %i
+        """
+        @eval @inline _bits2float(x::$VT, ::Type{Float64}) = llvmcall($code, $VT, Tuple{$VT}, x)
+
+        code = """
+        %as32 = bitcast <$N x i64> %0 to <$(2N) x i32>
+        %shiftamt = shufflevector <1 x i32> <i32 8>, <1 x i32> undef, <$(2N) x i32> zeroinitializer
+        %sh = lshr <$(2N) x i32> %as32, %shiftamt
+        %f = uitofp <$(2N) x i32> %sh to <$(2N) x float>
+        %scale = shufflevector <1 x float> <float 0x3e70000000000000>, <1 x float> undef, <$(2N) x i32> zeroinitializer
+        %m = fmul <$(2N) x float> %f, %scale
+        %i = bitcast <$(2N) x float> %m to <$N x i64>
+        ret <$N x i64> %i
+        """
+        @eval @inline _bits2float(x::$VT, ::Type{Float32}) = llvmcall($code, $VT, Tuple{$VT}, x)
+    end
+end
+
+
+function forkRand(rng::Union{TaskLocalRNG, Xoshiro}, ::Val{N}) where N
+    # constants have nothing up their sleeve. For more discussion, cf rng_split in task.c
+    # 0x02011ce34bce797f == hash(UInt(1))|0x01
+    # 0x5a94851fb48a6e05 == hash(UInt(2))|0x01
+    # 0x3688cf5d48899fa7 == hash(UInt(3))|0x01
+    # 0x867b4bb4c42e5661 == hash(UInt(4))|0x01
+    s0 = ntuple(i->VecElement(0x02011ce34bce797f * rand(rng, UInt64)), Val(N))
+    s1 = ntuple(i->VecElement(0x5a94851fb48a6e05 * rand(rng, UInt64)), Val(N))
+    s2 = ntuple(i->VecElement(0x3688cf5d48899fa7 * rand(rng, UInt64)), Val(N))
+    s3 = ntuple(i->VecElement(0x867b4bb4c42e5661 * rand(rng, UInt64)), Val(N))
+    (s0, s1, s2, s3)
+end
+
+_id(x, T) = x
+
+@inline function xoshiro_bulk(rng::Union{TaskLocalRNG, Xoshiro}, dst::Ptr{UInt8}, len::Int, T::Union{Type{UInt8}, Type{Bool}, Type{Float32}, Type{Float64}}, ::Val{N}, f::F = _id) where {N, F}
+    if len >= simdThreshold(T)
+        written = xoshiro_bulk_simd(rng, dst, len, T, Val(N), f)
+        len -= written
+        dst += written
+    end
+    if len != 0
+        xoshiro_bulk_nosimd(rng, dst, len, T, f)
+    end
+    nothing
+end
+
+@noinline function xoshiro_bulk_nosimd(rng::Union{TaskLocalRNG, Xoshiro}, dst::Ptr{UInt8}, len::Int, ::Type{T}, f::F) where {T, F}
+    if rng isa TaskLocalRNG
+        task = current_task()
+        s0, s1, s2, s3 = task.rngState0, task.rngState1, task.rngState2, task.rngState3
+    else
+        (; s0, s1, s2, s3) = rng::Xoshiro
+    end
+
+    i = 0
+    while i+8 <= len
+        res = _rotl23(_plus(s0,s3))
+        unsafe_store!(reinterpret(Ptr{UInt64}, dst + i), f(res, T))
+        t = _shl17(s1)
+        s2 = _xor(s2, s0)
+        s3 = _xor(s3, s1)
+        s1 = _xor(s1, s2)
+        s0 = _xor(s0, s3)
+        s2 = _xor(s2, t)
+        s3 = _rotl45(s3)
+        i += 8
+    end
+    if i < len
+        res = _rotl23(_plus(s0,s3))
+        t = _shl17(s1)
+        s2 = _xor(s2, s0)
+        s3 = _xor(s3, s1)
+        s1 = _xor(s1, s2)
+        s0 = _xor(s0, s3)
+        s2 = _xor(s2, t)
+        s3 = _rotl45(s3)
+        ref = Ref(f(res, T))
+        # TODO: This may make the random-stream dependent on system endianness
+        ccall(:memcpy, Ptr{Cvoid}, (Ptr{UInt8}, Ptr{UInt64}, Csize_t), dst+i, ref, len-i)
+    end
+    if rng isa TaskLocalRNG
+        task.rngState0, task.rngState1, task.rngState2, task.rngState3 = s0, s1, s2, s3
+    else
+       rng.s0, rng.s1, rng.s2, rng.s3 =  s0, s1, s2, s3
+    end
+    nothing
+end
+
+@noinline function xoshiro_bulk_nosimd(rng::Union{TaskLocalRNG, Xoshiro}, dst::Ptr{UInt8}, len::Int, ::Type{Bool}, f)
+    if rng isa TaskLocalRNG
+        task = current_task()
+        s0, s1, s2, s3 = task.rngState0, task.rngState1, task.rngState2, task.rngState3
+    else
+        (; s0, s1, s2, s3) = rng::Xoshiro
+    end
+
+    i = 0
+    while i+8 <= len
+        res = _rotl23(_plus(s0,s3))
+        shift = 0
+        while i+8 <= len && shift < 8
+            resLoc = _and(_lshr(res, shift), 0x0101010101010101)
+            unsafe_store!(reinterpret(Ptr{UInt64}, dst + i), resLoc)
+            i += 8
+            shift += 1
+        end
+
+        t = _shl17(s1)
+        s2 = _xor(s2, s0)
+        s3 = _xor(s3, s1)
+        s1 = _xor(s1, s2)
+        s0 = _xor(s0, s3)
+        s2 = _xor(s2, t)
+        s3 = _rotl45(s3)
+    end
+    if i < len
+        # we may overgenerate some bytes here, if len mod 64 <= 56 and len mod 8 != 0
+        res = _rotl23(_plus(s0,s3))
+        resLoc = _and(res, 0x0101010101010101)
+        ref = Ref(resLoc)
+        ccall(:memcpy, Ptr{Cvoid}, (Ptr{UInt8}, Ptr{UInt64}, Csize_t), dst+i, ref, len-i)
+        t = _shl17(s1)
+        s2 = _xor(s2, s0)
+        s3 = _xor(s3, s1)
+        s1 = _xor(s1, s2)
+        s0 = _xor(s0, s3)
+        s2 = _xor(s2, t)
+        s3 = _rotl45(s3)
+    end
+    if rng isa TaskLocalRNG
+        task.rngState0, task.rngState1, task.rngState2, task.rngState3 = s0, s1, s2, s3
+    else
+        rng.s0, rng.s1, rng.s2, rng.s3 = s0, s1, s2, s3
+    end
+    nothing
+end
+
+
+@noinline function xoshiro_bulk_simd(rng::Union{TaskLocalRNG, Xoshiro}, dst::Ptr{UInt8}, len::Int, ::Type{T}, ::Val{N}, f::F) where {T,N,F}
+    s0, s1, s2, s3 = forkRand(rng, Val(N))
+
+    i = 0
+    while i + 8*N <= len
+        res = _rotl23(_plus(s0,s3))
+        t = _shl17(s1)
+        s2 = _xor(s2, s0)
+        s3 = _xor(s3, s1)
+        s1 = _xor(s1, s2)
+        s0 = _xor(s0, s3)
+        s2 = _xor(s2, t)
+        s3 = _rotl45(s3)
+        unsafe_store!(reinterpret(Ptr{NTuple{N,VecElement{UInt64}}}, dst + i), f(res, T))
+        i += 8*N
+    end
+    return i
+end
+
+@noinline function xoshiro_bulk_simd(rng::Union{TaskLocalRNG, Xoshiro}, dst::Ptr{UInt8}, len::Int, ::Type{Bool}, ::Val{N}, f) where {N}
+    s0, s1, s2, s3 = forkRand(rng, Val(N))
+    msk = ntuple(i->VecElement(0x0101010101010101), Val(N))
+    i = 0
+    while i + 64*N <= len
+        res = _rotl23(_plus(s0,s3))
+        t = _shl17(s1)
+        s2 = _xor(s2, s0)
+        s3 = _xor(s3, s1)
+        s1 = _xor(s1, s2)
+        s0 = _xor(s0, s3)
+        s2 = _xor(s2, t)
+        s3 = _rotl45(s3)
+        for k=0:7
+            tmp = _lshr(res, k)
+            toWrite = _and(tmp, msk)
+            unsafe_store!(reinterpret(Ptr{NTuple{N,VecElement{UInt64}}}, dst + i + k*N*8), toWrite)
+        end
+        i += 64*N
+    end
+    return i
+end
+
+
+function rand!(rng::Union{TaskLocalRNG, Xoshiro}, dst::Array{Float32}, ::SamplerTrivial{CloseOpen01{Float32}})
+    GC.@preserve dst xoshiro_bulk(rng, convert(Ptr{UInt8}, pointer(dst)), length(dst)*4, Float32, xoshiroWidth(), _bits2float)
+    dst
+end
+
+function rand!(rng::Union{TaskLocalRNG, Xoshiro}, dst::Array{Float64}, ::SamplerTrivial{CloseOpen01{Float64}})
+    GC.@preserve dst xoshiro_bulk(rng, convert(Ptr{UInt8}, pointer(dst)), length(dst)*8, Float64, xoshiroWidth(), _bits2float)
+    dst
+end
+
+for T in BitInteger_types
+    @eval function rand!(rng::Union{TaskLocalRNG, Xoshiro}, dst::Union{Array{$T}, UnsafeView{$T}}, ::SamplerType{$T})
+        GC.@preserve dst xoshiro_bulk(rng, convert(Ptr{UInt8}, pointer(dst)), length(dst)*sizeof($T), UInt8, xoshiroWidth())
+        dst
+    end
+end
+
+function rand!(rng::Union{TaskLocalRNG, Xoshiro}, dst::Array{Bool}, ::SamplerType{Bool})
+    GC.@preserve dst xoshiro_bulk(rng, convert(Ptr{UInt8}, pointer(dst)), length(dst), Bool, xoshiroWidth())
+    dst
+end
+
+end # module
diff --git a/stdlib/Random/src/generation.jl b/stdlib/Random/src/generation.jl
index ba23e16e6ca47a..ddbf6dce98bec3 100644
--- a/stdlib/Random/src/generation.jl
+++ b/stdlib/Random/src/generation.jl
@@ -172,10 +172,10 @@ end
 
 ### BitInteger
 
-# there are three implemented samplers for unit ranges, the two first of which
-# assume that Float64 (i.e. 52 random bits) is the native type for the RNG:
-# 1) "Fast" (SamplerRangeFast), which is most efficient when the underlying RNG produces
-#    rand(Float64) "fast enough".
+# there are three implemented samplers for unit ranges, the second one
+# assumes that Float64 (i.e. 52 random bits) is the native type for the RNG:
+# 1) "Fast" (SamplerRangeFast), which is most efficient when the range length is close
+#    (or equal) to a power of 2 from below.
 #    The tradeoff is faster creation of the sampler, but more consumption of entropy bits.
 # 2) "Slow" (SamplerRangeInt) which tries to use as few entropy bits as possible, at the
 #    cost of a bigger upfront price associated with the creation of the sampler.
@@ -224,20 +224,32 @@ function rand(rng::AbstractRNG, sp::SamplerRangeFast{UInt32,T}) where T
     (x + a % UInt32) % T
 end
 
+has_fast_64(rng::AbstractRNG) = rng_native_52(rng) != Float64 # true unless rng_native_52(rng) is Float64
+# for MersenneTwister, both options (the 52-bit path and the 64-bit path) have very similar performance
+
 function rand(rng::AbstractRNG, sp::SamplerRangeFast{UInt64,T}) where T
     a, bw, m, mask = sp.a, sp.bw, sp.m, sp.mask
-    x = bw <= 52 ? rand(rng, LessThan(m, Masked(mask, UInt52Raw()))) :
-                   rand(rng, LessThan(m, Masked(mask, uniform(UInt64))))
+    if !has_fast_64(rng) && bw <= 52
+        x = rand(rng, LessThan(m, Masked(mask, UInt52Raw())))
+    else
+        x = rand(rng, LessThan(m, Masked(mask, uniform(UInt64))))
+    end
     (x + a % UInt64) % T
 end
 
 function rand(rng::AbstractRNG, sp::SamplerRangeFast{UInt128,T}) where T
     a, bw, m, mask = sp.a, sp.bw, sp.m, sp.mask
-    x = bw <= 52  ?
-        rand(rng, LessThan(m % UInt64, Masked(mask % UInt64, UInt52Raw()))) % UInt128 :
-    bw <= 104 ?
-        rand(rng, LessThan(m, Masked(mask, UInt104Raw()))) :
-        rand(rng, LessThan(m, Masked(mask, uniform(UInt128))))
+    if has_fast_64(rng)
+        x = bw <= 64 ?
+            rand(rng, LessThan(m % UInt64, Masked(mask % UInt64, uniform(UInt64)))) % UInt128 :
+            rand(rng, LessThan(m, Masked(mask, uniform(UInt128))))
+    else
+        x = bw <= 52  ?
+            rand(rng, LessThan(m % UInt64, Masked(mask % UInt64, UInt52Raw()))) % UInt128 :
+        bw <= 104 ?
+            rand(rng, LessThan(m, Masked(mask, UInt104Raw()))) :
+            rand(rng, LessThan(m, Masked(mask, uniform(UInt128))))
+    end
     x % T + a
 end
 
@@ -346,45 +358,56 @@ end
 
 ### BigInt
 
-struct SamplerBigInt <: Sampler{BigInt}
+struct SamplerBigInt{SP<:Sampler{Limb}} <: Sampler{BigInt}
     a::BigInt         # first
     m::BigInt         # range length - 1
     nlimbs::Int       # number of limbs in generated BigInt's (z ∈ [0, m])
     nlimbsmax::Int    # max number of limbs for z+a
-    mask::Limb        # applied to the highest limb
+    highsp::SP        # sampler for the highest limb of z
 end
 
-function SamplerBigInt(r::AbstractUnitRange{BigInt})
+function SamplerBigInt(::Type{RNG}, r::AbstractUnitRange{BigInt}, N::Repetition=Val(Inf)
+                       ) where {RNG<:AbstractRNG}
     m = last(r) - first(r)
-    m < 0 && throw(ArgumentError("range must be non-empty"))
-    nd = ndigits(m, base=2)
-    nlimbs, highbits = divrem(nd, 8*sizeof(Limb))
-    highbits > 0 && (nlimbs += 1)
-    mask = highbits == 0 ? ~zero(Limb) : one(Limb)<<highbits - one(Limb)
+    m.size < 0 && throw(ArgumentError("range must be non-empty"))
+    nlimbs = Int(m.size)
+    hm = nlimbs == 0 ? Limb(0) : GC.@preserve m unsafe_load(m.d, nlimbs)
+    highsp = Sampler(RNG, Limb(0):hm, N)
     nlimbsmax = max(nlimbs, abs(last(r).size), abs(first(r).size))
-    return SamplerBigInt(first(r), m, nlimbs, nlimbsmax, mask)
+    return SamplerBigInt(first(r), m, nlimbs, nlimbsmax, highsp)
 end
 
-Sampler(::Type{<:AbstractRNG}, r::AbstractUnitRange{BigInt}, ::Repetition) = SamplerBigInt(r)
+Sampler(::Type{RNG}, r::AbstractUnitRange{BigInt}, N::Repetition) where {RNG<:AbstractRNG} =
+    SamplerBigInt(RNG, r, N)
 
 rand(rng::AbstractRNG, sp::SamplerBigInt) =
     rand!(rng, BigInt(nbits = sp.nlimbsmax*8*sizeof(Limb)), sp)
 
 function rand!(rng::AbstractRNG, x::BigInt, sp::SamplerBigInt)
+    nlimbs = sp.nlimbs
+    nlimbs == 0 && return MPZ.set!(x, sp.a)
     MPZ.realloc2!(x, sp.nlimbsmax*8*sizeof(Limb))
+    @assert x.alloc >= nlimbs
+    # we randomize x ∈ [0, m] with rejection sampling:
+    # 1. the first nlimbs-1 limbs of x are uniformly randomized
+    # 2. the high limb hx of x is sampled from 0:hm where hm is the
+    #    high limb of m
+    # We repeat 1. and 2. until x <= m
+    hm = GC.@preserve sp unsafe_load(sp.m.d, nlimbs)
     GC.@preserve x begin
-        limbs = UnsafeView(x.d, sp.nlimbs)
+        limbs = UnsafeView(x.d, nlimbs-1)
         while true
             rand!(rng, limbs)
-            limbs[end] &= sp.mask
-            MPZ.mpn_cmp(x, sp.m, sp.nlimbs) <= 0 && break
+            hx = limbs[nlimbs] = rand(rng, sp.highsp)
+            hx < hm && break # avoid calling mpn_cmp most of the time
+            MPZ.mpn_cmp(x, sp.m, nlimbs) <= 0 && break
         end
         # adjust x.size (normally done by mpz_limbs_finish, in GMP version >= 6)
-        x.size = sp.nlimbs
-        while x.size > 0
-            limbs[x.size] != 0 && break
-            x.size -= 1
+        while nlimbs > 0
+            limbs[nlimbs] != 0 && break
+            nlimbs -= 1
         end
+        x.size = nlimbs
     end
     MPZ.add!(x, sp.a)
 end
diff --git a/stdlib/Random/src/misc.jl b/stdlib/Random/src/misc.jl
index 3c09f5b30d257f..674c1d3bfe571b 100644
--- a/stdlib/Random/src/misc.jl
+++ b/stdlib/Random/src/misc.jl
@@ -53,13 +53,13 @@ number generator, see [Random Numbers](@ref).
 # Examples
 ```jldoctest
 julia> Random.seed!(3); randstring()
-"Y7m62wOj"
+"vZmAMp3z"
 
 julia> randstring(MersenneTwister(3), 'a':'z', 6)
 "ocucay"
 
 julia> randstring("ACGT")
-"ATTTGCGT"
+"CAAACACC"
 ```
 
 !!! note
@@ -71,7 +71,12 @@ function randstring end
 
 let b = UInt8['0':'9';'A':'Z';'a':'z']
     global randstring
-    randstring(r::AbstractRNG, chars=b, n::Integer=8) = String(rand(r, chars, n))
+    function randstring(r::AbstractRNG, chars=b, n::Integer=8) # build a length-n String from elements sampled out of chars
+        T = eltype(chars)
+        v = T === UInt8 ? Base.StringVector(n) : Vector{T}(undef, n) # UInt8 alphabets get a Base.StringVector buffer, other element types an uninitialized Vector{T}
+        rand!(r, v, chars) # fill v in place with elements sampled from chars
+        return String(v)
+    end
     randstring(r::AbstractRNG, n::Integer) = randstring(r, b, n)
     randstring(chars=b, n::Integer=8) = randstring(default_rng(), chars, n)
     randstring(n::Integer) = randstring(default_rng(), b, n)
diff --git a/stdlib/Random/src/normal.jl b/stdlib/Random/src/normal.jl
index 8638d3d62c6243..6bb4cd2c36ce80 100644
--- a/stdlib/Random/src/normal.jl
+++ b/stdlib/Random/src/normal.jl
@@ -44,10 +44,9 @@ julia> randn(rng, ComplexF32, (2, 3))
     inside the following function.
     =#
     @inbounds begin
-        r = rand(rng, UInt52Raw())
+        r = rand(rng, UInt52())
 
         # the following code is identical to the one in `_randn(rng::AbstractRNG, r::UInt64)`
-        r &= 0x000fffffffffffff
         rabs = Int64(r>>1) # One bit for the sign
         idx = rabs & 0xFF
         x = ifelse(r % Bool, -rabs, rabs)*wi[idx+1]
@@ -214,6 +213,22 @@ for randfun in [:randn, :randexp]
             A
         end
 
+        # optimization for Xoshiro, which natively randomizes Array{UInt64}
+        function $randfun!(rng::Union{Xoshiro, TaskLocalRNG}, A::Array{Float64})
+            if length(A) < 7 # short arrays: draw every element with the scalar method
+                for i in eachindex(A)
+                    @inbounds A[i] = $randfun(rng, Float64)
+                end
+            else
+                GC.@preserve A rand!(rng, UnsafeView{UInt64}(pointer(A), length(A))) # first pass: randomize A's storage viewed as UInt64 words
+
+                for i in eachindex(A)
+                    @inbounds A[i] = $_randfun(rng, reinterpret(UInt64, A[i]) >>> 12) # second pass: reread each word as UInt64, drop its low 12 bits, and pass the remaining 52 bits on
+                end
+            end
+            A
+        end
+
         $randfun!(A::AbstractArray) = $randfun!(default_rng(), A)
 
         # generating arrays
diff --git a/stdlib/Random/test/runtests.jl b/stdlib/Random/test/runtests.jl
index c502b14bc3a7f6..1995a9efbc471e 100644
--- a/stdlib/Random/test/runtests.jl
+++ b/stdlib/Random/test/runtests.jl
@@ -2,6 +2,7 @@
 
 using Test, SparseArrays
 using Test: guardseed
+using Statistics: mean
 
 const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
 isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl"))
@@ -307,7 +308,7 @@ let a = [rand(RandomDevice(), UInt128) for i=1:10]
 end
 
 # test all rand APIs
-for rng in ([], [MersenneTwister(0)], [RandomDevice()])
+for rng in ([], [MersenneTwister(0)], [RandomDevice()], [Xoshiro()])
     ftypes = [Float16, Float32, Float64]
     cftypes = [ComplexF16, ComplexF32, ComplexF64, ftypes...]
     types = [Bool, Char, BigFloat, Base.BitInteger_types..., ftypes...]
@@ -432,7 +433,7 @@ function hist(X, n)
 end
 
 # test uniform distribution of floats
-for rng in [MersenneTwister(), RandomDevice()],
+for rng in [MersenneTwister(), RandomDevice(), Xoshiro()],
     T in [Float16, Float32, Float64, BigFloat],
         prec in (T == BigFloat ? [3, 53, 64, 100, 256, 1000] : [256])
     setprecision(BigFloat, prec) do
@@ -453,7 +454,7 @@ end
         # but also for 3 linear combinations of positions (for the array version)
         lcs = unique!.([rand(1:n, 2), rand(1:n, 3), rand(1:n, 5)])
         aslcs = zeros(Int, 3)
-        for rng = (MersenneTwister(), RandomDevice())
+        for rng = (MersenneTwister(), RandomDevice(), Xoshiro())
             for scalar = [false, true]
                 fill!(a, 0)
                 fill!(as, 0)
@@ -477,8 +478,8 @@ end
     end
 end
 
-# test reproducility of methods
-let mta = MersenneTwister(42), mtb = MersenneTwister(42)
+@testset "reproducility of methods for $RNG" for RNG=(MersenneTwister,Xoshiro)
+    mta, mtb = RNG(42), RNG(42)
 
     @test rand(mta) == rand(mtb)
     @test rand(mta,10) == rand(mtb,10)
@@ -627,7 +628,7 @@ guardseed() do
     m = MersenneTwister(0)
     @test Random.seed!() === g
     @test Random.seed!(rand(UInt)) === g
-    @test Random.seed!(rand(UInt32, rand(1:10))) === g
+    @test Random.seed!(rand(UInt32, rand(1:8))) === g
     @test Random.seed!(m) === m
     @test Random.seed!(m, rand(UInt)) === m
     @test Random.seed!(m, rand(UInt32, rand(1:10))) === m
@@ -663,7 +664,7 @@ end
 # this shouldn't crash (#22403)
 @test_throws ArgumentError rand!(Union{UInt,Int}[1, 2, 3])
 
-@testset "$RNG() & Random.seed!(rng::$RNG) initializes randomly" for RNG in (MersenneTwister, RandomDevice)
+@testset "$RNG() & Random.seed!(rng::$RNG) initializes randomly" for RNG in (MersenneTwister, RandomDevice, Xoshiro)
     m = RNG()
     a = rand(m, Int)
     m = RNG()
@@ -684,11 +685,51 @@ end
     @test rand(m, Int) ∉ (a, b, c, d)
 end
 
-@testset "MersenneTwister($seed) & Random.seed!(m::MersenneTwister, $seed) produce the same stream" for seed in [0:5; 10000:10005]
-    m = MersenneTwister(seed)
-    a = [rand(m) for _=1:100]
-    Random.seed!(m, seed)
-    @test a == [rand(m) for _=1:100]
+@testset "$RNG(seed) & Random.seed!(m::$RNG, seed) produce the same stream" for RNG=(MersenneTwister,Xoshiro)
+    seeds = Any[0, 1, 2, 10000, 10001, rand(UInt32, 8), rand(UInt128, 3)...] # integer seeds, one UInt32 vector, and three splatted UInt128 values
+    if RNG == Xoshiro
+        push!(seeds, rand(UInt64, rand(1:4)), Tuple(rand(UInt64, 4))) # seed shapes tried only for Xoshiro: a UInt64 vector of length 1 to 4 and a 4-tuple of UInt64
+    end
+    for seed=seeds
+        m = RNG(seed)
+        a = [rand(m) for _=1:100]
+        Random.seed!(m, seed) # reseed the same object with the same seed
+        @test a == [rand(m) for _=1:100] # the first 100 draws must repeat exactly
+    end
+end
+
+@testset "Random.seed!(seed) sets Random.GLOBAL_SEED" begin
+    seeds = Any[0, rand(UInt128), rand(UInt64, 4), Tuple(rand(UInt64, 4))]
+
+    for seed=seeds
+        Random.seed!(seed)
+        @test Random.GLOBAL_SEED === seed
+    end
+    # two separate loops, as otherwise we are not sure that the second call (with GLOBAL_RNG)
+    # actually sets GLOBAL_SEED
+    for seed=seeds
+        Random.seed!(Random.GLOBAL_RNG, seed)
+        @test Random.GLOBAL_SEED === seed
+    end
+
+    Random.seed!(nothing)
+    seed1 = Random.GLOBAL_SEED
+    @test seed1 isa Vector{UInt64} # could change, but must not be nothing
+
+    Random.seed!(Random.GLOBAL_RNG, nothing)
+    seed2 = Random.GLOBAL_SEED
+    @test seed2 isa Vector{UInt64}
+    @test seed2 != seed1 # each seedless call is expected to record a different seed than the previous one
+
+    Random.seed!()
+    seed3 = Random.GLOBAL_SEED
+    @test seed3 isa Vector{UInt64}
+    @test seed3 != seed2
+
+    Random.seed!(Random.GLOBAL_RNG)
+    seed4 = Random.GLOBAL_SEED
+    @test seed4 isa Vector{UInt64}
+    @test seed4 != seed3
+end
 
 struct RandomStruct23964 end
@@ -697,7 +738,7 @@ struct RandomStruct23964 end
     @test_throws ArgumentError rand(RandomStruct23964())
 end
 
-@testset "rand(::$(typeof(RNG)), ::UnitRange{$T}" for RNG ∈ (MersenneTwister(rand(UInt128)), RandomDevice()),
+@testset "rand(::$(typeof(RNG)), ::UnitRange{$T}" for RNG ∈ (MersenneTwister(rand(UInt128)), RandomDevice(), Xoshiro()),
                                                         T ∈ (Int8, Int16, Int32, UInt32, Int64, Int128, UInt128)
     for S in (SamplerRangeInt, SamplerRangeFast, SamplerRangeNDL)
         S == SamplerRangeNDL && sizeof(T) > 8 && continue
@@ -751,28 +792,26 @@ end
     @test Random.seed!(GLOBAL_RNG, 0) === LOCAL_RNG
     @test Random.seed!(GLOBAL_RNG) === LOCAL_RNG
 
-    mt = MersenneTwister(1)
-    @test copy!(mt, GLOBAL_RNG) === mt
-    @test mt == LOCAL_RNG
-    Random.seed!(mt, 2)
-    @test mt != LOCAL_RNG
-    @test copy!(GLOBAL_RNG, mt) === LOCAL_RNG
-    @test mt == LOCAL_RNG
-    mt2 = copy(GLOBAL_RNG)
-    @test mt2 isa typeof(LOCAL_RNG)
-    @test mt2 !== LOCAL_RNG
-    @test mt2 == LOCAL_RNG
+    xo = Xoshiro()
+    @test copy!(xo, GLOBAL_RNG) === xo
+    @test xo == LOCAL_RNG
+    Random.seed!(xo, 2)
+    @test xo != LOCAL_RNG
+    @test copy!(GLOBAL_RNG, xo) === LOCAL_RNG
+    @test xo == LOCAL_RNG
+    xo2 = copy(GLOBAL_RNG)
+    @test xo2 !== LOCAL_RNG
+    @test xo2 == LOCAL_RNG
 
     for T in (Random.UInt52Raw{UInt64},
-              Random.UInt2x52Raw{UInt128},
               Random.UInt104Raw{UInt128},
               Random.CloseOpen12_64)
         x = Random.SamplerTrivial(T())
-        @test rand(GLOBAL_RNG, x) === rand(mt, x)
+        @test rand(GLOBAL_RNG, x) === rand(xo, x)
     end
     for T in (Int64, UInt64, Int128, UInt128, Bool, Int8, UInt8, Int16, UInt16, Int32, UInt32)
         x = Random.SamplerType{T}()
-        @test rand(GLOBAL_RNG, x) === rand(mt, x)
+        @test rand(GLOBAL_RNG, x) === rand(xo, x)
     end
 
     A = fill(0.0, 100, 100)
@@ -781,25 +820,32 @@ end
     vB = view(B, :, :)
     I1 = Random.SamplerTrivial(Random.CloseOpen01{Float64}())
     I2 = Random.SamplerTrivial(Random.CloseOpen12{Float64}())
-    @test rand!(GLOBAL_RNG, A, I1) === A == rand!(mt, B, I1) === B
+    @test rand!(GLOBAL_RNG, A, I1) === A == rand!(xo, B, I1) === B
     B = fill!(B, 1.0)
     @test rand!(GLOBAL_RNG, vA, I1) === vA
-    rand!(mt, vB, I1)
+    rand!(xo, vB, I1)
     @test A == B
     for T in (Float16, Float32)
         B = fill!(B, 1.0)
-        @test rand!(GLOBAL_RNG, A, I2) === A == rand!(mt, B, I2) === B
+        @test rand!(GLOBAL_RNG, A, I2) === A == rand!(xo, B, I2) === B
         B = fill!(B, 1.0)
-        @test rand!(GLOBAL_RNG, A, I1) === A == rand!(mt, B, I1) === B
+        @test rand!(GLOBAL_RNG, A, I1) === A == rand!(xo, B, I1) === B
     end
     for T in Base.BitInteger_types
         x = Random.SamplerType{T}()
         B = fill!(B, 1.0)
-        @test rand!(GLOBAL_RNG, A, x) === A == rand!(mt, B, x) === B
+        @test rand!(GLOBAL_RNG, A, x) === A == rand!(xo, B, x) === B
     end
     # issue #33170
     @test Sampler(GLOBAL_RNG, 2:4, Val(1)) isa SamplerRangeNDL
     @test Sampler(GLOBAL_RNG, 2:4, Val(Inf)) isa SamplerRangeNDL
+
+    rng = copy(GLOBAL_RNG)
+    # make sure _GLOBAL_RNG and the underlying implementation use the same code path
+    @test rand(rng) == rand(GLOBAL_RNG)
+    @test rand(rng) == rand(GLOBAL_RNG)
+    @test rand(rng) == rand(GLOBAL_RNG)
+    @test rand(rng) == rand(GLOBAL_RNG)
 end
 
 @testset "RNGs broadcast as scalars: T" for T in (MersenneTwister, RandomDevice)
@@ -863,15 +909,26 @@ end
     @test m == MersenneTwister(0, (0, 2256, 1254, 1, 0, 1))
 end
 
-@testset "rand! for BigInt/BigFloat" begin
+@testset "rand[!] for BigInt/BigFloat" begin
     rng = MersenneTwister()
-    s = Random.SamplerBigInt(1:big(9))
+    s = Random.SamplerBigInt(MersenneTwister, 1:big(9))
     x = rand(s)
     @test x isa BigInt
     y = rand!(rng, x, s)
     @test y === x
     @test x in 1:9
 
+    for t = BigInt[0, 10, big(2)^100]
+        s = Random.Sampler(rng, t:t) # s.nlimbs == 0
+        @test rand(rng, s) == t
+        @test x === rand!(rng, x, s) == t
+
+        s = Random.Sampler(rng, big(-1):t) # s.nlimbs != 0
+        @test rand(rng, s) ∈ -1:t
+        @test x === rand!(rng, x, s) ∈ -1:t
+
+    end
+
     s = Random.Sampler(MersenneTwister, Random.CloseOpen01(BigFloat))
     x = rand(s)
     @test x isa BigFloat
@@ -898,3 +955,15 @@ end
     x = BigFloat()
     @test_throws ArgumentError rand!(rng, x, s) # incompatible precision
 end
+
+@testset "shuffle! for BitArray" begin
+    # Test that shuffle! is uniformly random on BitArrays
+    rng = MersenneTwister(123)
+    a = (reshape(1:(4*5), 4, 5) .<= 2) # 4x5 BitMatrix whose first two elements are true, rest are false
+    m = mean(1:50_000) do _
+        shuffle!(rng, a)
+    end # element-wise mean over 50_000 calls of shuffle!(rng, a). If shuffle! is uniform, each index holds
+    # a true 10% of the time (2 of the 20 entries are true), so each value should converge to 0.1.
+    @test minimum(m) >= 0.094 # lower bound: 0.1 - 0.006
+    @test maximum(m) <= 0.106 # upper bound: 0.1 + 0.006
+end
diff --git a/stdlib/Serialization/src/Serialization.jl b/stdlib/Serialization/src/Serialization.jl
index bee52a72f77987..592db96565c7af 100644
--- a/stdlib/Serialization/src/Serialization.jl
+++ b/stdlib/Serialization/src/Serialization.jl
@@ -425,7 +425,7 @@ function serialize(s::AbstractSerializer, meth::Method)
         serialize(s, nothing)
     end
     if isdefined(meth, :generator)
-        serialize(s, Base._uncompressed_ast(meth, meth.generator.inferred)) # XXX: what was this supposed to do?
+        serialize(s, meth.generator)
     else
         serialize(s, nothing)
     end
@@ -434,14 +434,20 @@ function serialize(s::AbstractSerializer, meth::Method)
     else
         serialize(s, nothing)
     end
+    if isdefined(meth, :external_mt)
+        error("cannot serialize Method objects with external method tables")
+    end
     nothing
 end
 
 function serialize(s::AbstractSerializer, linfo::Core.MethodInstance)
     serialize_cycle(s, linfo) && return
-    isa(linfo.def, Module) || error("can only serialize toplevel MethodInstance objects")
     writetag(s.io, METHODINSTANCE_TAG)
-    serialize(s, linfo.uninferred)
+    if isdefined(linfo, :uninferred)
+        serialize(s, linfo.uninferred)
+    else
+        writetag(s.io, UNDEFREF_TAG)
+    end
     serialize(s, nothing)  # for backwards compat
     serialize(s, linfo.sparam_vals)
     serialize(s, Any)  # for backwards compat
@@ -455,11 +461,19 @@ function serialize(s::AbstractSerializer, t::Task)
     if istaskstarted(t) && !istaskdone(t)
         error("cannot serialize a running Task")
     end
-    state = [t.code, t.storage, t.state, t.result, t._isexception]
     writetag(s.io, TASK_TAG)
-    for fld in state
-        serialize(s, fld)
+    serialize(s, t.code)
+    serialize(s, t.storage)
+    serialize(s, t.state)
+    if t._isexception && (stk = Base.current_exceptions(t); !isempty(stk))
+        # the exception stack field is hidden inside the task, so if there
+        # is any information there make a CapturedException from it instead.
+        # TODO: Handle full exception chain, not just the first one.
+        serialize(s, CapturedException(stk[1].exception, stk[1].backtrace))
+    else
+        serialize(s, t.result)
     end
+    serialize(s, t._isexception)
 end
 
 function serialize(s::AbstractSerializer, g::GlobalRef)
@@ -496,9 +510,9 @@ function serialize_typename(s::AbstractSerializer, t::Core.TypeName)
     serialize(s, primary.parameters)
     serialize(s, primary.types)
     serialize(s, isdefined(primary, :instance))
-    serialize(s, primary.abstract)
-    serialize(s, primary.mutable)
-    serialize(s, primary.ninitialized)
+    serialize(s, t.flags & 0x1 == 0x1) # .abstract
+    serialize(s, t.flags & 0x2 == 0x2) # .mutable
+    serialize(s, Int32(length(primary.types) - t.n_uninitialized))
     if isdefined(t, :mt) && t.mt !== Symbol.name.mt
         serialize(s, t.mt.name)
         serialize(s, collect(Base.MethodList(t.mt)))
@@ -649,7 +663,7 @@ function serialize_any(s::AbstractSerializer, @nospecialize(x))
         serialize_type(s, t)
         write(s.io, x)
     else
-        if t.mutable
+        if ismutable(x)
             serialize_cycle(s, x) && return
             serialize_type(s, t, true)
         else
@@ -926,7 +940,7 @@ function handle_deserialize(s::AbstractSerializer, b::Int32)
         return deserialize_dict(s, t)
     end
     t = desertag(b)::DataType
-    if t.mutable && length(t.types) > 0  # manual specialization of fieldcount
+    if ismutabletype(t) && length(t.types) > 0  # manual specialization of fieldcount
         slot = s.counter; s.counter += 1
         push!(s.pending_refs, slot)
     end
@@ -1036,11 +1050,7 @@ function deserialize(s::AbstractSerializer, ::Type{Method})
         end
         meth.slot_syms = slot_syms
         if generator !== nothing
-            linfo = ccall(:jl_new_method_instance_uninit, Ref{Core.MethodInstance}, ())
-            linfo.specTypes = Tuple
-            linfo.inferred = generator
-            linfo.def = meth
-            meth.generator = linfo
+            meth.generator = generator
         end
         if recursion_relation !== nothing
             meth.recursion_relation = recursion_relation
@@ -1059,7 +1069,10 @@ end
 function deserialize(s::AbstractSerializer, ::Type{Core.MethodInstance})
     linfo = ccall(:jl_new_method_instance_uninit, Ref{Core.MethodInstance}, (Ptr{Cvoid},), C_NULL)
     deserialize_cycle(s, linfo)
-    linfo.uninferred = deserialize(s)::CodeInfo
+    tag = Int32(read(s.io, UInt8)::UInt8)
+    if tag != UNDEFREF_TAG
+        linfo.uninferred = handle_deserialize(s, tag)::CodeInfo
+    end
     tag = Int32(read(s.io, UInt8)::UInt8)
     if tag != UNDEFREF_TAG
         # for reading files prior to v1.2
@@ -1068,7 +1081,7 @@ function deserialize(s::AbstractSerializer, ::Type{Core.MethodInstance})
     linfo.sparam_vals = deserialize(s)::SimpleVector
     _rettype = deserialize(s)  # for backwards compat
     linfo.specTypes = deserialize(s)
-    linfo.def = deserialize(s)::Module
+    linfo.def = deserialize(s)
     return linfo
 end
 
@@ -1243,8 +1256,8 @@ function deserialize_typename(s::AbstractSerializer, number)
     else
         # reuse the same name for the type, if possible, for nicer debugging
         tn_name = isdefined(__deserialized_types__, name) ? gensym() : name
-        tn = ccall(:jl_new_typename_in, Ref{Core.TypeName}, (Any, Any),
-                   tn_name, __deserialized_types__)
+        tn = ccall(:jl_new_typename_in, Ref{Core.TypeName}, (Any, Any, Cint, Cint),
+                   tn_name, __deserialized_types__, false, false)
         makenew = true
     end
     remember_object(s, tn, number)
@@ -1254,18 +1267,19 @@ function deserialize_typename(s::AbstractSerializer, number)
     super = deserialize(s)::Type
     parameters = deserialize(s)::SimpleVector
     types = deserialize(s)::SimpleVector
+    attrs = Core.svec()
     has_instance = deserialize(s)::Bool
     abstr = deserialize(s)::Bool
     mutabl = deserialize(s)::Bool
     ninitialized = deserialize(s)::Int32
 
     if makenew
-        tn.names = names
+        Core.setfield!(tn, :names, names)
         # TODO: there's an unhanded cycle in the dependency graph at this point:
         # while deserializing super and/or types, we may have encountered
         # tn.wrapper and throw UndefRefException before we get to this point
-        ndt = ccall(:jl_new_datatype, Any, (Any, Any, Any, Any, Any, Any, Cint, Cint, Cint),
-                    tn, tn.module, super, parameters, names, types,
+        ndt = ccall(:jl_new_datatype, Any, (Any, Any, Any, Any, Any, Any, Any, Cint, Cint, Cint),
+                    tn, tn.module, super, parameters, names, types, attrs,
                     abstr, mutabl, ninitialized)
         tn.wrapper = ndt.name.wrapper
         ccall(:jl_set_const, Cvoid, (Any, Any, Any), tn.module, tn.name, tn.wrapper)
@@ -1412,7 +1426,7 @@ function deserialize(s::AbstractSerializer, t::DataType)
     if nf == 0 && t.size > 0
         # bits type
         return read(s.io, t)
-    elseif t.mutable
+    elseif ismutabletype(t)
         x = ccall(:jl_new_struct_uninit, Any, (Any,), t)
         deserialize_cycle(s, x)
         for i in 1:nf
diff --git a/stdlib/SharedArrays/src/SharedArrays.jl b/stdlib/SharedArrays/src/SharedArrays.jl
index 347d22180f7b5c..1348a68dca9571 100644
--- a/stdlib/SharedArrays/src/SharedArrays.jl
+++ b/stdlib/SharedArrays/src/SharedArrays.jl
@@ -295,14 +295,21 @@ size(S::SharedArray) = S.dims
 elsize(::Type{SharedArray{T,N}}) where {T,N} = elsize(Array{T,N}) # aka fieldtype(T, :s)
 IndexStyle(::Type{<:SharedArray}) = IndexLinear()
 
+function local_array_by_id(refid) # refid is either a Future or an id accepted by channel_from_id
+    if isa(refid, Future)
+        refid = remoteref_id(refid) # replace the Future with the id obtained from remoteref_id
+    end
+    fetch(channel_from_id(refid)) # return whatever is fetched from the channel looked up by that id
+end
+
 function reshape(a::SharedArray{T}, dims::NTuple{N,Int}) where {T,N}
     if length(a) != prod(dims)
         throw(DimensionMismatch("dimensions must be consistent with array size"))
     end
     refs = Vector{Future}(undef, length(a.pids))
     for (i, p) in enumerate(a.pids)
-        refs[i] = remotecall(p, a.refs[i], dims) do r,d
-            reshape(fetch(r),d)
+        refs[i] = remotecall(p, a.refs[i], dims) do r, d
+            reshape(local_array_by_id(r), d)
         end
     end
 
@@ -382,7 +389,7 @@ function shared_pids(pids)
         # only use workers on the current host
         pids = procs(myid())
         if length(pids) > 1
-            pids = filter(x -> x != 1, pids)
+            pids = filter(!=(1), pids)
         end
 
         onlocalhost = true
@@ -419,13 +426,7 @@ sub_1dim(S::SharedArray, pidx) = view(S.s, range_1dim(S, pidx))
 function init_loc_flds(S::SharedArray{T,N}, empty_local=false) where T where N
     if myid() in S.pids
         S.pidx = findfirst(isequal(myid()), S.pids)
-        if isa(S.refs[1], Future)
-            refid = remoteref_id(S.refs[S.pidx])
-        else
-            refid = S.refs[S.pidx]
-        end
-        c = channel_from_id(refid)
-        S.s = fetch(c)
+        S.s = local_array_by_id(S.refs[S.pidx])
         S.loc_subarr_1d = sub_1dim(S, S.pidx)
     else
         S.pidx = 0
@@ -506,9 +507,9 @@ end
 Array(S::SharedArray) = S.s
 
 # pass through getindex and setindex! - unlike DArrays, these always work on the complete array
-getindex(S::SharedArray, i::Real) = getindex(S.s, i)
+Base.@propagate_inbounds getindex(S::SharedArray, i::Real) = getindex(S.s, i)
 
-setindex!(S::SharedArray, x, i::Real) = setindex!(S.s, x, i)
+Base.@propagate_inbounds setindex!(S::SharedArray, x, i::Real) = setindex!(S.s, x, i)
 
 function fill!(S::SharedArray, v)
     vT = convert(eltype(S), v)
diff --git a/stdlib/SharedArrays/test/runtests.jl b/stdlib/SharedArrays/test/runtests.jl
index 7a4d46d4777b32..7f1bbb6891ce06 100644
--- a/stdlib/SharedArrays/test/runtests.jl
+++ b/stdlib/SharedArrays/test/runtests.jl
@@ -176,6 +176,12 @@ d = SharedArrays.shmem_fill(1.0, (10,10,10))
 @test fill(1., 100, 10) == reshape(d,(100,10))
 d = SharedArrays.shmem_fill(1.0, (10,10,10))
 @test_throws DimensionMismatch reshape(d,(50,))
+# issue #40249, reshaping on another process
+let m = SharedArray{ComplexF64}(10, 20, 30)
+    m2 = remotecall_fetch(() -> reshape(m, (100, :)), id_other)
+    @test size(m2) == (100, 60)
+    @test m2 isa SharedArray
+end
 
 # rand, randn
 d = SharedArrays.shmem_rand(dims)
diff --git a/stdlib/Sockets/src/Sockets.jl b/stdlib/Sockets/src/Sockets.jl
index 65884bc190cace..fb46b9255e6f03 100644
--- a/stdlib/Sockets/src/Sockets.jl
+++ b/stdlib/Sockets/src/Sockets.jl
@@ -139,9 +139,6 @@ function TCPServer(; delay=true)
     return tcp
 end
 
-isreadable(io::TCPSocket) = isopen(io) || bytesavailable(io) > 0
-iswritable(io::TCPSocket) = isopen(io) && io.status != StatusClosing
-
 """
     accept(server[, client])
 
@@ -578,11 +575,11 @@ Enables or disables Nagle's algorithm on a given TCP server or socket.
 """
 function nagle(sock::Union{TCPServer, TCPSocket}, enable::Bool)
     # disable or enable Nagle's algorithm on all OSes
-    Sockets.iolock_begin()
-    Sockets.check_open(sock)
+    iolock_begin()
+    check_open(sock)
     err = ccall(:uv_tcp_nodelay, Cint, (Ptr{Cvoid}, Cint), sock.handle, Cint(!enable))
     # TODO: check err
-    Sockets.iolock_end()
+    iolock_end()
     return err
 end
 
@@ -592,15 +589,15 @@ end
 On Linux systems, the TCP_QUICKACK is disabled or enabled on `socket`.
 """
 function quickack(sock::Union{TCPServer, TCPSocket}, enable::Bool)
-    Sockets.iolock_begin()
-    Sockets.check_open(sock)
+    iolock_begin()
+    check_open(sock)
     @static if Sys.islinux()
         # tcp_quickack is a linux only option
         if ccall(:jl_tcp_quickack, Cint, (Ptr{Cvoid}, Cint), sock.handle, Cint(enable)) < 0
             @warn "Networking unoptimized ( Error enabling TCP_QUICKACK : $(Libc.strerror(Libc.errno())) )" maxlog=1
         end
     end
-    Sockets.iolock_end()
+    iolock_end()
     nothing
 end
 
@@ -806,6 +803,7 @@ socket is connected to. Valid only for connected TCP sockets.
 getpeername(sock::TCPSocket) = _sockname(sock, false)
 
 function _sockname(sock, self=true)
+    sock.status == StatusInit || check_open(sock)
     rport = Ref{Cushort}(0)
     raddress = zeros(UInt8, 16)
     rfamily = Ref{Cuint}(0)
diff --git a/stdlib/Sockets/src/addrinfo.jl b/stdlib/Sockets/src/addrinfo.jl
index a4eed4d16d6fa7..586463ba0fa21e 100644
--- a/stdlib/Sockets/src/addrinfo.jl
+++ b/stdlib/Sockets/src/addrinfo.jl
@@ -260,7 +260,7 @@ julia> getipaddr(IPv6)
 ip"fe80::9731:35af:e1c5:6e49"
 ```
 
-See also: [`getipaddrs`](@ref)
+See also [`getipaddrs`](@ref).
 """
 function getipaddr(addr_type::Type{T}) where T<:IPAddr
     addrs = getipaddrs(addr_type)
@@ -305,7 +305,7 @@ julia> getipaddrs(IPv6)
  ip"fe80::445e:5fff:fe5d:5500"
 ```
 
-See also: [`islinklocaladdr`](@ref), `split(ENV["SSH_CONNECTION"], ' ')[3]`
+See also [`islinklocaladdr`](@ref).
 """
 function getipaddrs(addr_type::Type{T}=IPAddr; loopback::Bool=false) where T<:IPAddr
     addresses = T[]
diff --git a/stdlib/Sockets/test/runtests.jl b/stdlib/Sockets/test/runtests.jl
index b00eeeee2d068c..90a281050d150a 100644
--- a/stdlib/Sockets/test/runtests.jl
+++ b/stdlib/Sockets/test/runtests.jl
@@ -196,6 +196,31 @@ end
 
 
 @testset "getnameinfo on some unroutable IP addresses (RFC 5737)" begin
+    try
+        getnameinfo(ip"192.0.2.1")
+        getnameinfo(ip"198.51.100.1")
+        getnameinfo(ip"203.0.113.1")
+        getnameinfo(ip"0.1.1.1")
+        getnameinfo(ip"::ffff:0.1.1.1")
+        getnameinfo(ip"::ffff:192.0.2.1")
+        getnameinfo(ip"2001:db8::1")
+    catch
+        # NOTE: Default Ubuntu installations contain a faulty DNS configuration
+        # that returns `EAI_AGAIN` instead of `EAI_NONAME`.  A likely fix is to
+        # install `libnss-resolve`, which provides the `systemd-resolve`
+        # backend for NSS.
+        #
+        # If you are running tests inside Docker, you'll need to install
+        # `libnss-resolve` both outside Docker (i.e. on the host machine) and
+        # inside the Docker container.
+        if Sys.islinux()
+            error_msg = string(
+                "`getnameinfo` failed on an unroutable IP address. ",
+                "If your DNS setup seems to be working, try installing libnss-resolve",
+            )
+            @error(error_msg)
+        end
+    end
     @test getnameinfo(ip"192.0.2.1") == "192.0.2.1"
     @test getnameinfo(ip"198.51.100.1") == "198.51.100.1"
     @test getnameinfo(ip"203.0.113.1") == "203.0.113.1"
@@ -526,17 +551,42 @@ end
         r = @async close(s)
         @test_throws Base._UVError("connect", Base.UV_ECANCELED) Sockets.wait_connected(s)
         fetch(r)
+        close(srv)
     end
 end
 
 @testset "iswritable" begin
     let addr = Sockets.InetAddr(ip"127.0.0.1", 4445)
         srv = listen(addr)
-        s = Sockets.TCPSocket()
-        Sockets.connect!(s, addr)
-        @test iswritable(s)
-        close(s)
-        @test !iswritable(s)
+        let s = Sockets.TCPSocket()
+            Sockets.connect!(s, addr)
+            @test iswritable(s) broken=Sys.iswindows()
+            close(s)
+            @test !iswritable(s)
+        end
+        let s = Sockets.connect(addr)
+            @test iswritable(s)
+            closewrite(s)
+            @test !iswritable(s)
+            close(s)
+        end
+        close(srv)
+        srv = listen(addr)
+        let s = Sockets.connect(addr)
+            let c = accept(srv)
+                Base.errormonitor(@async try; write(c, c); finally; close(c); end)
+            end
+            @test iswritable(s)
+            write(s, "hello world\n")
+            closewrite(s)
+            @test !iswritable(s)
+            @test isreadable(s)
+            @test read(s, String) == "hello world\n"
+            @test !isreadable(s)
+            @test !isopen(s)
+            close(s)
+        end
+        close(srv)
     end
 end
 
diff --git a/stdlib/SparseArrays/Project.toml b/stdlib/SparseArrays/Project.toml
index 53d4a9f064ad3d..bc8a968cfb4ffa 100644
--- a/stdlib/SparseArrays/Project.toml
+++ b/stdlib/SparseArrays/Project.toml
@@ -8,7 +8,8 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 [extras]
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
+Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Dates", "Test", "InteractiveUtils"]
+test = ["Dates", "Test", "Printf", "InteractiveUtils"]
diff --git a/stdlib/SparseArrays/src/SparseArrays.jl b/stdlib/SparseArrays/src/SparseArrays.jl
index c3c802d5bce2cc..e3fcd1ef955c99 100644
--- a/stdlib/SparseArrays/src/SparseArrays.jl
+++ b/stdlib/SparseArrays/src/SparseArrays.jl
@@ -12,7 +12,7 @@ using LinearAlgebra
 
 import Base: +, -, *, \, /, &, |, xor, ==, zero
 import LinearAlgebra: mul!, ldiv!, rdiv!, cholesky, adjoint!, diag, eigen, dot,
-    issymmetric, istril, istriu, lu, tr, transpose!, tril!, triu!,
+    issymmetric, istril, istriu, lu, tr, transpose!, tril!, triu!, isbanded,
     cond, diagm, factorize, ishermitian, norm, opnorm, lmul!, rmul!, tril, triu, matprod
 
 import Base: acos, acosd, acot, acotd, acsch, asech, asin, asind, asinh,
diff --git a/stdlib/SparseArrays/src/abstractsparse.jl b/stdlib/SparseArrays/src/abstractsparse.jl
index 3ea70f48b4a7e1..86d6c4b3da56d5 100644
--- a/stdlib/SparseArrays/src/abstractsparse.jl
+++ b/stdlib/SparseArrays/src/abstractsparse.jl
@@ -50,17 +50,19 @@ julia> issparse(Array(sv))
 false
 ```
 """
-issparse(A::AbstractArray) = false
+function issparse(A::AbstractArray)
+    # Handle wrapper arrays: a wrapper is sparse if the array it wraps is sparse.
+    # This gets compiled away during specialization.
+    p = parent(A)
+    if p === A
+        # `parent` returned `A` itself: nothing left to unwrap and no sparse array found, so assume it is not sparse.
+        return false
+    else
+        return issparse(p)
+    end
+end
+issparse(A::DenseArray) = false
 issparse(S::AbstractSparseArray) = true
-issparse(S::LinearAlgebra.Adjoint{<:Any,<:AbstractSparseArray}) = true
-issparse(S::LinearAlgebra.Transpose{<:Any,<:AbstractSparseArray}) = true
-
-issparse(S::LinearAlgebra.Symmetric{<:Any,<:AbstractSparseMatrix}) = true
-issparse(S::LinearAlgebra.Hermitian{<:Any,<:AbstractSparseMatrix}) = true
-issparse(S::LinearAlgebra.LowerTriangular{<:Any,<:AbstractSparseMatrix}) = true
-issparse(S::LinearAlgebra.UnitLowerTriangular{<:Any,<:AbstractSparseMatrix}) = true
-issparse(S::LinearAlgebra.UpperTriangular{<:Any,<:AbstractSparseMatrix}) = true
-issparse(S::LinearAlgebra.UnitUpperTriangular{<:Any,<:AbstractSparseMatrix}) = true
 
 indtype(S::AbstractSparseArray{<:Any,Ti}) where {Ti} = Ti
 
diff --git a/stdlib/SparseArrays/src/higherorderfns.jl b/stdlib/SparseArrays/src/higherorderfns.jl
index 383211267ee3b4..a5941da7648833 100644
--- a/stdlib/SparseArrays/src/higherorderfns.jl
+++ b/stdlib/SparseArrays/src/higherorderfns.jl
@@ -8,7 +8,7 @@ import Base: map, map!, broadcast, copy, copyto!, _extrema_dims, _extrema_itr
 
 using Base: front, tail, to_shape
 using ..SparseArrays: SparseVector, SparseMatrixCSC, AbstractSparseVector, AbstractSparseMatrixCSC,
-                      AbstractSparseMatrix, AbstractSparseArray, indtype, nnz, nzrange,
+                      AbstractSparseMatrix, AbstractSparseArray, indtype, nnz, nzrange, spzeros,
                       SparseVectorUnion, AdjOrTransSparseVectorUnion, nonzeroinds, nonzeros, rowvals, getcolptr, widelength
 using Base.Broadcast: BroadcastStyle, Broadcasted, flatten
 using LinearAlgebra
@@ -132,12 +132,17 @@ function trimstorage!(A::SparseVecOrMat, maxstored)
     resize!(storedvals(A), maxstored)
     return maxstored
 end
+
 function expandstorage!(A::SparseVecOrMat, maxstored)
-    length(storedinds(A)) < maxstored && resize!(storedinds(A), maxstored)
-    length(storedvals(A)) < maxstored && resize!(storedvals(A), maxstored)
+    if length(storedinds(A)) < maxstored
+        resize!(storedinds(A), maxstored)
+        resize!(storedvals(A), maxstored)
+    end
     return maxstored
 end
 
+_checkbuffers(S::SparseMatrixCSC) = (@assert length(getcolptr(S)) == size(S, 2) + 1 && getcolptr(S)[end] - 1 == length(rowvals(S)) == length(nonzeros(S)); S)
+_checkbuffers(S::SparseVector) = (@assert length(storedvals(S)) == length(storedinds(S)); S)
 
 # (2) map[!] entry points
 map(f::Tf, A::SparseVector) where {Tf} = _noshapecheck_map(f, A)
@@ -181,7 +186,7 @@ copy(bc::SpBroadcasted1) = _noshapecheck_map(bc.f, bc.args[1])
         storedvals(C)[1] = fofnoargs
         broadcast!(f, view(storedvals(C), 2:length(storedvals(C))))
     end
-    return C
+    return _checkbuffers(C)
 end
 
 function _diffshape_broadcast(f::Tf, A::SparseVecOrMat, Bs::Vararg{SparseVecOrMat,N}) where {Tf,N}
@@ -224,22 +229,17 @@ _maxnnzfrom(shape::NTuple{2}, A::AbstractSparseMatrixCSC) = nnz(A) * div(shape[1
 @inline _unchecked_maxnnzbcres(shape, As...) = _unchecked_maxnnzbcres(shape, As)
 @inline _checked_maxnnzbcres(shape::NTuple{1}, As...) = shape[1] != 0 ? _unchecked_maxnnzbcres(shape, As) : 0
 @inline _checked_maxnnzbcres(shape::NTuple{2}, As...) = shape[1] != 0 && shape[2] != 0 ? _unchecked_maxnnzbcres(shape, As) : 0
-@inline function _allocres(shape::NTuple{1}, indextype, entrytype, maxnnz)
-    storedinds = Vector{indextype}(undef, maxnnz)
-    storedvals = Vector{entrytype}(undef, maxnnz)
-    return SparseVector(shape..., storedinds, storedvals)
-end
-@inline function _allocres(shape::NTuple{2}, indextype, entrytype, maxnnz)
-    pointers = ones(indextype, shape[2] + 1)
-    storedinds = Vector{indextype}(undef, maxnnz)
-    storedvals = Vector{entrytype}(undef, maxnnz)
-    return SparseMatrixCSC(shape..., pointers, storedinds, storedvals)
+@inline function _allocres(shape::Union{NTuple{1},NTuple{2}}, indextype, entrytype, maxnnz)
+    X = spzeros(entrytype, indextype, shape)
+    resize!(storedinds(X), maxnnz)
+    resize!(storedvals(X), maxnnz)
+    return X
 end
 
 # (4) _map_zeropres!/_map_notzeropres! specialized for a single sparse vector/matrix
 "Stores only the nonzero entries of `map(f, Array(A))` in `C`."
 function _map_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat) where Tf
-    spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
+    spaceC::Int = length(nonzeros(C))
     Ck = 1
     @inbounds for j in columns(C)
         setcolptr!(C, j, Ck)
@@ -255,7 +255,7 @@ function _map_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat) where Tf
     end
     @inbounds setcolptr!(C, numcols(C) + 1, Ck)
     trimstorage!(C, Ck - 1)
-    return C
+    return _checkbuffers(C)
 end
 """
 Densifies `C`, storing `fillvalue` in place of each unstored entry in `A` and
@@ -274,7 +274,7 @@ function _map_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMa
     end
     # NOTE: Combining the fill! above into the loop above to avoid multiple sweeps over /
     # nonsequential access of storedvals(C) does not appear to improve performance.
-    return C
+    return _checkbuffers(C)
 end
 # helper functions for these methods and some of those below
 @inline _densecoloffsets(A::SparseVector) = 0
@@ -297,7 +297,7 @@ end
 
 # (5) _map_zeropres!/_map_notzeropres! specialized for a pair of sparse vectors/matrices
 function _map_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVecOrMat) where Tf
-    spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
+    spaceC::Int = length(nonzeros(C))
     rowsentinelA = convert(indtype(A), numrows(C) + 1)
     rowsentinelB = convert(indtype(B), numrows(C) + 1)
     Ck = 1
@@ -336,7 +336,7 @@ function _map_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVe
     end
     @inbounds setcolptr!(C, numcols(C) + 1, Ck)
     trimstorage!(C, Ck - 1)
-    return C
+    return _checkbuffers(C)
 end
 function _map_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVecOrMat) where Tf
     # Build dense matrix structure in C, expanding storage if necessary
@@ -368,13 +368,13 @@ function _map_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMa
             Cx != fillvalue && (storedvals(C)[jo + Ci] = Cx)
         end
     end
-    return C
+    return _checkbuffers(C)
 end
 
 
 # (6) _map_zeropres!/_map_notzeropres! for more than two sparse matrices / vectors
 function _map_zeropres!(f::Tf, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N}) where {Tf,N}
-    spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
+    spaceC::Int = length(nonzeros(C))
     rowsentinel = numrows(C) + 1
     Ck = 1
     stopks = _colstartind_all(1, As)
@@ -398,7 +398,7 @@ function _map_zeropres!(f::Tf, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N})
     end
     @inbounds setcolptr!(C, numcols(C) + 1, Ck)
     trimstorage!(C, Ck - 1)
-    return C
+    return _checkbuffers(C)
 end
 function _map_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N}) where {Tf,N}
     # Build dense matrix structure in C, expanding storage if necessary
@@ -421,7 +421,7 @@ function _map_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, As::Vararg{Spars
             activerow = min(rows...)
         end
     end
-    return C
+    return _checkbuffers(C)
 end
 
 # helper methods for map/map! methods just above
@@ -462,7 +462,7 @@ end
 # (7) _broadcast_zeropres!/_broadcast_notzeropres! specialized for a single (input) sparse vector/matrix
 function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat) where Tf
     isempty(C) && return _finishempty!(C)
-    spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
+    spaceC::Int = length(nonzeros(C))
     # C and A cannot have the same shape, as we directed that case to map in broadcast's
     # entry point; here we need efficiently handle only heterogeneous C-A combinations where
     # one or both of C and A has at least one singleton dimension.
@@ -509,7 +509,7 @@ function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat) where
     end
     @inbounds setcolptr!(C, numcols(C) + 1, Ck)
     trimstorage!(C, Ck - 1)
-    return C
+    return _checkbuffers(C)
 end
 function _broadcast_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMat) where Tf
     # For information on this code, see comments in similar code in _broadcast_zeropres! above
@@ -540,14 +540,14 @@ function _broadcast_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseV
             end
         end
     end
-    return C
+    return _checkbuffers(C)
 end
 
 
 # (8) _broadcast_zeropres!/_broadcast_notzeropres! specialized for a pair of (input) sparse vectors/matrices
 function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVecOrMat) where Tf
     isempty(C) && return _finishempty!(C)
-    spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
+    spaceC::Int = length(nonzeros(C))
     rowsentinelA = convert(indtype(A), numrows(C) + 1)
     rowsentinelB = convert(indtype(B), numrows(C) + 1)
     # C, A, and B cannot all have the same shape, as we directed that case to map in broadcast's
@@ -711,7 +711,7 @@ function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B::Sp
     end
     @inbounds setcolptr!(C, numcols(C) + 1, Ck)
     trimstorage!(C, Ck - 1)
-    return C
+    return _checkbuffers(C)
 end
 function _broadcast_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVecOrMat) where Tf
     # For information on this code, see comments in similar code in _broadcast_zeropres! above
@@ -810,7 +810,7 @@ function _broadcast_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseV
             end
         end
     end
-    return C
+    return _checkbuffers(C)
 end
 _finishempty!(C::SparseVector) = C
 _finishempty!(C::AbstractSparseMatrixCSC) = (fill!(getcolptr(C), 1); C)
@@ -861,7 +861,7 @@ end
 # (9) _broadcast_zeropres!/_broadcast_notzeropres! for more than two (input) sparse vectors/matrices
 function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N}) where {Tf,N}
     isempty(C) && return _finishempty!(C)
-    spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
+    spaceC::Int = length(nonzeros(C))
     expandsverts = _expandsvert_all(C, As)
     expandshorzs = _expandshorz_all(C, As)
     rowsentinel = numrows(C) + 1
@@ -909,7 +909,7 @@ function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, As::Vararg{SparseVecOrMa
     end
     @inbounds setcolptr!(C, numcols(C) + 1, Ck)
     trimstorage!(C, Ck - 1)
-    return C
+    return _checkbuffers(C)
 end
 function _broadcast_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N}) where {Tf,N}
     isempty(C) && return _finishempty!(C)
@@ -950,7 +950,7 @@ function _broadcast_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, As::Vararg
             end
         end
     end
-    return C
+    return _checkbuffers(C)
 end
 
 # helper method for broadcast/broadcast! methods just above
diff --git a/stdlib/SparseArrays/src/linalg.jl b/stdlib/SparseArrays/src/linalg.jl
index 0ac2806b77a049..cf0b2a75f12825 100644
--- a/stdlib/SparseArrays/src/linalg.jl
+++ b/stdlib/SparseArrays/src/linalg.jl
@@ -79,10 +79,10 @@ end
     (T = promote_op(matprod, eltype(adjA), eltype(x)); mul!(similar(x, T, size(adjA, 1)), adjA, x, true, false))
 *(adjA::Adjoint{<:Any,<:AbstractSparseMatrixCSC}, B::AdjOrTransDenseMatrix) =
     (T = promote_op(matprod, eltype(adjA), eltype(B)); mul!(similar(B, T, (size(adjA, 1), size(B, 2))), adjA, B, true, false))
-*(transA::Transpose{<:Any,<:AbstractSparseMatrixCSC}, x::DenseInputVector) =
-    (T = promote_op(matprod, eltype(transA), eltype(x)); mul!(similar(x, T, size(transA, 1)), transA, x, true, false))
-*(transA::Transpose{<:Any,<:AbstractSparseMatrixCSC}, B::AdjOrTransDenseMatrix) =
-    (T = promote_op(matprod, eltype(transA), eltype(B)); mul!(similar(B, T, (size(transA, 1), size(B, 2))), transA, B, true, false))
+*(tA::Transpose{<:Any,<:AbstractSparseMatrixCSC}, x::DenseInputVector) =
+    (T = promote_op(matprod, eltype(tA), eltype(x)); mul!(similar(x, T, size(tA, 1)), tA, x, true, false))
+*(tA::Transpose{<:Any,<:AbstractSparseMatrixCSC}, B::AdjOrTransDenseMatrix) =
+    (T = promote_op(matprod, eltype(tA), eltype(B)); mul!(similar(B, T, (size(tA, 1), size(B, 2))), tA, B, true, false))
 
 function mul!(C::StridedVecOrMat, X::AdjOrTransDenseMatrix, A::AbstractSparseMatrixCSC, α::Number, β::Number)
     mX, nX = size(X)
@@ -138,8 +138,8 @@ for (T, t) in ((Adjoint, adjoint), (Transpose, transpose))
 end
 *(X::AdjOrTransDenseMatrix, adjA::Adjoint{<:Any,<:AbstractSparseMatrixCSC}) =
     (T = promote_op(matprod, eltype(X), eltype(adjA)); mul!(similar(X, T, (size(X, 1), size(adjA, 2))), X, adjA, true, false))
-*(X::AdjOrTransDenseMatrix, transA::Transpose{<:Any,<:AbstractSparseMatrixCSC}) =
-    (T = promote_op(matprod, eltype(X), eltype(transA)); mul!(similar(X, T, (size(X, 1), size(transA, 2))), X, transA, true, false))
+*(X::AdjOrTransDenseMatrix, tA::Transpose{<:Any,<:AbstractSparseMatrixCSC}) =
+    (T = promote_op(matprod, eltype(X), eltype(tA)); mul!(similar(X, T, (size(X, 1), size(tA, 2))), X, tA, true, false))
 
 function (*)(D::Diagonal, A::AbstractSparseMatrixCSC)
     T = Base.promote_op(*, eltype(D), eltype(A))
@@ -869,9 +869,7 @@ function nzrangelo(A, i)
 end
 ## end of symmetric/Hermitian
 
-\(A::Transpose{<:Real,<:Hermitian{<:Real,<:AbstractSparseMatrixCSC}}, B::Vector) = A.parent \ B
 \(A::Transpose{<:Complex,<:Hermitian{<:Complex,<:AbstractSparseMatrixCSC}}, B::Vector) = copy(A) \ B
-\(A::Transpose{<:Number,<:Symmetric{<:Number,<:AbstractSparseMatrixCSC}}, B::Vector) = A.parent \ B
 
 function rdiv!(A::AbstractSparseMatrixCSC{T}, D::Diagonal{T}) where T
     dd = D.diag
@@ -891,11 +889,6 @@ function rdiv!(A::AbstractSparseMatrixCSC{T}, D::Diagonal{T}) where T
     A
 end
 
-rdiv!(A::AbstractSparseMatrixCSC{T}, adjD::Adjoint{<:Any,<:Diagonal{T}}) where {T} =
-    (D = adjD.parent; rdiv!(A, conj(D)))
-rdiv!(A::AbstractSparseMatrixCSC{T}, transD::Transpose{<:Any,<:Diagonal{T}}) where {T} =
-    (D = transD.parent; rdiv!(A, D))
-
 function ldiv!(D::Diagonal{T}, A::AbstractSparseMatrixCSC{T}) where {T}
     # require_one_based_indexing(A)
     if size(A, 1) != length(D.diag)
@@ -912,10 +905,6 @@ function ldiv!(D::Diagonal{T}, A::AbstractSparseMatrixCSC{T}) where {T}
     end
     A
 end
-ldiv!(adjD::Adjoint{<:Any,<:Diagonal{T}}, A::AbstractSparseMatrixCSC{T}) where {T} =
-    (D = adjD.parent; ldiv!(conj(D), A))
-ldiv!(transD::Transpose{<:Any,<:Diagonal{T}}, A::AbstractSparseMatrixCSC{T}) where {T} =
-    (D = transD.parent; ldiv!(D, A))
 
 ## triu, tril
 
@@ -935,16 +924,15 @@ function triu(S::AbstractSparseMatrixCSC{Tv,Ti}, k::Integer=0) where {Tv,Ti}
     end
     rowval = Vector{Ti}(undef, nnz)
     nzval = Vector{Tv}(undef, nnz)
-    A = SparseMatrixCSC(m, n, colptr, rowval, nzval)
     for col = max(k+1,1) : n
         c1 = getcolptr(S)[col]
-        for c2 in nzrange(A, col)
-            rowvals(A)[c2] = rowvals(S)[c1]
-            nonzeros(A)[c2] = nonzeros(S)[c1]
+        for c2 in colptr[col]:colptr[col+1]-1
+            rowval[c2] = rowvals(S)[c1]
+            nzval[c2] = nonzeros(S)[c1]
             c1 += 1
         end
     end
-    A
+    SparseMatrixCSC(m, n, colptr, rowval, nzval)
 end
 
 function tril(S::AbstractSparseMatrixCSC{Tv,Ti}, k::Integer=0) where {Tv,Ti}
@@ -965,17 +953,16 @@ function tril(S::AbstractSparseMatrixCSC{Tv,Ti}, k::Integer=0) where {Tv,Ti}
     end
     rowval = Vector{Ti}(undef, nnz)
     nzval = Vector{Tv}(undef, nnz)
-    A = SparseMatrixCSC(m, n, colptr, rowval, nzval)
     for col = 1 : min(n, m+k)
         c1 = getcolptr(S)[col+1]-1
-        l2 = getcolptr(A)[col+1]-1
-        for c2 = 0 : l2 - getcolptr(A)[col]
-            rowvals(A)[l2 - c2] = rowvals(S)[c1]
-            nonzeros(A)[l2 - c2] = nonzeros(S)[c1]
+        l2 = colptr[col+1]-1
+        for c2 = 0 : l2 - colptr[col]
+            rowval[l2 - c2] = rowvals(S)[c1]
+            nzval[l2 - c2] = nonzeros(S)[c1]
             c1 -= 1
         end
     end
-    A
+    SparseMatrixCSC(m, n, colptr, rowval, nzval)
 end
 
 ## diff
@@ -1340,7 +1327,6 @@ end
 
 ## kron
 @inline function kron!(C::SparseMatrixCSC, A::AbstractSparseMatrixCSC, B::AbstractSparseMatrixCSC)
-    nnzC = nnz(A)*nnz(B)
     mA, nA = size(A); mB, nB = size(B)
     mC, nC = mA*mB, nA*nB
 
@@ -1348,11 +1334,9 @@ end
     nzvalC = nonzeros(C)
     colptrC = getcolptr(C)
 
-    @boundscheck begin
-        length(colptrC) == nC+1 || throw(DimensionMismatch("expect C to be preallocated with $(nC+1) colptrs "))
-        length(rowvalC) == nnzC || throw(DimensionMismatch("expect C to be preallocated with $(nnzC) rowvals"))
-        length(nzvalC) == nnzC || throw(DimensionMismatch("expect C to be preallocated with $(nnzC) nzvals"))
-    end
+    nnzC = nnz(A)*nnz(B)
+    resize!(nzvalC, nnzC)
+    resize!(rowvalC, nnzC)
 
     col = 1
     @inbounds for j = 1:nA
@@ -1381,16 +1365,13 @@ end
 end
 
 @inline function kron!(z::SparseVector, x::SparseVector, y::SparseVector)
-    nnzx = nnz(x); nnzy = nnz(y); nnzz = nnz(z);
+    nnzx = nnz(x); nnzy = nnz(y);
     nzind = nonzeroinds(z)
     nzval = nonzeros(z)
 
-    @boundscheck begin
-        nnzval = length(nzval); nnzind = length(nzind)
-        nnzz = nnzx*nnzy
-        nnzval == nnzz || throw(DimensionMismatch("expect z to be preallocated with $nnzz nonzeros"))
-        nnzind == nnzz || throw(DimensionMismatch("expect z to be preallocated with $nnzz nonzeros"))
-    end
+    nnzz = nnzx*nnzy
+    resize!(nzind, nnzz)
+    resize!(nzval, nnzz)
 
     @inbounds for i = 1:nnzx, j = 1:nnzy
         this_ind = (i-1)*nnzy+j
@@ -1402,17 +1383,12 @@ end
 
 # sparse matrix ⊗ sparse matrix
 function kron(A::AbstractSparseMatrixCSC{T1,S1}, B::AbstractSparseMatrixCSC{T2,S2}) where {T1,S1,T2,S2}
-    nnzC = nnz(A)*nnz(B)
     mA, nA = size(A); mB, nB = size(B)
     mC, nC = mA*mB, nA*nB
     Tv = typeof(one(T1)*one(T2))
     Ti = promote_type(S1,S2)
-    colptrC = Vector{Ti}(undef, nC+1)
-    rowvalC = Vector{Ti}(undef, nnzC)
-    nzvalC  = Vector{Tv}(undef, nnzC)
-    colptrC[1] = 1
-    # skip sparse_check
-    C = SparseMatrixCSC{Tv, Ti}(mC, nC, colptrC, rowvalC, nzvalC)
+    C = spzeros(Tv, Ti, mC, nC)
+    sizehint!(C, nnz(A)*nnz(B))
     return @inbounds kron!(C, A, B)
 end
 
@@ -1578,7 +1554,7 @@ for (xformtype, xformop) in ((:Adjoint, :adjoint), (:Transpose, :transpose))
             if m == n
                 if istril(A)
                     if istriu(A)
-                        return \($xformop(Diagonal(Vector(diag(A)))), B)
+                        return \(Diagonal(($xformop.(diag(A)))), B)
                     else
                         return \($xformop(LowerTriangular(A)), B)
                     end
diff --git a/stdlib/SparseArrays/src/sparsematrix.jl b/stdlib/SparseArrays/src/sparsematrix.jl
index 7bd988d881152a..af5eb4e4ab7262 100644
--- a/stdlib/SparseArrays/src/sparsematrix.jl
+++ b/stdlib/SparseArrays/src/sparsematrix.jl
@@ -25,10 +25,9 @@ struct SparseMatrixCSC{Tv,Ti<:Integer} <: AbstractSparseMatrixCSC{Tv,Ti}
 
     function SparseMatrixCSC{Tv,Ti}(m::Integer, n::Integer, colptr::Vector{Ti},
                             rowval::Vector{Ti}, nzval::Vector{Tv}) where {Tv,Ti<:Integer}
-        @noinline throwsz(str, lbl, k) =
-            throw(ArgumentError("number of $str ($lbl) must be ≥ 0, got $k"))
-        m < 0 && throwsz("rows", 'm', m)
-        n < 0 && throwsz("columns", 'n', n)
+        sparse_check_Ti(m, n, Ti)
+        _goodbuffers(Int(m), Int(n), colptr, rowval, nzval) ||
+            throw(ArgumentError("Illegal buffers for SparseMatrixCSC construction $n $colptr $rowval $nzval"))
         new(Int(m), Int(n), colptr, rowval, nzval)
     end
 end
@@ -80,6 +79,16 @@ end
 
 size(S::SparseMatrixCSC) = (getfield(S, :m), getfield(S, :n))
 
+_goodbuffers(S::SparseMatrixCSC) = _goodbuffers(size(S)..., getcolptr(S), getrowval(S), nonzeros(S))
+_checkbuffers(S::SparseMatrixCSC) = (@assert _goodbuffers(S); S)
+_checkbuffers(S::Union{Adjoint, Transpose}) = (_checkbuffers(parent(S)); S)
+
+function _goodbuffers(m, n, colptr, rowval, nzval)
+    (length(colptr) == n + 1 && colptr[end] - 1 == length(rowval) == length(nzval))
+    # stronger check for debugging purposes (additionally requires sorted row indices within each column)
+    # && all(issorted(@view rowval[colptr[i]:colptr[i+1]-1]) for i=1:n)
+end
+
 # Define an alias for views of a SparseMatrixCSC which include all rows and a unit range of the columns.
 # Also define a union of SparseMatrixCSC and this view since many methods can be defined efficiently for
 # this union by extracting the fields via the get function: getcolptr, getrowval, and getnzval. The key
@@ -117,6 +126,8 @@ julia> nnz(A)
 """
 nnz(S::AbstractSparseMatrixCSC) = Int(getcolptr(S)[size(S, 2) + 1]) - 1
 nnz(S::ReshapedArray{<:Any,1,<:AbstractSparseMatrixCSC}) = nnz(parent(S))
+nnz(S::Adjoint{<:Any,<:AbstractSparseMatrixCSC}) = nnz(parent(S))
+nnz(S::Transpose{<:Any,<:AbstractSparseMatrixCSC}) = nnz(parent(S))
 nnz(S::UpperTriangular{<:Any,<:AbstractSparseMatrixCSC}) = nnz1(S)
 nnz(S::LowerTriangular{<:Any,<:AbstractSparseMatrixCSC}) = nnz1(S)
 nnz(S::SparseMatrixCSCView) = nnz1(S)
@@ -207,6 +218,7 @@ nzrange(S::SparseMatrixCSCView, col::Integer) = nzrange(S.parent, S.indices[2][c
 nzrange(S::UpperTriangular{<:Any,<:SparseMatrixCSCUnion}, i::Integer) = nzrangeup(S.data, i)
 nzrange(S::LowerTriangular{<:Any,<:SparseMatrixCSCUnion}, i::Integer) = nzrangelo(S.data, i)
 
+const AbstractSparseMatrixCSCInclAdjointAndTranspose = Union{AbstractSparseMatrixCSC,Adjoint{<:Any,<:AbstractSparseMatrixCSC},Transpose{<:Any,<:AbstractSparseMatrixCSC}}
 function Base.isstored(A::AbstractSparseMatrixCSC, i::Integer, j::Integer)
     @boundscheck checkbounds(A, i, j)
     rows = rowvals(A)
@@ -216,24 +228,50 @@ function Base.isstored(A::AbstractSparseMatrixCSC, i::Integer, j::Integer)
     return false
 end
 
-Base.replace_in_print_matrix(A::AbstractSparseMatrix, i::Integer, j::Integer, s::AbstractString) =
+function Base.isstored(A::Union{Adjoint{<:Any,<:AbstractSparseMatrixCSC},Transpose{<:Any,<:AbstractSparseMatrixCSC}}, i::Integer, j::Integer)
+    @boundscheck checkbounds(A, i, j)
+    cols = rowvals(parent(A))
+    for istored in nzrange(parent(A), i)
+        j == cols[istored] && return true
+    end
+    return false
+end
+
+Base.replace_in_print_matrix(A::AbstractSparseMatrixCSCInclAdjointAndTranspose, i::Integer, j::Integer, s::AbstractString) =
     Base.isstored(A, i, j) ? s : Base.replace_with_centered_mark(s)
 
-function Base.show(io::IO, ::MIME"text/plain", S::AbstractSparseMatrixCSC)
+function Base.array_summary(io::IO, S::AbstractSparseMatrixCSCInclAdjointAndTranspose, dims::Tuple{Vararg{Base.OneTo}})
+    _checkbuffers(S)
+
     xnnz = nnz(S)
     m, n = size(S)
     print(io, m, "×", n, " ", typeof(S), " with ", xnnz, " stored ",
               xnnz == 1 ? "entry" : "entries")
-    if !(m == 0 || n == 0)
-        print(io, ":")
-        show(IOContext(io, :typeinfo => eltype(S)), S)
+    nothing
+end
+
+# called by `show(io, MIME("text/plain"), ::AbstractSparseMatrixCSCInclAdjointAndTranspose)`
+function Base.print_array(io::IO, S::AbstractSparseMatrixCSCInclAdjointAndTranspose)
+    if max(size(S)...) < 16
+        Base.print_matrix(io, S)
+    else
+        _show_with_braille_patterns(io, S)
     end
 end
 
-Base.show(io::IO, S::AbstractSparseMatrixCSC) = Base.show(convert(IOContext, io), S::AbstractSparseMatrixCSC)
+# always show matrices as `sparse(I, J, K, m, n)`
+function Base.show(io::IO, S::AbstractSparseMatrixCSCInclAdjointAndTranspose)
+    _checkbuffers(S)
+    # can't use `findnz`, because that expects all values not to be #undef. NOTE(review): `rowvals`/`getcolptr` are applied to `S` itself; confirm they accept the Adjoint/Transpose wrappers this signature allows.
+    I = rowvals(S)
+    J = [col for col = 1 : size(S, 2) for k = getcolptr(S)[col] : (getcolptr(S)[col+1]-1)]
+    K = nonzeros(S)
+    m, n = size(S)
+    print(io, "sparse(", I, ", ", J, ", ", K, ", ", m, ", ", n, ")")
+end
 
 const brailleBlocks = UInt16['⠁', '⠂', '⠄', '⡀', '⠈', '⠐', '⠠', '⢀']
-function _show_with_braille_patterns(io::IOContext, S::AbstractSparseMatrixCSC)
+function _show_with_braille_patterns(io::IO, S::AbstractSparseMatrixCSCInclAdjointAndTranspose)
     m, n = size(S)
     (m == 0 || n == 0) && return show(io, MIME("text/plain"), S)
 
@@ -267,47 +305,51 @@ function _show_with_braille_patterns(io::IOContext, S::AbstractSparseMatrixCSC)
     brailleGrid = fill(UInt16(10240), (scaleWidth - 1) ÷ 2 + 2, (scaleHeight - 1) ÷ 4 + 1)
     brailleGrid[end, :] .= '\n'
 
-    rvals = rowvals(S)
+    rvals = rowvals(parent(S))
     rowscale = max(1, scaleHeight - 1) / max(1, m - 1)
     colscale = max(1, scaleWidth - 1) / max(1, n - 1)
-    @inbounds for j = 1:n
-        # Scale the column index `j` to the best matching column index
-        # of a matrix of size `scaleHeight × scaleWidth`
-        sj = round(Int, (j - 1) * colscale + 1)
-        for x in nzrange(S, j)
-            # Scale the row index `i` to the best matching row index
+    if isa(S, AbstractSparseMatrixCSC)
+        @inbounds for j = 1:n
+            # Scale the column index `j` to the best matching column index
             # of a matrix of size `scaleHeight × scaleWidth`
-            si = round(Int, (rvals[x] - 1) * rowscale + 1)
-
-            # Given the index pair `(si, sj)` of the scaled matrix,
-            # calculate the corresponding triple `(k, l, p)` such that the
-            # element at `(si, sj)` can be found at position `(k, l)` in the
-            # braille grid `brailleGrid` and corresponds to the 1-dot braille
-            # character `brailleBlocks[p]`
-            k = (sj - 1) ÷ 2 + 1
-            l = (si - 1) ÷ 4 + 1
-            p = ((sj - 1) % 2) * 4 + ((si - 1) % 4 + 1)
-
-            brailleGrid[k, l] |= brailleBlocks[p]
+            sj = round(Int, (j - 1) * colscale + 1)
+            for x in nzrange(S, j)
+                # Scale the row index `i` to the best matching row index
+                # of a matrix of size `scaleHeight × scaleWidth`
+                si = round(Int, (rvals[x] - 1) * rowscale + 1)
+
+                # Given the index pair `(si, sj)` of the scaled matrix,
+                # calculate the corresponding triple `(k, l, p)` such that the
+                # element at `(si, sj)` can be found at position `(k, l)` in the
+                # braille grid `brailleGrid` and corresponds to the 1-dot braille
+                # character `brailleBlocks[p]`
+                k = (sj - 1) ÷ 2 + 1
+                l = (si - 1) ÷ 4 + 1
+                p = ((sj - 1) % 2) * 4 + ((si - 1) % 4 + 1)
+
+                brailleGrid[k, l] |= brailleBlocks[p]
+            end
+        end
+    else
+        # If `S` is an adjoint or transpose of a sparse matrix we invert the
+        # roles of the indices `i` and `j`
+        @inbounds for i = 1:m
+            si = round(Int, (i - 1) * rowscale + 1)
+            for x in nzrange(parent(S), i)
+                sj = round(Int, (rvals[x] - 1) * colscale + 1)
+                k = (sj - 1) ÷ 2 + 1
+                l = (si - 1) ÷ 4 + 1
+                p = ((sj - 1) % 2) * 4 + ((si - 1) % 4 + 1)
+                brailleGrid[k, l] |= brailleBlocks[p]
+            end
         end
     end
     foreach(c -> print(io, Char(c)), @view brailleGrid[1:end-1])
 end
 
-function Base.show(io::IOContext, S::AbstractSparseMatrixCSC)
-    if max(size(S)...) < 16 && !(get(io, :compact, false)::Bool)
-        ioc = IOContext(io, :compact => true)
-        println(ioc)
-        Base.print_matrix(ioc, S)
-        return
-    end
-    println(io)
-    _show_with_braille_patterns(io, S)
-end
-
 ## Reshape
 
-function sparse_compute_reshaped_colptr_and_rowval(colptrS::Vector{Ti}, rowvalS::Vector{Ti},
+function sparse_compute_reshaped_colptr_and_rowval!(colptrS::Vector{Ti}, rowvalS::Vector{Ti},
                                                    mS::Int, nS::Int, colptrA::Vector{Ti},
                                                    rowvalA::Vector{Ti}, mA::Int, nA::Int) where Ti
     lrowvalA = length(rowvalA)
@@ -350,7 +392,7 @@ function copy(ra::ReshapedArray{<:Any,2,<:AbstractSparseMatrixCSC})
     rowval = similar(rowvals(a))
     nzval = copy(nonzeros(a))
 
-    sparse_compute_reshaped_colptr_and_rowval(colptr, rowval, mS, nS, getcolptr(a), rowvals(a), mA, nA)
+    sparse_compute_reshaped_colptr_and_rowval!(colptr, rowval, mS, nS, getcolptr(a), rowvals(a), mA, nA)
 
     return SparseMatrixCSC(mS, nS, colptr, rowval, nzval)
 end
@@ -377,7 +419,7 @@ function copyto!(A::AbstractSparseMatrixCSC, B::AbstractSparseMatrixCSC)
             copyto!(rowvals(A), rowvals(B))
         else
             # This is like a "reshape B into A".
-            sparse_compute_reshaped_colptr_and_rowval(getcolptr(A), rowvals(A), size(A, 1), size(A, 2), getcolptr(B), rowvals(B), size(B, 1), size(B, 2))
+            sparse_compute_reshaped_colptr_and_rowval!(getcolptr(A), rowvals(A), size(A, 1), size(A, 2), getcolptr(B), rowvals(B), size(B, 1), size(B, 2))
         end
     else
         widelength(A) >= widelength(B) || throw(BoundsError())
@@ -407,10 +449,10 @@ function copyto!(A::AbstractSparseMatrixCSC, B::AbstractSparseMatrixCSC)
         @inbounds for i in 2:length(getcolptr(A))
             getcolptr(A)[i] += nnzB - lastmodptrA
         end
-        sparse_compute_reshaped_colptr_and_rowval(getcolptr(A), rowvals(A), size(A, 1), lastmodcolA-1, getcolptr(B), rowvals(B), size(B, 1), size(B, 2))
+        sparse_compute_reshaped_colptr_and_rowval!(getcolptr(A), rowvals(A), size(A, 1), lastmodcolA-1, getcolptr(B), rowvals(B), size(B, 1), size(B, 2))
     end
     copyto!(nonzeros(A), nonzeros(B))
-    return A
+    return _checkbuffers(A)
 end
 
 copyto!(A::AbstractMatrix, B::AbstractSparseMatrixCSC) = _sparse_copyto!(A, B)
@@ -469,17 +511,17 @@ function _sparsesimilar(S::AbstractSparseMatrixCSC, ::Type{TvNew}, ::Type{TiNew}
     newrowval = copyto!(similar(rowvals(S), TiNew), rowvals(S))
     return SparseMatrixCSC(size(S, 1), size(S, 2), newcolptr, newrowval, similar(nonzeros(S), TvNew))
 end
-# parent methods for similar that preserves only storage space (for when new and old dims differ)
+# parent method for similar that preserves only storage space (for when new dims are 2-d)
 _sparsesimilar(S::AbstractSparseMatrixCSC, ::Type{TvNew}, ::Type{TiNew}, dims::Dims{2}) where {TvNew,TiNew} =
-    SparseMatrixCSC(dims..., fill(one(TiNew), last(dims)+1), similar(rowvals(S), TiNew), similar(nonzeros(S), TvNew))
-# parent method for similar that allocates an empty sparse vector (when new dims are single)
+    sizehint!(spzeros(TvNew, TiNew, dims...), length(nonzeros(S)))
+# parent method for similar that allocates an empty sparse vector (for when new dims are 1-d)
 _sparsesimilar(S::AbstractSparseMatrixCSC, ::Type{TvNew}, ::Type{TiNew}, dims::Dims{1}) where {TvNew,TiNew} =
     SparseVector(dims..., similar(rowvals(S), TiNew, 0), similar(nonzeros(S), TvNew, 0))
-#
+
 # The following methods hook into the AbstractArray similar hierarchy. The first method
 # covers similar(A[, Tv]) calls, which preserve stored-entry structure, and the latter
-# methods cover similar(A[, Tv], shape...) calls, which preserve storage space when the shape
-# calls for a two-dimensional result.
+# methods cover similar(A[, Tv], shape...) calls, which partially preserve
+# storage space when the shape calls for a two-dimensional result.
 similar(S::AbstractSparseMatrixCSC{<:Any,Ti}, ::Type{TvNew}) where {Ti,TvNew} = _sparsesimilar(S, TvNew, Ti)
 similar(S::AbstractSparseMatrixCSC{<:Any,Ti}, ::Type{TvNew}, dims::Union{Dims{1},Dims{2}}) where {Ti,TvNew} =
     _sparsesimilar(S, TvNew, Ti, dims)
@@ -496,6 +538,12 @@ similar(S::AbstractSparseMatrixCSC, ::Type{TvNew}, ::Type{TiNew}, m::Integer) wh
 similar(S::AbstractSparseMatrixCSC, ::Type{TvNew}, ::Type{TiNew}, m::Integer, n::Integer) where {TvNew,TiNew} =
     _sparsesimilar(S, TvNew, TiNew, (m, n))
 
+function Base.sizehint!(S::SparseMatrixCSC, n::Integer)  # forward a capacity hint to both entry buffers of `S`
+    nhint = min(n, widelength(S))  # clamp the request so it never exceeds `widelength(S)`
+    sizehint!(getrowval(S), nhint)
+    sizehint!(nonzeros(S),  nhint)
+    return S  # return `S` itself so the call can be used as an expression
+end
 
 # converting between SparseMatrixCSC types
 SparseMatrixCSC(S::AbstractSparseMatrixCSC) = copy(S)
@@ -658,6 +706,7 @@ SparseMatrixCSC{Tv,Ti}(M::Transpose{<:Any,<:AbstractSparseMatrixCSC}) where {Tv,
 
 # converting from SparseMatrixCSC to other matrix types
 function Matrix(S::AbstractSparseMatrixCSC{Tv}) where Tv
+    _checkbuffers(S)
     A = Matrix{Tv}(undef, size(S, 1), size(S, 2))
     copyto!(A, S)
     return A
@@ -666,6 +715,17 @@ Array(S::AbstractSparseMatrixCSC) = Matrix(S)
 
 convert(T::Type{<:AbstractSparseMatrixCSC}, m::AbstractMatrix) = m isa T ? m : T(m)
 
+convert(T::Type{<:Diagonal},       m::AbstractSparseMatrixCSC) = m isa T ? m :
+    isdiag(m) ? T(m) : throw(ArgumentError("matrix cannot be represented as Diagonal"))
+convert(T::Type{<:SymTridiagonal}, m::AbstractSparseMatrixCSC) = m isa T ? m :
+    issymmetric(m) && isbanded(m, -1, 1) ? T(m) : throw(ArgumentError("matrix cannot be represented as SymTridiagonal"))
+convert(T::Type{<:Tridiagonal},    m::AbstractSparseMatrixCSC) = m isa T ? m :
+    isbanded(m, -1, 1) ? T(m) : throw(ArgumentError("matrix cannot be represented as Tridiagonal"))
+convert(T::Type{<:LowerTriangular}, m::AbstractSparseMatrixCSC) = m isa T ? m :
+    istril(m) ? T(m) : throw(ArgumentError("matrix cannot be represented as LowerTriangular"))
+convert(T::Type{<:UpperTriangular}, m::AbstractSparseMatrixCSC) = m isa T ? m :
+    istriu(m) ? T(m) : throw(ArgumentError("matrix cannot be represented as UpperTriangular"))
+
 float(S::SparseMatrixCSC) = SparseMatrixCSC(size(S, 1), size(S, 2), copy(getcolptr(S)), copy(rowvals(S)), float.(nonzeros(S)))
 complex(S::SparseMatrixCSC) = SparseMatrixCSC(size(S, 1), size(S, 2), copy(getcolptr(S)), copy(rowvals(S)), complex(copy(nonzeros(S))))
 
@@ -689,7 +749,7 @@ julia> sparse(A)
   ⋅    ⋅   1.0
 ```
 """
-sparse(A::AbstractMatrix{Tv}) where {Tv} = convert(SparseMatrixCSC{Tv,Int}, A)
+sparse(A::AbstractMatrix{Tv}) where {Tv} = convert(SparseMatrixCSC{Tv}, A)
 
 sparse(S::AbstractSparseMatrixCSC) = copy(S)
 
@@ -1016,6 +1076,8 @@ respectively. Simultaneously fixes the one-position-forward shift in `getcolptr(
 """
 @noinline function _distributevals_halfperm!(X::AbstractSparseMatrixCSC{Tv,Ti},
         A::AbstractSparseMatrixCSC{TvA,Ti}, q::AbstractVector{<:Integer}, f::Function) where {Tv,TvA,Ti}
+    resize!(nonzeros(X), nnz(A))
+    resize!(rowvals(X), nnz(A))
     @inbounds for Xi in 1:size(A, 2)
         Aj = q[Xi]
         for Ak in nzrange(A, Aj)
@@ -1035,16 +1097,8 @@ function ftranspose!(X::AbstractSparseMatrixCSC{Tv,Ti}, A::AbstractSparseMatrixC
         throw(DimensionMismatch(string("destination argument `X`'s column count, ",
             "`size(X, 2) (= $(size(X, 2)))`, must match source argument `A`'s row count, `size(A, 1) (= $(size(A, 1)))`")))
     elseif size(X, 1) != size(A, 2)
-        throw(DimensionMismatch(string("destination argument `X`'s row count,
-            `size(X, 1) (= $(size(X, 1)))`, must match source argument `A`'s column count, `size(A, 2) (= $(size(A, 2)))`")))
-    elseif length(rowvals(X)) < nnz(A)
-        throw(ArgumentError(string("the length of destination argument `X`'s `rowval` ",
-            "array, `length(rowvals(X)) (= $(length(rowvals(X))))`, must be greater than or ",
-            "equal to source argument `A`'s allocated entry count, `nnz(A) (= $(nnz(A)))`")))
-    elseif length(nonzeros(X)) < nnz(A)
-        throw(ArgumentError(string("the length of destination argument `X`'s `nzval` ",
-            "array, `length(nonzeros(X)) (= $(length(nonzeros(X))))`, must be greater than or ",
-            "equal to source argument `A`'s allocated entry count, `nnz(A) (= $(nnz(A)))`")))
+        throw(DimensionMismatch(string("destination argument `X`'s row count, ",
+            "`size(X, 1) (= $(size(X, 1)))`, must match source argument `A`'s column count, `size(A, 2) (= $(size(A, 2)))`")))
     end
     halfperm!(X, A, 1:size(A, 2), f)
 end
@@ -1055,8 +1109,9 @@ adjoint!(X::AbstractSparseMatrixCSC{Tv,Ti}, A::AbstractSparseMatrixCSC{Tv,Ti}) w
 function ftranspose(A::AbstractSparseMatrixCSC{TvA,Ti}, f::Function, eltype::Type{Tv} = TvA) where {Tv,TvA,Ti}
     X = SparseMatrixCSC(size(A, 2), size(A, 1),
                         ones(Ti, size(A, 1)+1),
-                        Vector{Ti}(undef, nnz(A)),
-                        Vector{Tv}(undef, nnz(A)))
+                        Vector{Ti}(undef, 0),
+                        Vector{Tv}(undef, 0))
+    sizehint!(X, nnz(A))
     halfperm!(X, A, 1:size(A, 2), f)
 end
 adjoint(A::AbstractSparseMatrixCSC) = Adjoint(A)
@@ -1091,7 +1146,7 @@ avoids an unnecessary length-`nnz(A)` array-sweep and associated recomputation o
 pointers. See [`halfperm!`](:func:SparseArrays.halfperm!) for additional algorithmic
 information.
 
-See also: `unchecked_aliasing_permute!`
+See also `unchecked_aliasing_permute!`.
 """
 function unchecked_noalias_permute!(X::AbstractSparseMatrixCSC{Tv,Ti},
         A::AbstractSparseMatrixCSC{Tv,Ti}, p::AbstractVector{<:Integer},
@@ -1268,16 +1323,17 @@ For additional (algorithmic) information, and for versions of these methods that
 argument checking, see (unexported) parent methods `unchecked_noalias_permute!`
 and `unchecked_aliasing_permute!`.
 
-See also: [`permute`](@ref).
+See also [`permute`](@ref).
 """
 function permute!(X::AbstractSparseMatrixCSC{Tv,Ti}, A::AbstractSparseMatrixCSC{Tv,Ti},
         p::AbstractVector{<:Integer}, q::AbstractVector{<:Integer}) where {Tv,Ti}
     _checkargs_sourcecompatdest_permute!(A, X)
     _checkargs_sourcecompatperms_permute!(A, p, q)
-    C = SparseMatrixCSC(size(A, 2), size(A, 1),
-                        ones(Ti, size(A, 1) + 1),
-                        Vector{Ti}(undef, nnz(A)),
-                        Vector{Tv}(undef, nnz(A)))
+    # bypass strict buffer checking
+    C = spzeros(Tv, Ti, size(A,2), size(A,1))
+    resize!(getrowval(C), nnz(A))
+    resize!(getnzval(C), nnz(A))
+
     _checkargs_permutationsvalid_permute!(p, getcolptr(C), q, getcolptr(X))
     unchecked_noalias_permute!(X, A, p, q, C)
 end
@@ -1293,10 +1349,9 @@ end
 function permute!(A::AbstractSparseMatrixCSC{Tv,Ti}, p::AbstractVector{<:Integer},
         q::AbstractVector{<:Integer}) where {Tv,Ti}
     _checkargs_sourcecompatperms_permute!(A, p, q)
-    C = SparseMatrixCSC(size(A, 2), size(A, 1),
-                        ones(Ti, size(A, 1) + 1),
-                        Vector{Ti}(undef, nnz(A)),
-                        Vector{Tv}(undef, nnz(A)))
+    C = spzeros(Tv, Ti, size(A,2), size(A,1))
+    resize!(getrowval(C), nnz(A))
+    resize!(getnzval(C), nnz(A))
     workcolptr = Vector{Ti}(undef, size(A, 2) + 1)
     _checkargs_permutationsvalid_permute!(p, getcolptr(C), q, workcolptr)
     unchecked_aliasing_permute!(A, p, q, C, workcolptr)
@@ -1355,14 +1410,14 @@ julia> permute(A, [1, 2, 3, 4], [4, 3, 2, 1])
 function permute(A::AbstractSparseMatrixCSC{Tv,Ti}, p::AbstractVector{<:Integer},
         q::AbstractVector{<:Integer}) where {Tv,Ti}
     _checkargs_sourcecompatperms_permute!(A, p, q)
-    X = SparseMatrixCSC(size(A, 1), size(A, 2),
-                        ones(Ti, size(A, 2) + 1),
-                        Vector{Ti}(undef, nnz(A)),
-                        Vector{Tv}(undef, nnz(A)))
-    C = SparseMatrixCSC(size(A, 2), size(A, 1),
-                        ones(Ti, size(A, 1) + 1),
-                        Vector{Ti}(undef, nnz(A)),
-                        Vector{Tv}(undef, nnz(A)))
+    # bypass strict buffer checking
+    X = spzeros(Tv, Ti, size(A,1), size(A,2))
+    resize!(getrowval(X), nnz(A))
+    resize!(getnzval(X), nnz(A))
+    # bypass strict buffer checking
+    C = spzeros(Tv, Ti, size(A,2), size(A,1))
+    resize!(getrowval(C), nnz(A))
+    resize!(getnzval(C), nnz(A))
     _checkargs_permutationsvalid_permute!(p, getcolptr(C), q, getcolptr(X))
     unchecked_noalias_permute!(X, A, p, q, C)
 end
@@ -1455,6 +1510,7 @@ For an out-of-place version, see [`dropzeros`](@ref). For
 algorithmic information, see `fkeep!`.
 """
 dropzeros!(A::AbstractSparseMatrixCSC) = fkeep!(A, (i, j, x) -> !iszero(x))
+
 """
     dropzeros(A::AbstractSparseMatrixCSC;)
 
@@ -1585,13 +1641,14 @@ argument specifies a random number generator, see [Random Numbers](@ref).
 # Examples
 ```jldoctest; setup = :(using Random; Random.seed!(1234))
 julia> sprand(Bool, 2, 2, 0.5)
-2×2 SparseMatrixCSC{Bool, Int64} with 1 stored entry:
+2×2 SparseMatrixCSC{Bool, Int64} with 2 stored entries:
  ⋅  ⋅
- ⋅  1
+ 1  1
 
 julia> sprand(Float64, 3, 0.75)
-3-element SparseVector{Float64, Int64} with 1 stored entry:
-  [3]  =  0.298614
+3-element SparseVector{Float64, Int64} with 2 stored entries:
+  [1]  =  0.523355
+  [2]  =  0.0890391
 ```
 """
 function sprand(r::AbstractRNG, m::Integer, n::Integer, density::AbstractFloat, rfn::Function, ::Type{T}=eltype(rfn(r, 1))) where T
@@ -1632,9 +1689,9 @@ argument specifies a random number generator, see [Random Numbers](@ref).
 # Examples
 ```jldoctest; setup = :(using Random; Random.seed!(0))
 julia> sprandn(2, 2, 0.75)
-2×2 SparseMatrixCSC{Float64, Int64} with 2 stored entries:
-  ⋅   0.586617
-  ⋅   0.297336
+2×2 SparseMatrixCSC{Float64, Int64} with 3 stored entries:
+ -1.92631  -0.858041
+   ⋅        0.0213808
 ```
 """
 sprandn(r::AbstractRNG, m::Integer, n::Integer, density::AbstractFloat) =
@@ -2045,7 +2102,7 @@ function _findr(op, A, region, Tv)
             throw(ArgumentError("array slices must be non-empty"))
         else
             ri = Base.reduced_indices0(A, region)
-            return (similar(A, ri), zeros(Ti, ri))
+            return (zeros(Tv, ri), zeros(Ti, ri))
         end
     end
 
@@ -2480,7 +2537,7 @@ function permute_rows!(S::AbstractSparseMatrixCSC{Tv,Ti}, pI::Vector{Int}) where
             k += 1
         end
     end
-    S
+    return _checkbuffers(S)
 end
 
 function getindex_general(A::AbstractSparseMatrixCSC, I::AbstractVector, J::AbstractVector)
@@ -2634,6 +2691,7 @@ function Base.fill!(V::SubArray{Tv, <:Any, <:AbstractSparseMatrixCSC{Tv}, <:Tupl
     else
         _spsetnz_setindex!(A, convert(Tv, x), I, J)
     end
+    return _checkbuffers(A)
 end
 """
 Helper method for immediately preceding fill! method. For all (i,j) such that i in I and
@@ -2917,7 +2975,7 @@ function setindex!(A::AbstractSparseMatrixCSC{Tv,Ti}, V::AbstractVecOrMat, Ix::U
     deleteat!(rowvalA, colptrA[end]:length(rowvalA))
     deleteat!(nzvalA, colptrA[end]:length(nzvalA))
 
-    return A
+    return _checkbuffers(A)
 end
 
 # Logical setindex!
@@ -3026,7 +3084,7 @@ function setindex!(A::AbstractSparseMatrixCSC, x::AbstractArray, I::AbstractMatr
             deleteat!(rowvalB, bidx:n)
         end
     end
-    A
+    return _checkbuffers(A)
 end
 
 function setindex!(A::AbstractSparseMatrixCSC, x::AbstractArray, Ix::AbstractVector{<:Integer})
@@ -3137,7 +3195,7 @@ function setindex!(A::AbstractSparseMatrixCSC, x::AbstractArray, Ix::AbstractVec
             deleteat!(rowvalB, bidx:n)
         end
     end
-    A
+    return _checkbuffers(A)
 end
 
 ## dropstored! methods
@@ -3173,7 +3231,7 @@ function dropstored!(A::AbstractSparseMatrixCSC, i::Integer, j::Integer)
             @inbounds getcolptr(A)[m] -= 1
         end
     end
-    return A
+    return _checkbuffers(A)
 end
 """
     dropstored!(A::AbstractSparseMatrixCSC, I::AbstractVector{<:Integer}, J::AbstractVector{<:Integer})
@@ -3260,7 +3318,7 @@ function dropstored!(A::AbstractSparseMatrixCSC,
         deleteat!(rowvalA, rowidx:nnzA)
         deleteat!(nzvalA, rowidx:nnzA)
     end
-    return A
+    return _checkbuffers(A)
 end
 dropstored!(A::AbstractSparseMatrixCSC, i::Integer, J::AbstractVector{<:Integer}) = dropstored!(A, [i], J)
 dropstored!(A::AbstractSparseMatrixCSC, I::AbstractVector{<:Integer}, j::Integer) = dropstored!(A, I, [j])
@@ -3274,6 +3332,10 @@ dropstored!(A::AbstractSparseMatrixCSC, ::Colon) = dropstored!(A, :, :)
 
 # Sparse concatenation
 
+promote_idxtype(::AbstractSparseMatrixCSC{<:Any, Ti}) where {Ti} = Ti  # base case: one matrix yields its own index type
+promote_idxtype(::AbstractSparseMatrixCSC{<:Any, Ti}, X::AbstractSparseMatrixCSC...) where {Ti} =
+    promote_type(Ti, promote_idxtype(X...))  # promote this index type with that of the remaining matrices
+
 function vcat(X::AbstractSparseMatrixCSC...)
     num = length(X)
     mX = Int[ size(x, 1) for x in X ]
@@ -3288,7 +3350,7 @@ function vcat(X::AbstractSparseMatrixCSC...)
     end
 
     Tv = promote_eltype(X...)
-    Ti = promote_eltype(map(x->rowvals(x), X)...)
+    Ti = promote_idxtype(X...)
 
     nnzX = Int[ nnz(x) for x in X ]
     nnz_res = sum(nnzX)
@@ -3340,7 +3402,7 @@ function hcat(X::AbstractSparseMatrixCSC...)
     n = sum(nX)
 
     Tv = promote_eltype(X...)
-    Ti = promote_eltype(map(x->rowvals(x), X)...)
+    Ti = promote_idxtype(X...)
 
     colptr = Vector{Ti}(undef, n+1)
     nnzX = Int[ nnz(x) for x in X ]
@@ -3694,74 +3756,6 @@ function tr(A::AbstractSparseMatrixCSC{Tv}) where Tv
     return s
 end
 
-
-# Sort all the indices in each column of a CSC sparse matrix
-# sortSparseMatrixCSC!(A, sortindices = :sortcols)        # Sort each column with sort()
-# sortSparseMatrixCSC!(A, sortindices = :doubletranspose) # Sort with a double transpose
-function sortSparseMatrixCSC!(A::AbstractSparseMatrixCSC{Tv,Ti}; sortindices::Symbol = :sortcols) where {Tv,Ti}
-    if sortindices === :doubletranspose
-        nB, mB = size(A)
-        B = SparseMatrixCSC(mB, nB, Vector{Ti}(undef, nB+1), similar(rowvals(A)), similar(nonzeros(A)))
-        transpose!(B, A)
-        transpose!(A, B)
-        return A
-    end
-
-    m, n = size(A)
-    colptr = getcolptr(A); rowval = rowvals(A); nzval = nonzeros(A)
-
-    index = zeros(Ti, m)
-    row = zeros(Ti, m)
-    val = zeros(Tv, m)
-
-    perm = Base.Perm(Base.ord(isless, identity, false, Base.Order.Forward), row)
-
-    @inbounds for i = 1:n
-        nzr = nzrange(A, i)
-        numrows = length(nzr)
-        if numrows <= 1
-            continue
-        elseif numrows == 2
-            f = first(nzr)
-            s = f+1
-            if rowval[f] > rowval[s]
-                rowval[f], rowval[s] = rowval[s], rowval[f]
-                nzval[f],  nzval[s]  = nzval[s],  nzval[f]
-            end
-            continue
-        end
-        resize!(row, numrows)
-        resize!(index, numrows)
-
-        jj = 1
-        @simd for j = nzr
-            row[jj] = rowval[j]
-            val[jj] = nzval[j]
-            jj += 1
-        end
-
-        if numrows <= 16
-            alg = Base.Sort.InsertionSort
-        else
-            alg = Base.Sort.QuickSort
-        end
-
-        # Reset permutation
-        index .= 1:numrows
-
-        sort!(index, alg, perm)
-
-        jj = 1
-        @simd for j = nzr
-            rowval[j] = row[index[jj]]
-            nzval[j] = val[index[jj]]
-            jj += 1
-        end
-    end
-
-    return A
-end
-
 ## rotations
 
 function rot180(A::AbstractSparseMatrixCSC)
@@ -3837,7 +3831,7 @@ function circshift!(O::AbstractSparseMatrixCSC, X::AbstractSparseMatrixCSC, (r,c
     @inbounds for i=1:size(O, 2)
         subvector_shifter!(rowvals(O), nonzeros(O), getcolptr(O)[i], getcolptr(O)[i+1]-1, size(O, 1), r)
     end
-    return O
+    return _checkbuffers(O)
 end
 
 circshift!(O::AbstractSparseMatrixCSC, X::AbstractSparseMatrixCSC, (r,)::Base.DimsInteger{1}) = circshift!(O, X, (r,0))
diff --git a/stdlib/SparseArrays/src/sparsevector.jl b/stdlib/SparseArrays/src/sparsevector.jl
index 777be897ea7df0..55ad738a7eb770 100644
--- a/stdlib/SparseArrays/src/sparsevector.jl
+++ b/stdlib/SparseArrays/src/sparsevector.jl
@@ -15,7 +15,7 @@ import LinearAlgebra: promote_to_array_type, promote_to_arrays_
 Vector type for storing sparse vectors.
 """
 struct SparseVector{Tv,Ti<:Integer} <: AbstractSparseVector{Tv,Ti}
-    n::Int              # Length of the sparse vector
+    n::Ti              # Length of the sparse vector
     nzind::Vector{Ti}   # Indices of stored values
     nzval::Vector{Tv}   # Stored values, typically nonzeros
 
@@ -23,7 +23,7 @@ struct SparseVector{Tv,Ti<:Integer} <: AbstractSparseVector{Tv,Ti}
         n >= 0 || throw(ArgumentError("The number of elements must be non-negative."))
         length(nzind) == length(nzval) ||
             throw(ArgumentError("index and value vectors must be the same length"))
-        new(convert(Int, n), nzind, nzval)
+        new(convert(Ti, n), nzind, nzval)
     end
 end
 
@@ -84,30 +84,37 @@ rowvals(x::SparseVectorUnion) = nonzeroinds(x)
 indtype(x::SparseColumnView) = indtype(parent(x))
 indtype(x::SparseVectorView) = indtype(parent(x))
 
+
+function Base.sizehint!(v::SparseVector, newlen::Integer)  # forward a capacity hint to both buffers of `v`
+    sizehint!(nonzeroinds(v), newlen)  # indices of stored values
+    sizehint!(nonzeros(v), newlen)  # stored values
+    return v  # return `v` itself
+end
+
 ## similar
 #
 # parent method for similar that preserves stored-entry structure (for when new and old dims match)
 _sparsesimilar(S::SparseVector, ::Type{TvNew}, ::Type{TiNew}) where {TvNew,TiNew} =
     SparseVector(length(S), copyto!(similar(nonzeroinds(S), TiNew), nonzeroinds(S)), similar(nonzeros(S), TvNew))
-# parent method for similar that preserves nothing (for when old and new dims differ, and new is 1d)
+# parent method for similar that preserves nothing (for when new dims are 1-d)
 _sparsesimilar(S::SparseVector, ::Type{TvNew}, ::Type{TiNew}, dims::Dims{1}) where {TvNew,TiNew} =
     SparseVector(dims..., similar(nonzeroinds(S), TiNew, 0), similar(nonzeros(S), TvNew, 0))
 # parent method for similar that preserves storage space (for old and new dims differ, and new is 2d)
-_sparsesimilar(S::SparseVector, ::Type{TvNew}, ::Type{TiNew}, dims::Dims{2}) where {TvNew,TiNew} =
-    SparseMatrixCSC(dims..., fill(one(TiNew), last(dims)+1), similar(nonzeroinds(S), TiNew), similar(nonzeros(S), TvNew))
+function _sparsesimilar(S::SparseVector, ::Type{TvNew}, ::Type{TiNew}, dims::Dims{2}) where {TvNew,TiNew}
+    S1 = SparseMatrixCSC(dims..., fill(one(TiNew), last(dims)+1), similar(nonzeroinds(S), TiNew, 0), similar(nonzeros(S), TvNew, 0))
+    return sizehint!(S1, min(widelength(S1), length(nonzeroinds(S))))
+end
 # The following methods hook into the AbstractArray similar hierarchy. The first method
 # covers similar(A[, Tv]) calls, which preserve stored-entry structure, and the latter
 # methods cover similar(A[, Tv], shape...) calls, which preserve nothing if the dims
-# specify a SparseVector result and storage space if the dims specify a SparseMatrixCSC result.
+# specify a SparseVector or a SparseMatrixCSC result.
 similar(S::SparseVector{<:Any,Ti}, ::Type{TvNew}) where {Ti,TvNew} =
     _sparsesimilar(S, TvNew, Ti)
 similar(S::SparseVector{<:Any,Ti}, ::Type{TvNew}, dims::Union{Dims{1},Dims{2}}) where {Ti,TvNew} =
     _sparsesimilar(S, TvNew, Ti, dims)
 # The following methods cover similar(A, Tv, Ti[, shape...]) calls, which specify the
 # result's index type in addition to its entry type, and aren't covered by the hooks above.
-# The calls without shape again preserve stored-entry structure, whereas those with
-# one-dimensional shape preserve nothing, and those with two-dimensional shape
-# preserve storage space.
+# The calls without shape again preserve stored-entry structure but no storage space.
 similar(S::SparseVector, ::Type{TvNew}, ::Type{TiNew}) where{TvNew,TiNew} =
     _sparsesimilar(S, TvNew, TiNew)
 similar(S::SparseVector, ::Type{TvNew}, ::Type{TiNew}, dims::Union{Dims{1},Dims{2}}) where {TvNew,TiNew} =
@@ -125,8 +132,11 @@ Base.unaliascopy(S::SparseVector) = typeof(S)(length(S), unaliascopy(nonzeroinds
 ### Construct empty sparse vector
 
 spzeros(len::Integer) = spzeros(Float64, len)
+spzeros(dims::Tuple{<:Integer}) = spzeros(Float64, dims[1])
 spzeros(::Type{T}, len::Integer) where {T} = SparseVector(len, Int[], T[])
+spzeros(::Type{T}, dims::Tuple{<:Integer}) where {T} = spzeros(T, dims[1])
 spzeros(::Type{Tv}, ::Type{Ti}, len::Integer) where {Tv,Ti<:Integer} = SparseVector(len, Ti[], Tv[])
+spzeros(::Type{Tv}, ::Type{Ti}, dims::Tuple{<:Integer}) where {Tv,Ti<:Integer} = spzeros(Tv, Ti, dims[1])
 
 LinearAlgebra.fillstored!(x::SparseVector, y) = (fill!(nonzeros(x), y); x)
 
@@ -1073,35 +1083,43 @@ const _Triangular_DenseArrays{T,A<:Matrix} = LinearAlgebra.AbstractTriangular{T,
 const _Annotated_DenseArrays = Union{_Triangular_DenseArrays, _Symmetric_DenseArrays, _Hermitian_DenseArrays}
 const _Annotated_Typed_DenseArrays{T} = Union{_Triangular_DenseArrays{T}, _Symmetric_DenseArrays{T}, _Hermitian_DenseArrays{T}}
 
-const _SparseConcatGroup = Union{Vector, Adjoint{<:Any,<:Vector}, Transpose{<:Any,<:Vector}, Matrix, _SparseConcatArrays, _Annotated_SparseConcatArrays, _Annotated_DenseArrays}
-const _DenseConcatGroup = Union{Vector, Adjoint{<:Any,<:Vector}, Transpose{<:Any,<:Vector}, Matrix, _Annotated_DenseArrays}
+const _SparseConcatGroup = Union{Number, Vector, Adjoint{<:Any,<:Vector}, Transpose{<:Any,<:Vector}, Matrix, _SparseConcatArrays, _Annotated_SparseConcatArrays, _Annotated_DenseArrays}
+const _DenseConcatGroup = Union{Number, Vector, Adjoint{<:Any,<:Vector}, Transpose{<:Any,<:Vector}, Matrix, _Annotated_DenseArrays}
 const _TypedDenseConcatGroup{T} = Union{Vector{T}, Adjoint{T,Vector{T}}, Transpose{T,Vector{T}}, Matrix{T}, _Annotated_Typed_DenseArrays{T}}
 
 # Concatenations involving un/annotated sparse/special matrices/vectors should yield sparse arrays
+_makesparse(x::Number) = x  # scalars are passed through unchanged
+_makesparse(x::AbstractArray) = SparseMatrixCSC(issparse(x) ? x : sparse(x))  # arrays are converted via `sparse` unless already sparse
+
 function Base._cat(dims, Xin::_SparseConcatGroup...)
-    X = map(x -> SparseMatrixCSC(issparse(x) ? x : sparse(x)), Xin)
+    X = map(_makesparse, Xin)
     T = promote_eltype(Xin...)
     Base.cat_t(T, X...; dims=dims)
 end
 function hcat(Xin::_SparseConcatGroup...)
-    X = map(x -> SparseMatrixCSC(issparse(x) ? x : sparse(x)), Xin)
-    hcat(X...)
+    X = map(_makesparse, Xin)
+    return cat(X..., dims=Val(2))
 end
 function vcat(Xin::_SparseConcatGroup...)
-    X = map(x -> SparseMatrixCSC(issparse(x) ? x : sparse(x)), Xin)
-    vcat(X...)
-end
-function hvcat(rows::Tuple{Vararg{Int}}, X::_SparseConcatGroup...)
-    nbr = length(rows)  # number of block rows
-
-    tmp_rows = Vector{SparseMatrixCSC}(undef, nbr)
-    k = 0
-    @inbounds for i = 1 : nbr
-        tmp_rows[i] = hcat(X[(1 : rows[i]) .+ k]...)
-        k += rows[i]
-    end
-    vcat(tmp_rows...)
-end
+    X = map(_makesparse, Xin)
+    return cat(X..., dims=Val(1))
+end
+hvcat(rows::Tuple{Vararg{Int}}, X::_SparseConcatGroup...) =
+    vcat(_hvcat_rows(rows, X...)...)
+function _hvcat_rows((row1, rows...)::Tuple{Vararg{Int}}, X::_SparseConcatGroup...)
+    if row1 ≤ 0
+        throw(ArgumentError("length of block row must be positive, got $row1"))
+    end
+    # assert `X` is non-empty so that inference of `eltype` won't include `Type{Union{}}`
+    T = eltype(X::Tuple{Any,Vararg{Any}})
+    # inference of `getindex` may be imprecise in case `row1` is not const-propagated up
+    # to here, so help inference with the following type-assertions
+    return (
+        hcat(X[1 : row1]::Tuple{typeof(X[1]),Vararg{T}}...),
+        _hvcat_rows(rows, X[row1+1:end]::Tuple{Vararg{T}}...)...
+    )
+end
+_hvcat_rows(::Tuple{}, X::_SparseConcatGroup...) = ()
 
 # make sure UniformScaling objects are converted to sparse matrices for concatenation
 promote_to_array_type(A::Tuple{Vararg{Union{_SparseConcatGroup,UniformScaling}}}) = SparseMatrixCSC
@@ -1354,34 +1372,50 @@ end
 
 ### Reduction
 
+function _sum(f, x::AbstractSparseVector)  # sum of `f` over all `length(x)` entries, stored and implicit
+    n = length(x)
+    n > 0 || return sum(f, nonzeros(x)) # return zero() of proper type
+    m = nnz(x)  # number of stored entries
+    (m == 0 ? n * f(zero(eltype(x))) :  # nothing stored: `n` times `f` of zero
+     m == n ? sum(f, nonzeros(x)) :  # every entry stored: plain sum over the stored values
+     Base.add_sum((n - m) * f(zero(eltype(x))), sum(f, nonzeros(x))))  # `n - m` implicit zeros plus the stored values
+end
+
+sum(f::Union{Function, Type}, x::AbstractSparseVector) = _sum(f, x) # resolve ambiguity
+sum(f, x::AbstractSparseVector) = _sum(f, x)
 sum(x::AbstractSparseVector) = sum(nonzeros(x))
 
-function maximum(x::AbstractSparseVector{T}) where T<:Real
+function _maximum(f, x::AbstractSparseVector)  # maximum of `f` over stored entries and implicit zeros
     n = length(x)
-    n > 0 || throw(ArgumentError("maximum over empty array is not allowed."))
+    if n == 0
+        if f === abs || f === abs2
+            return f(zero(eltype(x))) # preserving maximum(abs/abs2, x) behaviour in 1.0.x; typed like the non-empty branches
+        else
+            throw(ArgumentError("maximum over an empty array is not allowed."))
+        end
+    end
     m = nnz(x)
-    (m == 0 ? zero(T) :
-     m == n ? maximum(nonzeros(x)) :
-     max(zero(T), maximum(nonzeros(x))))::T
+    (m == 0 ? f(zero(eltype(x))) :  # nothing stored: every entry is an implicit zero
+     m == n ? maximum(f, nonzeros(x)) :  # every entry stored: implicit zeros play no role
+     max(f(zero(eltype(x))), maximum(f, nonzeros(x))))  # mixed: compare `f` of zero with the stored maximum
 end
 
-function minimum(x::AbstractSparseVector{T}) where T<:Real
+maximum(f::Union{Function, Type}, x::AbstractSparseVector) = _maximum(f, x) # resolve ambiguity
+maximum(f, x::AbstractSparseVector) = _maximum(f, x)
+maximum(x::AbstractSparseVector) = maximum(identity, x)
+
+function _minimum(f, x::AbstractSparseVector)
     n = length(x)
-    n > 0 || throw(ArgumentError("minimum over empty array is not allowed."))
+    n > 0 || throw(ArgumentError("minimum over an empty array is not allowed."))
     m = nnz(x)
-    (m == 0 ? zero(T) :
-     m == n ? minimum(nonzeros(x)) :
-     min(zero(T), minimum(nonzeros(x))))::T
+    (m == 0 ? f(zero(eltype(x))) :
+     m == n ? minimum(f, nonzeros(x)) :
+     min(f(zero(eltype(x))), minimum(f, nonzeros(x))))
 end
 
-for f in [:sum, :maximum, :minimum], op in [:abs, :abs2]
-    SV = :AbstractSparseVector
-    if f === :minimum
-        @eval ($f)(::typeof($op), x::$SV{T}) where {T<:Number} = nnz(x) < length(x) ? ($op)(zero(T)) : ($f)($op, nonzeros(x))
-    else
-        @eval ($f)(::typeof($op), x::$SV) = ($f)($op, nonzeros(x))
-    end
-end
+minimum(f::Union{Function, Type}, x::AbstractSparseVector) = _minimum(f, x) # resolve ambiguity
+minimum(f, x::AbstractSparseVector) = _minimum(f, x)
+minimum(x::AbstractSparseVector) = minimum(identity, x)
 
 norm(x::SparseVectorUnion, p::Real=2) = norm(nonzeros(x), p)
 
@@ -1542,9 +1576,6 @@ function (*)(A::_StridedOrTriangularMatrix{Ta}, x::AbstractSparseVector{Tx}) whe
     mul!(y, A, x)
 end
 
-mul!(y::AbstractVector{Ty}, A::_StridedOrTriangularMatrix, x::AbstractSparseVector{Tx}) where {Tx,Ty} =
-    mul!(y, A, x, true, false)
-
 function mul!(y::AbstractVector, A::_StridedOrTriangularMatrix, x::AbstractSparseVector, α::Number, β::Number)
     require_one_based_indexing(y, A, x)
     m, n = size(A)
@@ -1572,21 +1603,18 @@ end
 
 # * and mul!(C, transpose(A), B)
 
-function *(transA::Transpose{<:Any,<:_StridedOrTriangularMatrix{Ta}}, x::AbstractSparseVector{Tx}) where {Ta,Tx}
-    require_one_based_indexing(transA, x)
-    m, n = size(transA)
+function *(tA::Transpose{<:Any,<:_StridedOrTriangularMatrix{Ta}}, x::AbstractSparseVector{Tx}) where {Ta,Tx}
+    require_one_based_indexing(tA, x)
+    m, n = size(tA)
     length(x) == n || throw(DimensionMismatch())
-    Ty = promote_op(matprod, eltype(transA), eltype(x))
+    Ty = promote_op(matprod, eltype(tA), eltype(x))
     y = Vector{Ty}(undef, m)
-    mul!(y, transA, x)
+    mul!(y, tA, x)
 end
 
-mul!(y::AbstractVector{Ty}, transA::Transpose{<:Any,<:_StridedOrTriangularMatrix}, x::AbstractSparseVector{Tx}) where {Tx,Ty} =
-    mul!(y, transA, x, true, false)
-
-function mul!(y::AbstractVector, transA::Transpose{<:Any,<:_StridedOrTriangularMatrix}, x::AbstractSparseVector, α::Number, β::Number)
-    require_one_based_indexing(y, transA, x)
-    m, n = size(transA)
+function mul!(y::AbstractVector, tA::Transpose{<:Any,<:_StridedOrTriangularMatrix}, x::AbstractSparseVector, α::Number, β::Number)
+    require_one_based_indexing(y, tA, x)
+    m, n = size(tA)
     length(x) == n && length(y) == m || throw(DimensionMismatch())
     m == 0 && return y
     if β != one(β)
@@ -1599,7 +1627,7 @@ function mul!(y::AbstractVector, transA::Transpose{<:Any,<:_StridedOrTriangularM
     _nnz = length(xnzind)
     _nnz == 0 && return y
 
-    A = transA.parent
+    A = tA.parent
     Ty = promote_op(matprod, eltype(A), eltype(x))
     @inbounds for j = 1:m
         s = zero(Ty)
@@ -1622,9 +1650,6 @@ function *(adjA::Adjoint{<:Any,<:_StridedOrTriangularMatrix{Ta}}, x::AbstractSpa
     mul!(y, adjA, x)
 end
 
-mul!(y::AbstractVector{Ty}, adjA::Adjoint{<:Any,<:_StridedOrTriangularMatrix}, x::AbstractSparseVector{Tx}) where {Tx,Ty} =
-    mul!(y, adjA, x, true, false)
-
 function mul!(y::AbstractVector, adjA::Adjoint{<:Any,<:_StridedOrTriangularMatrix}, x::AbstractSparseVector, α::Number, β::Number)
     require_one_based_indexing(y, adjA, x)
     m, n = size(adjA)
@@ -1681,9 +1706,6 @@ end
 
 # * and mul!
 
-mul!(y::AbstractVector{Ty}, A::AbstractSparseMatrixCSC, x::AbstractSparseVector{Tx}) where {Tx,Ty} =
-    mul!(y, A, x, true, false)
-
 function mul!(y::AbstractVector, A::AbstractSparseMatrixCSC, x::AbstractSparseVector, α::Number, β::Number)
     require_one_based_indexing(y, A, x)
     m, n = size(A)
@@ -1714,18 +1736,11 @@ function mul!(y::AbstractVector, A::AbstractSparseMatrixCSC, x::AbstractSparseVe
 end
 
 # * and *(Tranpose(A), B)
-
-mul!(y::AbstractVector{Ty}, transA::Transpose{<:Any,<:AbstractSparseMatrixCSC}, x::AbstractSparseVector{Tx}) where {Tx,Ty} =
-    (A = transA.parent; mul!(y, transpose(A), x, true, false))
-
-mul!(y::AbstractVector, transA::Transpose{<:Any,<:AbstractSparseMatrixCSC}, x::AbstractSparseVector, α::Number, β::Number) =
-    (A = transA.parent; _At_or_Ac_mul_B!((a,b) -> transpose(a) * b, y, A, x, α, β))
-
-mul!(y::AbstractVector{Ty}, adjA::Adjoint{<:Any,<:AbstractSparseMatrixCSC}, x::AbstractSparseVector{Tx}) where {Tx,Ty} =
-    (A = adjA.parent; mul!(y, adjoint(A), x, true, false))
+mul!(y::AbstractVector, tA::Transpose{<:Any,<:AbstractSparseMatrixCSC}, x::AbstractSparseVector, α::Number, β::Number) =
+    _At_or_Ac_mul_B!((a,b) -> transpose(a) * b, y, tA.parent, x, α, β)
 
 mul!(y::AbstractVector, adjA::Adjoint{<:Any,<:AbstractSparseMatrixCSC}, x::AbstractSparseVector, α::Number, β::Number) =
-    (A = adjA.parent; _At_or_Ac_mul_B!((a,b) -> adjoint(a) * b, y, A, x, α, β))
+    _At_or_Ac_mul_B!((a,b) -> adjoint(a) * b, y, adjA.parent, x, α, β)
 
 function _At_or_Ac_mul_B!(tfun::Function,
                           y::AbstractVector, A::AbstractSparseMatrixCSC, x::AbstractSparseVector,
@@ -1765,11 +1780,11 @@ function *(A::AbstractSparseMatrixCSC, x::AbstractSparseVector)
     _dense2sparsevec(y, initcap)
 end
 
-*(transA::Transpose{<:Any,<:AbstractSparseMatrixCSC}, x::AbstractSparseVector) =
-    (A = transA.parent; _At_or_Ac_mul_B((a,b) -> transpose(a) * b, A, x, promote_op(matprod, eltype(transA), eltype(x))))
+*(tA::Transpose{<:Any,<:AbstractSparseMatrixCSC}, x::AbstractSparseVector) =
+    _At_or_Ac_mul_B((a,b) -> transpose(a) * b, tA.parent, x, promote_op(matprod, eltype(tA), eltype(x)))
 
 *(adjA::Adjoint{<:Any,<:AbstractSparseMatrixCSC}, x::AbstractSparseVector) =
-    (A = adjA.parent; _At_or_Ac_mul_B((a,b) -> adjoint(a) * b, A, x, promote_op(matprod, eltype(adjA), eltype(x))))
+    _At_or_Ac_mul_B((a,b) -> adjoint(a) * b, adjA.parent, x, promote_op(matprod, eltype(adjA), eltype(x)))
 
 function _At_or_Ac_mul_B(tfun::Function, A::AbstractSparseMatrixCSC{TvA,TiA}, x::AbstractSparseVector{TvX,TiX},
                          Tv = promote_op(matprod, TvA, TvX)) where {TvA,TiA,TvX,TiX}
diff --git a/stdlib/SparseArrays/test/sparse.jl b/stdlib/SparseArrays/test/sparse.jl
index ef5159d41031d8..464dca3db15766 100644
--- a/stdlib/SparseArrays/test/sparse.jl
+++ b/stdlib/SparseArrays/test/sparse.jl
@@ -48,7 +48,7 @@ end
     S = sparse(I, 3, 3)
     fill!(S, 0)
     @test iszero(S)  # test success with stored zeros via fill!
-    @test iszero(SparseMatrixCSC(2, 2, [1,2,3], [1,2], [0,0,1])) # test success with nonzeros beyond data range
+    @test_throws ArgumentError iszero(SparseMatrixCSC(2, 2, [1,2,3], [1,2], [0,0,1])) # test failure with nonzeros beyond data range
 end
 @testset "isone specialization for SparseMatrixCSC" begin
     @test isone(sparse(I, 3, 3))    # test success
@@ -78,6 +78,24 @@ end
     @test Array(SparseMatrixCSC{eltype(a), Int8}(a)) == Array(a)
 end
 
+@testset "conversion to special LinearAlgebra types" begin
+    # issue 40924
+    @test convert(Diagonal, sparse(Diagonal(1:2))) isa Diagonal
+    @test convert(Diagonal, sparse(Diagonal(1:2))) == Diagonal(1:2)
+    @test convert(Tridiagonal, sparse(Tridiagonal(1:3, 4:7, 8:10))) isa Tridiagonal
+    @test convert(Tridiagonal, sparse(Tridiagonal(1:3, 4:7, 8:10))) == Tridiagonal(1:3, 4:7, 8:10)
+    @test convert(SymTridiagonal, sparse(SymTridiagonal(1:4, 5:7))) isa SymTridiagonal
+    @test convert(SymTridiagonal, sparse(SymTridiagonal(1:4, 5:7))) == SymTridiagonal(1:4, 5:7)
+
+    lt = LowerTriangular([1.0 2.0 3.0; 4.0 5.0 6.0; 7.0 8.0 9.0])
+    @test convert(LowerTriangular, sparse(lt)) isa LowerTriangular
+    @test convert(LowerTriangular, sparse(lt)) == lt
+
+    ut = UpperTriangular([1.0 2.0 3.0; 4.0 5.0 6.0; 7.0 8.0 9.0])
+    @test convert(UpperTriangular, sparse(ut)) isa UpperTriangular
+    @test convert(UpperTriangular, sparse(ut)) == ut
+end
+
 @testset "sparse matrix construction" begin
     @test (A = fill(1.0+im,5,5); isequal(Array(sparse(A)), A))
     @test_throws ArgumentError sparse([1,2,3], [1,2], [1,2,3], 3, 3)
@@ -94,6 +112,7 @@ end
     # with combine
     @test sparse([1, 1, 2, 2, 2], [1, 2, 1, 2, 2], 1.0, 2, 2, +) == sparse([1, 1, 2, 2], [1, 2, 1, 2], [1.0, 1.0, 1.0, 2.0], 2, 2)
     @test sparse([1, 1, 2, 2, 2], [1, 2, 1, 2, 2], -1.0, 2, 2, *) == sparse([1, 1, 2, 2], [1, 2, 1, 2], [-1.0, -1.0, -1.0, 1.0], 2, 2)
+    @test sparse(sparse(Int32.(1:5), Int32.(1:5), trues(5))') isa SparseMatrixCSC{Bool,Int32}
 end
 
 @testset "SparseMatrixCSC construction from UniformScaling" begin
@@ -165,7 +184,7 @@ end
     sz34 = spzeros(3, 4)
     se77 = sparse(1.0I, 7, 7)
     @testset "h+v concatenation" begin
-        @test [se44 sz42 sz41; sz34 se33] == se77
+        @test @inferred(hvcat((3, 2), se44, sz42, sz41, sz34, se33)) == se77 # [se44 sz42 sz41; sz34 se33]
         @test length(nonzeros([sp33 0I; 1I 0I])) == 6
     end
 
@@ -673,8 +692,6 @@ end
     @testset "common error checking of [c]transpose! methods (ftranspose!)" begin
         @test_throws DimensionMismatch transpose!(A[:, 1:(smalldim - 1)], A)
         @test_throws DimensionMismatch transpose!(A[1:(smalldim - 1), 1], A)
-        @test_throws ArgumentError transpose!((B = similar(A); resize!(rowvals(B), nnz(A) - 1); B), A)
-        @test_throws ArgumentError transpose!((B = similar(A); resize!(nonzeros(B), nnz(A) - 1); B), A)
     end
     @testset "common error checking of permute[!] methods / source-perm compat" begin
         @test_throws DimensionMismatch permute(A, p[1:(end - 1)], q)
@@ -769,6 +786,8 @@ end
     end
 
     @testset "empty cases" begin
+        errchecker(str) = occursin("reducing over an empty collection is not allowed", str) ||
+                          occursin("collection slices must be non-empty", str)
         @test sum(sparse(Int[])) === 0
         @test prod(sparse(Int[])) === 1
         @test_throws ArgumentError minimum(sparse(Int[]))
@@ -781,9 +800,9 @@ end
             @test isequal(f(spzeros(0, 1), dims=3), f(Matrix{Int}(I, 0, 1), dims=3))
         end
         for f in (minimum, maximum, findmin, findmax)
-            @test_throws ArgumentError f(spzeros(0, 1), dims=1)
+            @test_throws errchecker f(spzeros(0, 1), dims=1)
             @test isequal(f(spzeros(0, 1), dims=2), f(Matrix{Int}(I, 0, 1), dims=2))
-            @test_throws ArgumentError f(spzeros(0, 1), dims=(1, 2))
+            @test_throws errchecker f(spzeros(0, 1), dims=(1, 2))
             @test isequal(f(spzeros(0, 1), dims=3), f(Matrix{Int}(I, 0, 1), dims=3))
         end
     end
@@ -1338,10 +1357,10 @@ end
 @testset "argmax, argmin, findmax, findmin" begin
     S = sprand(100,80, 0.5)
     A = Array(S)
-    @test argmax(S) == argmax(A)
-    @test argmin(S) == argmin(A)
-    @test findmin(S) == findmin(A)
-    @test findmax(S) == findmax(A)
+    @test @inferred(argmax(S)) == argmax(A)
+    @test @inferred(argmin(S)) == argmin(A)
+    @test @inferred(findmin(S)) == findmin(A)
+    @test @inferred(findmax(S)) == findmax(A)
     for region in [(1,), (2,), (1,2)], m in [findmax, findmin]
         @test m(S, dims=region) == m(A, dims=region)
     end
@@ -1725,7 +1744,7 @@ end
     local A = guardseed(1234321) do
         triu(sprand(10, 10, 0.2))
     end
-    @test getcolptr(SparseArrays.droptol!(A, 0.01)) == [1, 2, 2, 3, 4, 5, 5, 6, 8, 10, 13]
+    @test getcolptr(SparseArrays.droptol!(A, 0.01)) == [1, 1, 1, 1, 3, 3, 5, 6, 8, 11, 12]
     @test isequal(SparseArrays.droptol!(sparse([1], [1], [1]), 1), SparseMatrixCSC(1, 1, Int[1, 1], Int[], Int[]))
 end
 
@@ -2167,6 +2186,12 @@ end
     @test issparse(LinearAlgebra.UnitLowerTriangular(Array(m))) == false
     @test issparse(UpperTriangular(Array(m))) == false
     @test issparse(LinearAlgebra.UnitUpperTriangular(Array(m))) == false
+    @test issparse(Base.ReshapedArray(m, (20, 5), ()))
+    @test issparse(@view m[1:3, :])
+
+    # greater nesting
+    @test issparse(Symmetric(UpperTriangular(m)))
+    @test issparse(Symmetric(UpperTriangular(Array(m)))) == false
 end
 
 @testset "issparse for sparse vectors #34253" begin
@@ -2201,7 +2226,7 @@ end
     # Test that concatenations of pairs of sparse matrices yield sparse arrays
     @test issparse(vcat(spmat, spmat))
     @test issparse(hcat(spmat, spmat))
-    @test issparse(hvcat((2,), spmat, spmat))
+    @test issparse(@inferred(hvcat((2,), spmat, spmat)))
     @test issparse(cat(spmat, spmat; dims=(1,2)))
     # Test that concatenations of a sparse matrice with a dense matrix/vector yield sparse arrays
     @test issparse(vcat(spmat, densemat))
@@ -2342,22 +2367,70 @@ end
     for c in unstored_indices
         @test Base.isstored(A, c[1], c[2]) == false
     end
+
+    # `isstored` for adjoint and transposed matrices:
+    for trans in (adjoint, transpose)
+        B = trans(A)
+        stored_indices = [CartesianIndex(j, i) for (j, i) in zip(J, I)]
+        unstored_indices = [c for c in CartesianIndices((n, m)) if !(c in stored_indices)]
+        for c in stored_indices
+            @test Base.isstored(B, c[1], c[2]) == true
+        end
+        for c in unstored_indices
+            @test Base.isstored(B, c[1], c[2]) == false
+        end
+    end
 end
 
 @testset "show" begin
     io = IOBuffer()
-    show(io, MIME"text/plain"(), spzeros(Float64, Int64, 0, 0))
-    @test String(take!(io)) == "0×0 SparseArrays.SparseMatrixCSC{Float64, Int64} with 0 stored entries"
-    show(io, MIME"text/plain"(), sparse(Int64[1], Int64[1], [1.0]))
-    @test String(take!(io)) == "1×1 SparseArrays.SparseMatrixCSC{Float64, Int64} with 1 stored entry:\n 1.0"
-    show(io, MIME"text/plain"(), spzeros(Float32, Int64, 2, 2))
-    @test String(take!(io)) == "2×2 SparseArrays.SparseMatrixCSC{Float32, Int64} with 0 stored entries:\n  ⋅    ⋅ \n  ⋅    ⋅ "
+
+    A = spzeros(Float64, Int64, 0, 0)
+    for (transform, showstring) in zip(
+        (identity, adjoint, transpose), (
+        "0×0 $SparseMatrixCSC{Float64, Int64} with 0 stored entries",
+        "0×0 $Adjoint{Float64, $SparseMatrixCSC{Float64, Int64}} with 0 stored entries",
+        "0×0 $Transpose{Float64, $SparseMatrixCSC{Float64, Int64}} with 0 stored entries"
+        ))
+        show(io, MIME"text/plain"(), transform(A))
+        @test String(take!(io)) == showstring
+    end
+
+    A = sparse(Int64[1], Int64[1], [1.0])
+    for (transform, showstring) in zip(
+        (identity, adjoint, transpose), (
+        "1×1 $SparseMatrixCSC{Float64, Int64} with 1 stored entry:\n 1.0",
+        "1×1 $Adjoint{Float64, $SparseMatrixCSC{Float64, Int64}} with 1 stored entry:\n 1.0",
+        "1×1 $Transpose{Float64, $SparseMatrixCSC{Float64, Int64}} with 1 stored entry:\n 1.0",
+        ))
+        show(io, MIME"text/plain"(), transform(A))
+        @test String(take!(io)) == showstring
+    end
+
+    A = spzeros(Float32, Int64, 2, 2)
+    for (transform, showstring) in zip(
+        (identity, adjoint, transpose), (
+        "2×2 $SparseMatrixCSC{Float32, Int64} with 0 stored entries:\n  ⋅    ⋅ \n  ⋅    ⋅ ",
+        "2×2 $Adjoint{Float32, $SparseMatrixCSC{Float32, Int64}} with 0 stored entries:\n  ⋅    ⋅ \n  ⋅    ⋅ ",
+        "2×2 $Transpose{Float32, $SparseMatrixCSC{Float32, Int64}} with 0 stored entries:\n  ⋅    ⋅ \n  ⋅    ⋅ ",
+        ))
+        show(io, MIME"text/plain"(), transform(A))
+        @test String(take!(io)) == showstring
+    end
 
     A = sparse(Int64[1, 1], Int64[1, 2], [1.0, 2.0])
-    show(io, MIME"text/plain"(), A)
-    @test String(take!(io)) == "1×2 SparseArrays.SparseMatrixCSC{Float64, Int64} with 2 stored entries:\n 1.0  2.0"
-    _show_with_braille_patterns(convert(IOContext, io), A)
-    @test String(take!(io)) == "⠉"
+    for (transform, showstring, braille) in zip(
+        (identity, adjoint, transpose), (
+        "1×2 $SparseMatrixCSC{Float64, Int64} with 2 stored entries:\n 1.0  2.0",
+        "2×1 $Adjoint{Float64, $SparseMatrixCSC{Float64, Int64}} with 2 stored entries:\n 1.0\n 2.0",
+        "2×1 $Transpose{Float64, $SparseMatrixCSC{Float64, Int64}} with 2 stored entries:\n 1.0\n 2.0",
+        ),
+        ("⠉", "⠃", "⠃"))
+        show(io, MIME"text/plain"(), transform(A))
+        @test String(take!(io)) == showstring
+        _show_with_braille_patterns(convert(IOContext, io), transform(A))
+        @test String(take!(io)) == braille
+    end
 
     # every 1-dot braille pattern
     for (i, b) in enumerate(split("⠁⠂⠄⡀⠈⠐⠠⢀", ""))
@@ -2369,28 +2442,50 @@ end
 
     # empty braille pattern Char(10240)
     A = spzeros(Int64, Int64, 4, 2)
-    _show_with_braille_patterns(convert(IOContext, io), A)
-    @test String(take!(io)) == "" * Char(10240)
+    for (transform, braille) in zip(
+        (identity, adjoint, transpose),
+        ("" * Char(10240), "" * Char(10240)^2, "" * Char(10240)^2))
+        _show_with_braille_patterns(convert(IOContext, io), transform(A))
+        @test String(take!(io)) == braille
+    end
 
     A = sparse(Int64[1, 2, 4, 2, 3], Int64[1, 1, 1, 2, 2], Int64[1, 1, 1, 1, 1], 4, 2)
-    show(io, MIME"text/plain"(), A)
-    @test String(take!(io)) == "4×2 SparseArrays.SparseMatrixCSC{Int64, Int64} with 5 stored entries:\n 1  ⋅\n 1  1\n ⋅  1\n 1  ⋅"
-    _show_with_braille_patterns(convert(IOContext, io), A)
-    @test String(take!(io)) == "⡳"
+    for (transform, showstring, braille) in zip(
+        (identity, adjoint, transpose), (
+        "4×2 $SparseMatrixCSC{Int64, Int64} with 5 stored entries:\n 1  ⋅\n 1  1\n ⋅  1\n 1  ⋅",
+        "2×4 $Adjoint{Int64, $SparseMatrixCSC{Int64, Int64}} with 5 stored entries:\n 1  1  ⋅  1\n ⋅  1  1  ⋅",
+        "2×4 $Transpose{Int64, $SparseMatrixCSC{Int64, Int64}} with 5 stored entries:\n 1  1  ⋅  1\n ⋅  1  1  ⋅",
+        ),
+        ("⡳", "⠙⠊", "⠙⠊"))
+        show(io, MIME"text/plain"(), transform(A))
+        @test String(take!(io)) == showstring
+        _show_with_braille_patterns(convert(IOContext, io), transform(A))
+        @test String(take!(io)) == braille
+    end
 
     A = sparse(Int64[1, 3, 2, 4], Int64[1, 1, 2, 2], Int64[1, 1, 1, 1], 7, 3)
-    show(io, MIME"text/plain"(), A)
-    @test String(take!(io)) == "7×3 SparseArrays.SparseMatrixCSC{Int64, Int64} with 4 stored entries:\n 1  ⋅  ⋅\n ⋅  1  ⋅\n 1  ⋅  ⋅\n ⋅  1  ⋅\n ⋅  ⋅  ⋅\n ⋅  ⋅  ⋅\n ⋅  ⋅  ⋅"
-    _show_with_braille_patterns(convert(IOContext, io), A)
-    @test String(take!(io)) == "⢕" * Char(10240) * "\n" * Char(10240)^2
+    for (transform, showstring, braille) in zip(
+        (identity, adjoint, transpose), (
+        "7×3 $SparseMatrixCSC{Int64, Int64} with 4 stored entries:\n 1  ⋅  ⋅\n ⋅  1  ⋅\n 1  ⋅  ⋅\n ⋅  1  ⋅\n ⋅  ⋅  ⋅\n ⋅  ⋅  ⋅\n ⋅  ⋅  ⋅",
+        "3×7 $Adjoint{Int64, $SparseMatrixCSC{Int64, Int64}} with 4 stored entries:\n 1  ⋅  1  ⋅  ⋅  ⋅  ⋅\n ⋅  1  ⋅  1  ⋅  ⋅  ⋅\n ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅",
+        "3×7 $Transpose{Int64, $SparseMatrixCSC{Int64, Int64}} with 4 stored entries:\n 1  ⋅  1  ⋅  ⋅  ⋅  ⋅\n ⋅  1  ⋅  1  ⋅  ⋅  ⋅\n ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅",
+        ),
+        ("⢕" * Char(10240) * "\n" * Char(10240)^2, "⠑⠑" * Char(10240)^2, "⠑⠑" * Char(10240)^2))
+        show(io, MIME"text/plain"(), transform(A))
+        @test String(take!(io)) == showstring
+        _show_with_braille_patterns(convert(IOContext, io), transform(A))
+        @test String(take!(io)) == braille
+    end
 
     A = sparse(Int64[1:10;], Int64[1:10;], fill(Float64(1), 10))
-    _show_with_braille_patterns(convert(IOContext, io), A)
     brailleString = "⠑⢄" * Char(10240)^3 * "\n" * Char(10240)^2 * "⠑⢄" * Char(10240) * "\n" * Char(10240)^4 * "⠑"
-    @test String(take!(io)) == brailleString
+    for transform in (identity, adjoint, transpose)
+        _show_with_braille_patterns(convert(IOContext, io), transform(A))
+        @test String(take!(io)) == brailleString
+    end
 
     # Issue #30589
-    @test repr("text/plain", sparse([true true])) == "1×2 SparseArrays.SparseMatrixCSC{Bool, $Int} with 2 stored entries:\n 1  1"
+    @test repr("text/plain", sparse([true true])) == "1×2 $SparseMatrixCSC{Bool, $Int} with 2 stored entries:\n 1  1"
 
     function _filled_sparse(m::Integer, n::Integer)
         C = CartesianIndices((m, n))[:]
@@ -2416,19 +2511,6 @@ end
     @test String(take!(io)) == "⠛⠛"
 end
 
-@testset "check buffers" for n in 1:3
-    local A
-    rowval = [1,2,3]
-    nzval1  = Int[]
-    nzval2  = [1,1,1]
-    A = SparseMatrixCSC(n, n, [1:n+1;], rowval, nzval1)
-    @test nnz(A) == n
-    @test_throws BoundsError A[n,n]
-    A = SparseMatrixCSC(n, n, [1:n+1;], rowval, nzval2)
-    @test nnz(A) == n
-    @test A      == Matrix(I, n, n)
-end
-
 @testset "reverse search direction if step < 0 #21986" begin
     local A, B
     A = guardseed(1234) do
@@ -2487,22 +2569,22 @@ end
     @test typeof(simA) == typeof(A)
     @test size(simA) == (6,6)
     @test getcolptr(simA) == fill(1, 6+1)
-    @test length(rowvals(simA)) == length(rowvals(A))
-    @test length(nonzeros(simA)) == length(nonzeros(A))
-    # test similar with entry type and Dims{2} specification (preserves storage space only)
+    @test length(rowvals(simA)) == 0
+    @test length(nonzeros(simA)) == 0
+    # test similar with entry type and Dims{2} specification (empty storage space)
     simA = similar(A, Float32, (6,6))
     @test typeof(simA) == SparseMatrixCSC{Float32,eltype(getcolptr(A))}
     @test size(simA) == (6,6)
     @test getcolptr(simA) == fill(1, 6+1)
-    @test length(rowvals(simA)) == length(rowvals(A))
-    @test length(nonzeros(simA)) == length(nonzeros(A))
+    @test length(rowvals(simA)) == 0
+    @test length(nonzeros(simA)) == 0
     # test similar with entry type, index type, and Dims{2} specification (preserves storage space only)
     simA = similar(A, Float32, Int8, (6,6))
     @test typeof(simA) == SparseMatrixCSC{Float32, Int8}
     @test size(simA) == (6,6)
     @test getcolptr(simA) == fill(1, 6+1)
-    @test length(rowvals(simA)) == length(rowvals(A))
-    @test length(nonzeros(simA)) == length(nonzeros(A))
+    @test length(rowvals(simA)) == 0
+    @test length(nonzeros(simA)) == 0
     # test similar with Dims{1} specification (preserves nothing)
     simA = similar(A, (6,))
     @test typeof(simA) == SparseVector{eltype(nonzeros(A)),eltype(getcolptr(A))}
@@ -2530,8 +2612,6 @@ end
     # count should throw for sparse arrays for which zero(eltype) does not exist
     @test_throws MethodError count(SparseMatrixCSC(2, 2, Int[1, 2, 3], Int[1, 2], Any[true, true]))
     @test_throws MethodError count(SparseVector(2, Int[1], Any[true]))
-    # count should run only over nonzeros(S)[1:nnz(S)], not nonzeros(S) in full
-    @test count(SparseMatrixCSC(2, 2, Int[1, 2, 3], Int[1, 2], Bool[true, true, true])) == 2
 end
 
 @testset "sparse findprev/findnext operations" begin
@@ -2599,15 +2679,6 @@ end
     @test sum(s, dims=2) == reshape([1, 2, 3], 3, 1)
 end
 
-@testset "mapreduce of sparse matrices with trailing elements in nzval #26534" begin
-    B = SparseMatrixCSC{Int,Int}(2, 3,
-        [1, 3, 4, 5],
-        [1, 2, 1, 2, 999, 999, 999, 999],
-        [1, 2, 3, 6, 999, 999, 999, 999]
-    )
-    @test maximum(B) == 6
-end
-
 _length_or_count_or_five(::Colon) = 5
 _length_or_count_or_five(x::AbstractVector{Bool}) = count(x)
 _length_or_count_or_five(x) = length(x)
@@ -2883,19 +2954,6 @@ end
     @test sparse(deepwrap(A)) == Matrix(deepwrap(B))
 end
 
-@testset "unary operations on matrices where length(nzval)>nnz" begin
-    # this should create a sparse matrix with length(nzval)>nnz
-    A = SparseMatrixCSC(Complex{BigInt}[1+im 2+2im]')'[1:1, 2:2]
-    # ...ensure it does! If necessary, the test needs to be updated to use
-    # another mechanism to create a suitable A.
-    resize!(nonzeros(A), 2)
-    @assert length(nonzeros(A)) > nnz(A)
-    @test -A == fill(-2-2im, 1, 1)
-    @test conj(A) == fill(2-2im, 1, 1)
-    conj!(A)
-    @test A == fill(2-2im, 1, 1)
-end
-
 @testset "issue #31453" for T in [UInt8, Int8, UInt16, Int16, UInt32, Int32]
     i = Int[1, 2]
     j = Int[2, 1]
@@ -2945,14 +3003,8 @@ end
     @test_throws ArgumentError SparseMatrixCSC(10, 3, [1,2,1,2], Int[], Float64[])
     # rowwal (and nzval) short
     @test_throws ArgumentError SparseMatrixCSC(10, 3, [1,2,2,4], [1,2], Float64[])
-    # nzval short
-    @test SparseMatrixCSC(10, 3, [1,2,2,4], [1,2,3], Float64[]) !== nothing
-    # length(rowval) >= typemax
-    @test_throws ArgumentError SparseMatrixCSC(5, 1, Int8[1,2], fill(Int8(1),127), Int[1,2,3])
-    @test SparseMatrixCSC{Int,Int8}(5, 1, Int8[1,2], fill(Int8(1),127), Int[1,2,3]) != 0
     # length(nzval) >= typemax
-    @test_throws ArgumentError SparseMatrixCSC(5, 1, Int8[1,2], Int8[1], fill(7, 127))
-    @test SparseMatrixCSC{Int,Int8}(5, 1, Int8[1,2], Int8[1], fill(7, 127)) != 0
+    @test_throws ArgumentError SparseMatrixCSC(5, 1, Int8[1,2], fill(Int8(1), 127), fill(7, 127))
 
     # length(I) >= typemax
     @test_throws ArgumentError sparse(UInt8.(1:255), fill(UInt8(1), 255), fill(1, 255))
@@ -3192,4 +3244,21 @@ end
     end
 end
 
+@testset "issue #41135" begin
+    @test repr(SparseMatrixCSC([7;;])) == "sparse([1], [1], [7], 1, 1)"
+
+    m = SparseMatrixCSC([0 3; 4 0])
+    @test repr(m) == "sparse([2, 1], [1, 2], [4, 3], 2, 2)"
+    @test eval(Meta.parse(repr(m))) == m
+    @test summary(m) == "2×2 $SparseMatrixCSC{$Int, $Int} with 2 stored entries"
+
+    m = sprand(100, 100, .1)
+    @test occursin(r"^sparse\(\[.+\], \[.+\], \[.+\], \d+, \d+\)$", repr(m))
+    @test eval(Meta.parse(repr(m))) == m
+
+    m = sparse([85, 5, 38, 37, 59], [19, 72, 76, 98, 162], [0.8, 0.3, 0.2, 0.1, 0.5], 100, 200)
+    @test repr(m) == "sparse([85, 5, 38, 37, 59], [19, 72, 76, 98, 162], [0.8, 0.3, 0.2, 0.1, 0.5], 100, 200)"
+    @test eval(Meta.parse(repr(m))) == m
+end
+
 end # module
diff --git a/stdlib/SparseArrays/test/sparsevector.jl b/stdlib/SparseArrays/test/sparsevector.jl
index 79c952620050c4..2dc9738111a87c 100644
--- a/stdlib/SparseArrays/test/sparsevector.jl
+++ b/stdlib/SparseArrays/test/sparsevector.jl
@@ -33,6 +33,9 @@ x1_full[SparseArrays.nonzeroinds(spv_x1)] = nonzeros(spv_x1)
     @test SparseArrays.nonzeroinds(x) == [2, 5, 6]
     @test nonzeros(x) == [1.25, -0.75, 3.5]
     @test count(SparseVector(8, [2, 5, 6], [true,false,true])) == 2
+    y = SparseVector(typemax(Int128), Int128[4], [5])
+    @test y isa SparseVector{Int,Int128}
+    @test @inferred size(y) == (@inferred(length(y)),)
 end
 
 @testset "isstored" begin
@@ -75,7 +78,11 @@ end
 @testset "other constructors" begin
     # construct empty sparse vector
 
-    @test exact_equal(spzeros(Float64, 8), SparseVector(8, Int[], Float64[]))
+    for dims in (8, (8,))
+        @test exact_equal(spzeros(dims), SparseVector(8, Int[], Float64[]))
+        @test exact_equal(spzeros(Float64, dims), SparseVector(8, Int[], Float64[]))
+        @test exact_equal(spzeros(Float64, Int16, dims), SparseVector(8, Int16[], Float64[]))
+    end
 
     @testset "from list of indices and values" begin
         @test exact_equal(
@@ -789,6 +796,19 @@ end
     @test sum(x) == 4.0
     @test sum(abs, x) == 5.5
     @test sum(abs2, x) == 14.375
+    @test @inferred(sum(t -> true, x)) === 8
+    @test @inferred(sum(t -> abs(t) + one(t), x)) == 13.5
+
+    @test @inferred(sum(t -> true, spzeros(Float64, 8))) === 8
+    @test @inferred(sum(t -> abs(t) + one(t), spzeros(Float64, 8))) === 8.0
+
+    # reducing over an empty collection
+    # FIXME sum(f, []) throws, should be fixed both for generic and sparse vectors
+    @test_broken sum(t -> true, zeros(Float64, 0)) === 0
+    @test_broken sum(t -> true, spzeros(Float64, 0)) === 0
+    @test @inferred(sum(abs2, spzeros(Float64, 0))) === 0.0
+    @test_broken sum(t -> abs(t) + one(t), zeros(Float64, 0)) === 0.0
+    @test_broken sum(t -> abs(t) + one(t), spzeros(Float64, 0)) === 0.0
 
     @test norm(x) == sqrt(14.375)
     @test norm(x, 1) == 5.5
@@ -802,6 +822,12 @@ end
         @test minimum(x) == -0.75
         @test maximum(abs, x) == 3.5
         @test minimum(abs, x) == 0.0
+        @test @inferred(minimum(t -> true, x)) === true
+        @test @inferred(maximum(t -> true, x)) === true
+        @test @inferred(minimum(t -> abs(t) + one(t), x)) == 1.0
+        @test @inferred(maximum(t -> abs(t) + one(t), x)) == 4.5
+        @test @inferred(minimum(t -> t + one(t), x)) == 0.25
+        @test @inferred(maximum(t -> -abs(t) + one(t), x)) == 1.0
     end
 
     let x = abs.(spv_x1)
@@ -826,6 +852,15 @@ end
         @test minimum(x) == 0.0
         @test maximum(abs, x) == 0.0
         @test minimum(abs, x) == 0.0
+        @test @inferred(minimum(t -> true, x)) === true
+        @test @inferred(maximum(t -> true, x)) === true
+        @test @inferred(minimum(t -> abs(t) + one(t), x)) === 1.0
+        @test @inferred(maximum(t -> abs(t) + one(t), x)) === 1.0
+    end
+
+    let x = spzeros(Float64, 0)
+        @test_throws ArgumentError minimum(t -> true, x)
+        @test_throws ArgumentError maximum(t -> true, x)
     end
 end
 
@@ -1387,27 +1422,27 @@ end
     # test entry points to similar with entry type, index type, and non-Dims shape specification
     @test similar(A, Float32, Int8, 6, 6) == similar(A, Float32, Int8, (6, 6))
     @test similar(A, Float32, Int8, 6) == similar(A, Float32, Int8, (6,))
-    # test similar with Dims{2} specification (preserves storage space only, not stored-entry structure)
+    # test similar with Dims{2} specification (preserves allocated storage space only, not stored-entry structure)
     simA = similar(A, (6,6))
     @test typeof(simA) == SparseMatrixCSC{eltype(nonzeros(A)),eltype(nonzeroinds(A))}
     @test size(simA) == (6,6)
     @test getcolptr(simA) == fill(1, 6+1)
-    @test length(rowvals(simA)) == length(nonzeroinds(A))
-    @test length(nonzeros(simA)) == length(nonzeros(A))
-    # test similar with entry type and Dims{2} specification (preserves storage space only)
+    @test length(rowvals(simA)) == 0
+    @test length(nonzeros(simA)) == 0
+    # test similar with entry type and Dims{2} specification (preserves allocated storage space only)
     simA = similar(A, Float32, (6,6))
     @test typeof(simA) == SparseMatrixCSC{Float32,eltype(nonzeroinds(A))}
     @test size(simA) == (6,6)
     @test getcolptr(simA) == fill(1, 6+1)
-    @test length(rowvals(simA)) == length(nonzeroinds(A))
-    @test length(nonzeros(simA)) == length(nonzeros(A))
+    @test length(rowvals(simA)) == 0
+    @test length(nonzeros(simA)) == 0
     # test similar with entry type, index type, and Dims{2} specification (preserves storage space only)
     simA = similar(A, Float32, Int8, (6,6))
     @test typeof(simA) == SparseMatrixCSC{Float32, Int8}
     @test size(simA) == (6,6)
     @test getcolptr(simA) == fill(1, 6+1)
-    @test length(rowvals(simA)) == length(nonzeroinds(A))
-    @test length(nonzeros(simA)) == length(nonzeros(A))
+    @test length(rowvals(simA)) == 0
+    @test length(nonzeros(simA)) == 0
 end
 
 @testset "Fast operations on full column views" begin
diff --git a/stdlib/Statistics.version b/stdlib/Statistics.version
index 50d261cc008b03..84cdf8630e8fc6 100644
--- a/stdlib/Statistics.version
+++ b/stdlib/Statistics.version
@@ -1,2 +1,2 @@
 STATISTICS_BRANCH = master
-STATISTICS_SHA1 = 4b3ef9aaa79350510ca0be395458f66051c2f92d
+STATISTICS_SHA1 = 54f9b0d999813aa9fab039f632df222ffd2a96a8
diff --git a/stdlib/SuiteSparse.version b/stdlib/SuiteSparse.version
new file mode 100644
index 00000000000000..e2700363ad665a
--- /dev/null
+++ b/stdlib/SuiteSparse.version
@@ -0,0 +1,2 @@
+SUITESPARSE_BRANCH = master
+SUITESPARSE_SHA1 = b15c39be53f7823c721c1f8a7c036105e2baa04a
diff --git a/stdlib/SuiteSparse/Project.toml b/stdlib/SuiteSparse/Project.toml
deleted file mode 100644
index 0312b1625f7e39..00000000000000
--- a/stdlib/SuiteSparse/Project.toml
+++ /dev/null
@@ -1,19 +0,0 @@
-name = "SuiteSparse"
-uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9"
-
-[deps]
-Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
-LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
-Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
-SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
-SuiteSparse_jll = "bea87d4a-7f5b-5778-9afe-8cc45184846c"
-
-[extras]
-DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
-Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
-Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
-Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-
-[targets]
-test = ["Test", "Printf", "Random", "DelimitedFiles", "Serialization"]
diff --git a/stdlib/SuiteSparse/docs/src/index.md b/stdlib/SuiteSparse/docs/src/index.md
deleted file mode 100644
index e8654caf943b40..00000000000000
--- a/stdlib/SuiteSparse/docs/src/index.md
+++ /dev/null
@@ -1,34 +0,0 @@
-# Sparse Linear Algebra
-
-```@meta
-DocTestSetup = :(using LinearAlgebra, SparseArrays, SuiteSparse)
-```
-
-Sparse matrix solvers call functions from [SuiteSparse](http://suitesparse.com). The following factorizations are available:
-
-| Type                              | Description                                   |
-|:--------------------------------- |:--------------------------------------------- |
-| `SuiteSparse.CHOLMOD.Factor`      | Cholesky factorization                        |
-| `SuiteSparse.UMFPACK.UmfpackLU`   | LU factorization                              |
-| `SuiteSparse.SPQR.QRSparse`       | QR factorization                              |
-
-Other solvers such as [Pardiso.jl](https://github.com/JuliaSparse/Pardiso.jl/) are as external packages. [Arpack.jl](https://julialinearalgebra.github.io/Arpack.jl/stable/) provides `eigs` and `svds` for iterative solution of eigensystems and singular value decompositions.
-
-These factorizations are described in the [`Linear Algebra`](@ref man-linalg) section of the manual:
-1. [`cholesky`](@ref)
-2. [`ldlt`](@ref)
-3. [`lu`](@ref)
-4. [`qr`](@ref)
-
-```@docs
-SuiteSparse.CHOLMOD.lowrankupdate
-SuiteSparse.CHOLMOD.lowrankupdate!
-SuiteSparse.CHOLMOD.lowrankdowndate
-SuiteSparse.CHOLMOD.lowrankdowndate!
-SuiteSparse.CHOLMOD.lowrankupdowndate!
-```
-
-
-```@meta
-DocTestSetup = nothing
-```
diff --git a/stdlib/SuiteSparse/src/SuiteSparse.jl b/stdlib/SuiteSparse/src/SuiteSparse.jl
deleted file mode 100644
index e07e6aaea1c5ae..00000000000000
--- a/stdlib/SuiteSparse/src/SuiteSparse.jl
+++ /dev/null
@@ -1,31 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module SuiteSparse
-
-import Base: \
-import LinearAlgebra: ldiv!, rdiv!
-
-## Functions to switch to 0-based indexing to call external sparse solvers
-
-# Convert from 1-based to 0-based indices
-function decrement!(A::AbstractArray{T}) where T<:Integer
-    for i in eachindex(A); A[i] -= oneunit(T) end
-    A
-end
-decrement(A::AbstractArray{<:Integer}) = decrement!(copy(A))
-
-# Convert from 0-based to 1-based indices
-function increment!(A::AbstractArray{T}) where T<:Integer
-    for i in eachindex(A); A[i] += oneunit(T) end
-    A
-end
-increment(A::AbstractArray{<:Integer}) = increment!(copy(A))
-
-if Base.USE_GPL_LIBS
-    include("umfpack.jl")
-    include("cholmod.jl")
-    include("spqr.jl")
-    include("deprecated.jl")
-end
-
-end # module SuiteSparse
diff --git a/stdlib/SuiteSparse/src/cholmod.jl b/stdlib/SuiteSparse/src/cholmod.jl
deleted file mode 100644
index cbfee4a30cb41a..00000000000000
--- a/stdlib/SuiteSparse/src/cholmod.jl
+++ /dev/null
@@ -1,1875 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# Theoretically CHOLMOD supports both Int32 and Int64 indices on 64-bit.
-# However experience suggests that using both in the same session causes memory
-# leaks, so we restrict indices to be SuiteSparse_long (see cholmod_h.jl).
-# Ref: https://github.com/JuliaLang/julia/issues/12664
-
-# Additionally, only Float64/ComplexF64 are supported in practice.
-# Ref: https://github.com/JuliaLang/julia/issues/25986
-
-module CHOLMOD
-
-import Base: (*), convert, copy, eltype, getindex, getproperty, show, size,
-             IndexStyle, IndexLinear, IndexCartesian, adjoint, axes
-using Base: require_one_based_indexing
-
-using LinearAlgebra
-import LinearAlgebra: (\),
-                 cholesky, cholesky!, det, diag, ishermitian, isposdef,
-                 issuccess, issymmetric, ldlt, ldlt!, logdet
-
-using SparseArrays
-using SparseArrays: getcolptr
-import Libdl
-
-export
-    Dense,
-    Factor,
-    Sparse
-
-import SparseArrays: AbstractSparseMatrix, SparseMatrixCSC, indtype, sparse, spzeros, nnz
-
-import ..increment, ..increment!, ..decrement, ..decrement!
-
-#########
-# Setup #
-#########
-
-include("cholmod_h.jl")
-
-const CHOLMOD_MIN_VERSION = v"2.1.1"
-
-const common_struct = Vector{Vector{UInt8}}()
-
-const common_supernodal = Vector{Ptr{Cint}}()
-const common_final_ll   = Vector{Ptr{Cint}}()
-const common_print      = Vector{Ptr{Cint}}()
-const common_itype      = Vector{Ptr{Cint}}()
-const common_dtype      = Vector{Ptr{Cint}}()
-const common_nmethods   = Vector{Ptr{Cint}}()
-const common_postorder  = Vector{Ptr{Cint}}()
-
-### These offsets are defined in SuiteSparse_wrapper.c
-const common_size = ccall((:jl_cholmod_common_size,:libsuitesparse_wrapper),Int,())
-
-const cholmod_com_offsets = Vector{Csize_t}(undef, 19)
-ccall((:jl_cholmod_common_offsets, :libsuitesparse_wrapper),
-    Nothing, (Ptr{Csize_t},), cholmod_com_offsets)
-
-## macro to generate the name of the C function according to the integer type
-macro cholmod_name(nm)
-    string("cholmod_l_", nm)
-end
-
-function start(a::Vector{UInt8})
-    @isok ccall((@cholmod_name("start"), :libcholmod),
-        Cint, (Ptr{UInt8},), a)
-    return a
-end
-
-function finish(a::Vector{UInt8})
-    @isok ccall((@cholmod_name("finish"), :libcholmod),
-        Cint, (Ptr{UInt8},), a)
-    return a
-end
-
-function defaults(a::Vector{UInt8})
-    @isok ccall((@cholmod_name("defaults"), :libcholmod),
-        Cint, (Ptr{UInt8},), a)
-    return a
-end
-
-const build_version_array = Vector{Cint}(undef, 3)
-ccall((:jl_cholmod_version, :libsuitesparse_wrapper), Cint, (Ptr{Cint},), build_version_array)
-const build_version = VersionNumber(build_version_array...)
-
-function __init__()
-    try
-        ### Check if the linked library is compatible with the Julia code
-        if Libdl.dlsym_e(Libdl.dlopen("libcholmod"), :cholmod_version) != C_NULL
-            current_version_array = Vector{Cint}(undef, 3)
-            ccall((:cholmod_version, :libcholmod), Cint, (Ptr{Cint},), current_version_array)
-            current_version = VersionNumber(current_version_array...)
-        else # CHOLMOD < 2.1.1 does not include cholmod_version()
-            current_version = v"0.0.0"
-        end
-
-
-        if current_version < CHOLMOD_MIN_VERSION
-            @warn """
-                CHOLMOD version incompatibility
-
-                Julia was compiled with CHOLMOD version $build_version. It is
-                currently linked with a version older than
-                $(CHOLMOD_MIN_VERSION). This might cause Julia to
-                terminate when working with sparse matrix factorizations,
-                e.g. solving systems of equations with \\.
-
-                It is recommended that you use Julia with a recent version
-                of CHOLMOD, or download the generic binaries
-                from www.julialang.org, which ship with the correct
-                versions of all dependencies.
-                """
-        elseif build_version_array[1] != current_version_array[1]
-            @warn """
-                CHOLMOD version incompatibility
-
-                Julia was compiled with CHOLMOD version $build_version. It is
-                currently linked with version $current_version.
-                This might cause Julia to terminate when working with
-                sparse matrix factorizations, e.g. solving systems of
-                equations with \\.
-
-                It is recommended that you use Julia with the same major
-                version of CHOLMOD as the one used during the build, or
-                download the generic binaries from www.julialang.org,
-                which ship with the correct versions of all dependencies.
-                """
-        end
-
-        intsize = Int(ccall((:jl_cholmod_sizeof_long,:libsuitesparse_wrapper),Csize_t,()))
-        if intsize != 4length(IndexTypes)
-            @error """
-                 CHOLMOD integer size incompatibility
-
-                 Julia was compiled with a version of CHOLMOD that
-                 supported $(32length(IndexTypes)) bit integers. It is
-                 currently linked with version that supports $(8intsize)
-                 integers. This might cause Julia to terminate when
-                 working with sparse matrix factorizations, e.g. solving
-                 systems of equations with \\.
-
-                 This problem can be fixed by modifying the Julia build
-                 configuration or by downloading the OS X or generic
-                 Linux binary from www.julialang.org, which include
-                 the correct versions of all dependencies.
-                 """
-        end
-
-        ### Initiate CHOLMOD
-        ### common_struct controls the type of factorization and keeps pointers
-        ### to temporary memory. We need to manage a copy for each thread.
-        nt = Threads.nthreads()
-        resize!(common_struct    , nt)
-        resize!(common_supernodal, nt)
-        resize!(common_final_ll  , nt)
-        resize!(common_print     , nt)
-        resize!(common_itype     , nt)
-        resize!(common_dtype     , nt)
-        resize!(common_nmethods  , nt)
-        resize!(common_postorder , nt)
-        for i in 1:nt
-            common_struct[i] = fill(0xff, common_size)
-
-            common_supernodal[i] = pointer(common_struct[i], cholmod_com_offsets[4] + 1)
-            common_final_ll[i]   = pointer(common_struct[i], cholmod_com_offsets[7] + 1)
-            common_print[i]      = pointer(common_struct[i], cholmod_com_offsets[13] + 1)
-            common_itype[i]      = pointer(common_struct[i], cholmod_com_offsets[18] + 1)
-            common_dtype[i]      = pointer(common_struct[i], cholmod_com_offsets[19] + 1)
-            common_nmethods[i]   = pointer(common_struct[i], cholmod_com_offsets[15] + 1)
-            common_postorder[i]  = pointer(common_struct[i], cholmod_com_offsets[17] + 1)
-
-            start(common_struct[i])              # initializes CHOLMOD
-            set_print_level(common_struct[i], 0) # no printing from CHOLMOD by default
-        end
-
-        # Register gc tracked allocator if CHOLMOD is new enough
-        if current_version >= v"3.0.0"
-            cnfg = cglobal((:SuiteSparse_config, :libsuitesparseconfig), Ptr{Cvoid})
-            unsafe_store!(cnfg, cglobal(:jl_malloc, Ptr{Cvoid}), 1)
-            unsafe_store!(cnfg, cglobal(:jl_calloc, Ptr{Cvoid}), 2)
-            unsafe_store!(cnfg, cglobal(:jl_realloc, Ptr{Cvoid}), 3)
-            unsafe_store!(cnfg, cglobal(:jl_free, Ptr{Cvoid}), 4)
-        end
-
-    catch ex
-        @error "Error during initialization of module CHOLMOD" exception=ex,catch_backtrace()
-    end
-end
-
-function set_print_level(cm::Vector{UInt8}, lev::Integer)
-    unsafe_store!(common_print[Threads.threadid()], lev)
-end
-
-####################
-# Type definitions #
-####################
-
-abstract type SuiteSparseStruct end
-
-# The three core data types for CHOLMOD: Dense, Sparse and Factor.
-# CHOLMOD manages the memory, so the Julia versions only wrap a
-# pointer to a struct.  Therefore finalizers should be registered each
-# time a pointer is returned from CHOLMOD.
-
-# Dense
-struct C_Dense{T<:VTypes} <: SuiteSparseStruct
-    nrow::Csize_t
-    ncol::Csize_t
-    nzmax::Csize_t
-    d::Csize_t
-    x::Ptr{T}
-    z::Ptr{Cvoid}
-    xtype::Cint
-    dtype::Cint
-end
-
-mutable struct Dense{Tv<:VTypes} <: DenseMatrix{Tv}
-    ptr::Ptr{C_Dense{Tv}}
-    function Dense{Tv}(ptr::Ptr{C_Dense{Tv}}) where Tv<:VTypes
-        if ptr == C_NULL
-            throw(ArgumentError("dense matrix construction failed for " *
-                "unknown reasons. Please submit a bug report."))
-        end
-        s = unsafe_load(ptr)
-        if s.xtype != xtyp(Tv)
-            free!(ptr)
-            throw(CHOLMODException("xtype=$(s.xtype) not supported"))
-        elseif s.dtype != dtyp(Tv)
-            free!(ptr)
-            throw(CHOLMODException("dtype=$(s.dtype) not supported"))
-        end
-        A = new(ptr)
-        finalizer(free!, A)
-        return A
-    end
-end
-Dense(p::Ptr{C_Dense{Tv}}) where {Tv<:VTypes} = Dense{Tv}(p)
-
-# Sparse
-# allow Cvoid pointer for reading matrices of unknown type from files as in
-# cholmod_read_sparse
-struct C_Sparse{Tv<:Union{Cvoid, VTypes}} <: SuiteSparseStruct
-    nrow::Csize_t
-    ncol::Csize_t
-    nzmax::Csize_t
-    p::Ptr{SuiteSparse_long}
-    i::Ptr{SuiteSparse_long}
-    nz::Ptr{SuiteSparse_long}
-    x::Ptr{Tv}
-    z::Ptr{Cvoid}
-    stype::Cint
-    itype::Cint
-    xtype::Cint
-    dtype::Cint
-    sorted::Cint
-    packed::Cint
-end
-
-mutable struct Sparse{Tv<:VTypes} <: AbstractSparseMatrix{Tv,SuiteSparse_long}
-    ptr::Ptr{C_Sparse{Tv}}
-    function Sparse{Tv}(ptr::Ptr{C_Sparse{Tv}}) where Tv<:VTypes
-        if ptr == C_NULL
-            throw(ArgumentError("sparse matrix construction failed for " *
-                "unknown reasons. Please submit a bug report."))
-        end
-        s = unsafe_load(ptr)
-        if s.itype != ityp(SuiteSparse_long)
-            free!(ptr)
-            throw(CHOLMODException("itype=$(s.itype) not supported"))
-        elseif s.xtype != xtyp(Tv)
-            free!(ptr)
-            throw(CHOLMODException("xtype=$(s.xtype) not supported"))
-        elseif s.dtype != dtyp(Tv)
-            free!(ptr)
-            throw(CHOLMODException("dtype=$(s.dtype) not supported"))
-        end
-        A = new(ptr)
-        finalizer(free!, A)
-        return A
-    end
-end
-Sparse(p::Ptr{C_Sparse{Tv}}) where {Tv<:VTypes} = Sparse{Tv}(p)
-
-# Useful when reading in files, but not type stable
-function Sparse(p::Ptr{C_Sparse{Cvoid}})
-    if p == C_NULL
-        throw(ArgumentError("sparse matrix construction failed for " *
-                            "unknown reasons. Please submit a bug report."))
-    end
-    s = unsafe_load(p)
-    Tv = s.xtype == REAL ? Float64 : ComplexF64
-    Sparse(convert(Ptr{C_Sparse{Tv}}, p))
-end
-
-Base.unsafe_convert(::Type{Ptr{Tv}}, A::Sparse{Tv}) where {Tv} = getfield(A, :ptr)
-
-# Factor
-# Cvoid is used for pattern-only factors
-struct C_Factor{Tv<:VTypes} <: SuiteSparseStruct
-    n::Csize_t
-    minor::Csize_t
-    Perm::Ptr{SuiteSparse_long}
-    ColCount::Ptr{SuiteSparse_long}
-    @static if build_version >= v"2.1.0"
-        IPerm::Ptr{SuiteSparse_long}  # this pointer was added in version 2.1.0
-    end
-    nzmax::Csize_t
-    p::Ptr{SuiteSparse_long}
-    i::Ptr{SuiteSparse_long}
-    x::Ptr{Tv}
-    z::Ptr{Cvoid}
-    nz::Ptr{SuiteSparse_long}
-    next::Ptr{SuiteSparse_long}
-    prev::Ptr{SuiteSparse_long}
-    nsuper::Csize_t
-    ssize::Csize_t
-    xsize::Csize_t
-    maxcsize::Csize_t
-    maxesize::Csize_t
-    super::Ptr{SuiteSparse_long}
-    pi::Ptr{SuiteSparse_long}
-    px::Ptr{SuiteSparse_long}
-    s::Ptr{SuiteSparse_long}
-    ordering::Cint
-    is_ll::Cint
-    is_super::Cint
-    is_monotonic::Cint
-    itype::Cint
-    xtype::Cint
-    dtype::Cint
-end
-
-mutable struct Factor{Tv<:VTypes} <: Factorization{Tv}
-    ptr::Ptr{C_Factor{Tv}}
-    function Factor{Tv}(ptr::Ptr{C_Factor{Tv}}, register_finalizer = true) where Tv
-        if ptr == C_NULL
-            throw(ArgumentError("factorization construction failed for " *
-                "unknown reasons. Please submit a bug report."))
-        end
-        s = unsafe_load(ptr)
-        if s.itype != ityp(SuiteSparse_long)
-            free!(ptr)
-            throw(CHOLMODException("itype=$(s.itype) not supported"))
-        elseif s.xtype != xtyp(Tv) && s.xtype != PATTERN
-            free!(ptr)
-            throw(CHOLMODException("xtype=$(s.xtype) not supported"))
-        elseif s.dtype != dtyp(Tv)
-            free!(ptr)
-            throw(CHOLMODException("dtype=$(s.dtype) not supported"))
-        end
-        F = new(ptr)
-        if register_finalizer
-            finalizer(free!, F)
-        end
-        return F
-    end
-end
-Factor(ptr::Ptr{C_Factor{Tv}}) where {Tv<:VTypes} = Factor{Tv}(ptr)
-Factor(x::Factor) = x
-
-Base.adjoint(F::Factor) = Adjoint(F)
-Base.transpose(F::Factor) = Transpose(F)
-
-# All pointer loads should be checked to make sure that SuiteSparse is not called with
-# a C_NULL pointer which could cause a segfault. Pointers are set to null
-# when serialized so this can happen when multiple processes are in use.
-function Base.unsafe_convert(::Type{Ptr{T}}, x::Union{Dense,Sparse,Factor}) where T<:SuiteSparseStruct
-    xp = getfield(x, :ptr)
-    if xp == C_NULL
-        throw(ArgumentError("pointer to the $T object is null. This can " *
-            "happen if the object has been serialized."))
-    else
-        return xp
-    end
-end
-Base.pointer(x::Dense{Tv}) where {Tv}  = Base.unsafe_convert(Ptr{C_Dense{Tv}}, x)
-Base.pointer(x::Sparse{Tv}) where {Tv} = Base.unsafe_convert(Ptr{C_Sparse{Tv}}, x)
-Base.pointer(x::Factor{Tv}) where {Tv} = Base.unsafe_convert(Ptr{C_Factor{Tv}}, x)
-
-# FactorComponent, for encoding particular factors from a factorization
-mutable struct FactorComponent{Tv,S} <: AbstractMatrix{Tv}
-    F::Factor{Tv}
-
-    function FactorComponent{Tv,S}(F::Factor{Tv}) where {Tv,S}
-        s = unsafe_load(pointer(F))
-        if s.is_ll != 0
-            if !(S === :L || S === :U || S === :PtL || S === :UP)
-                throw(CHOLMODException(string(S, " not supported for sparse ",
-                    "LLt matrices; try :L, :U, :PtL, or :UP")))
-            end
-        elseif !(S === :L || S === :U || S === :PtL || S === :UP ||
-                S === :D || S === :LD || S === :DU || S === :PtLD || S === :DUP)
-            throw(CHOLMODException(string(S, " not supported for sparse LDLt ",
-                "matrices; try :L, :U, :PtL, :UP, :D, :LD, :DU, :PtLD, or :DUP")))
-        end
-        new(F)
-    end
-end
-function FactorComponent(F::Factor{Tv}, sym::Symbol) where Tv
-    FactorComponent{Tv,sym}(F)
-end
-
-Factor(FC::FactorComponent) = Factor(FC.F)
-
-#################
-# Thin wrappers #
-#################
-
-# Dense wrappers
-
-### cholmod_core_h ###
-function allocate_dense(m::Integer, n::Integer, d::Integer, ::Type{Tv}) where {Tv<:VTypes}
-    Dense(ccall((@cholmod_name("allocate_dense"), :libcholmod), Ptr{C_Dense{Tv}},
-                (Csize_t, Csize_t, Csize_t, Cint, Ptr{Cvoid}),
-                m, n, d, xtyp(Tv), common_struct[Threads.threadid()]))
-end
-
-function free!(p::Ptr{C_Dense{Tv}}) where {Tv<:VTypes}
-    @isok ccall((@cholmod_name("free_dense"), :libcholmod), Cint,
-                (Ref{Ptr{C_Dense{Tv}}}, Ptr{Cvoid}),
-                p, common_struct[Threads.threadid()])
-end
-function zeros(m::Integer, n::Integer, ::Type{Tv}) where Tv<:VTypes
-    Dense(ccall((@cholmod_name("zeros"), :libcholmod), Ptr{C_Dense{Tv}},
-                (Csize_t, Csize_t, Cint, Ptr{UInt8}),
-                m, n, xtyp(Tv), common_struct[Threads.threadid()]))
-end
-zeros(m::Integer, n::Integer) = zeros(m, n, Float64)
-
-function ones(m::Integer, n::Integer, ::Type{Tv}) where Tv<:VTypes
-    Dense(ccall((@cholmod_name("ones"), :libcholmod), Ptr{C_Dense{Tv}},
-                (Csize_t, Csize_t, Cint, Ptr{UInt8}),
-                m, n, xtyp(Tv), common_struct[Threads.threadid()]))
-end
-ones(m::Integer, n::Integer) = ones(m, n, Float64)
-
-function eye(m::Integer, n::Integer, ::Type{Tv}) where Tv<:VTypes
-    Dense(ccall((@cholmod_name("eye"), :libcholmod), Ptr{C_Dense{Tv}},
-                (Csize_t, Csize_t, Cint, Ptr{UInt8}),
-                m, n, xtyp(Tv), common_struct[Threads.threadid()]))
-end
-eye(m::Integer, n::Integer) = eye(m, n, Float64)
-eye(n::Integer) = eye(n, n, Float64)
-
-function copy(A::Dense{Tv}) where Tv<:VTypes
-    Dense(ccall((@cholmod_name("copy_dense"), :libcholmod), Ptr{C_Dense{Tv}},
-                (Ptr{C_Dense{Tv}}, Ptr{UInt8}),
-                A, common_struct[Threads.threadid()]))
-end
-
-function sort!(S::Sparse{Tv}) where Tv<:VTypes
-    @isok ccall((@cholmod_name("sort"), :libcholmod), Cint,
-                (Ptr{C_Sparse{Tv}}, Ptr{UInt8}),
-                S, common_struct[Threads.threadid()])
-    return S
-end
-
-### cholmod_matrixops.h ###
-function norm_dense(D::Dense{Tv}, p::Integer) where Tv<:VTypes
-    s = unsafe_load(pointer(D))
-    if p == 2
-        if s.ncol > 1
-            throw(ArgumentError("2 norm only supported when matrix has one column"))
-        end
-    elseif p != 0 && p != 1
-        throw(ArgumentError("second argument must be either 0 (Inf norm), 1, or 2"))
-    end
-    ccall((@cholmod_name("norm_dense"), :libcholmod), Cdouble,
-        (Ptr{C_Dense{Tv}}, Cint, Ptr{UInt8}),
-          D, p, common_struct[Threads.threadid()])
-end
-
-### cholmod_check.h ###
-function check_dense(A::Dense{Tv}) where Tv<:VTypes
-    ccall((@cholmod_name("check_dense"), :libcholmod), Cint,
-          (Ptr{C_Dense{Tv}}, Ptr{UInt8}),
-          pointer(A), common_struct[Threads.threadid()]) != 0
-end
-
-# Non-Dense wrappers
-### cholmod_core.h ###
-function allocate_sparse(nrow::Integer, ncol::Integer, nzmax::Integer,
-        sorted::Bool, packed::Bool, stype::Integer, ::Type{Tv}) where {Tv<:VTypes}
-    Sparse(ccall((@cholmod_name("allocate_sparse"), :libcholmod),
-            Ptr{C_Sparse{Tv}},
-                (Csize_t, Csize_t, Csize_t, Cint,
-                 Cint, Cint, Cint, Ptr{Cvoid}),
-                nrow, ncol, nzmax, sorted,
-                packed, stype, xtyp(Tv), common_struct[Threads.threadid()]))
-end
-
-function free!(ptr::Ptr{C_Sparse{Tv}}) where Tv<:VTypes
-    @isok ccall((@cholmod_name("free_sparse"), :libcholmod), Cint,
-            (Ref{Ptr{C_Sparse{Tv}}}, Ptr{UInt8}),
-                ptr, common_struct[Threads.threadid()])
-end
-
-function free!(ptr::Ptr{C_Factor{Tv}}) where Tv<:VTypes
-    # Warning! Important that finalizer doesn't modify the global Common struct.
-    @isok ccall((@cholmod_name("free_factor"), :libcholmod), Cint,
-            (Ref{Ptr{C_Factor{Tv}}}, Ptr{Cvoid}),
-                ptr, common_struct[Threads.threadid()])
-end
-
-function aat(A::Sparse{Tv}, fset::Vector{SuiteSparse_long}, mode::Integer) where Tv<:VRealTypes
-    Sparse(ccall((@cholmod_name("aat"), :libcholmod),
-        Ptr{C_Sparse{Tv}},
-            (Ptr{C_Sparse{Tv}}, Ptr{SuiteSparse_long}, Csize_t, Cint, Ptr{UInt8}),
-                A, fset, length(fset), mode, common_struct[Threads.threadid()]))
-end
-
-function sparse_to_dense(A::Sparse{Tv}) where Tv<:VTypes
-    Dense(ccall((@cholmod_name("sparse_to_dense"),:libcholmod),
-        Ptr{C_Dense{Tv}},
-            (Ptr{C_Sparse{Tv}}, Ptr{UInt8}),
-                A, common_struct[Threads.threadid()]))
-end
-function dense_to_sparse(D::Dense{Tv}, ::Type{SuiteSparse_long}) where Tv<:VTypes
-    Sparse(ccall((@cholmod_name("dense_to_sparse"),:libcholmod),
-        Ptr{C_Sparse{Tv}},
-            (Ptr{C_Dense{Tv}}, Cint, Ptr{UInt8}),
-                D, true, common_struct[Threads.threadid()]))
-end
-
-function factor_to_sparse!(F::Factor{Tv}) where Tv<:VTypes
-    ss = unsafe_load(pointer(F))
-    ss.xtype == PATTERN && throw(CHOLMODException("only numeric factors are supported"))
-    Sparse(ccall((@cholmod_name("factor_to_sparse"),:libcholmod),
-        Ptr{C_Sparse{Tv}},
-            (Ptr{C_Factor{Tv}}, Ptr{UInt8}),
-                F, common_struct[Threads.threadid()]))
-end
-
-function change_factor!(F::Factor{Tv}, to_ll::Bool, to_super::Bool, to_packed::Bool,
-                        to_monotonic::Bool) where Tv<:VTypes
-    @isok ccall((@cholmod_name("change_factor"),:libcholmod), Cint,
-            (Cint, Cint, Cint, Cint, Cint, Ptr{C_Factor{Tv}}, Ptr{UInt8}),
-                xtyp(Tv), to_ll, to_super, to_packed, to_monotonic, F, common_struct[Threads.threadid()])
-end
-
-function check_sparse(A::Sparse{Tv}) where Tv<:VTypes
-    ccall((@cholmod_name("check_sparse"),:libcholmod), Cint,
-          (Ptr{C_Sparse{Tv}}, Ptr{UInt8}),
-           A, common_struct[Threads.threadid()]) != 0
-end
-
-function check_factor(F::Factor{Tv}) where Tv<:VTypes
-    ccall((@cholmod_name("check_factor"),:libcholmod), Cint,
-          (Ptr{C_Factor{Tv}}, Ptr{UInt8}),
-           F, common_struct[Threads.threadid()]) != 0
-end
-
-function nnz(A::Sparse{Tv}) where Tv<:VTypes
-    ccall((@cholmod_name("nnz"),:libcholmod), Int,
-            (Ptr{C_Sparse{Tv}}, Ptr{UInt8}),
-                A, common_struct[Threads.threadid()])
-end
-
-function speye(m::Integer, n::Integer, ::Type{Tv}) where Tv<:VTypes
-    Sparse(ccall((@cholmod_name("speye"), :libcholmod),
-        Ptr{C_Sparse{Tv}},
-            (Csize_t, Csize_t, Cint, Ptr{UInt8}),
-                m, n, xtyp(Tv), common_struct[Threads.threadid()]))
-end
-
-function spzeros(m::Integer, n::Integer, nzmax::Integer, ::Type{Tv}) where Tv<:VTypes
-    Sparse(ccall((@cholmod_name("spzeros"), :libcholmod),
-        Ptr{C_Sparse{Tv}},
-            (Csize_t, Csize_t, Csize_t, Cint, Ptr{UInt8}),
-             m, n, nzmax, xtyp(Tv), common_struct[Threads.threadid()]))
-end
-
-function transpose_(A::Sparse{Tv}, values::Integer) where Tv<:VTypes
-    Sparse(ccall((@cholmod_name("transpose"),:libcholmod),
-        Ptr{C_Sparse{Tv}},
-            (Ptr{C_Sparse{Tv}}, Cint, Ptr{UInt8}),
-                A, values, common_struct[Threads.threadid()]))
-end
-
-function copy(F::Factor{Tv}) where Tv<:VTypes
-    Factor(ccall((@cholmod_name("copy_factor"),:libcholmod),
-        Ptr{C_Factor{Tv}},
-            (Ptr{C_Factor{Tv}}, Ptr{UInt8}),
-                F, common_struct[Threads.threadid()]))
-end
-function copy(A::Sparse{Tv}) where Tv<:VTypes
-    Sparse(ccall((@cholmod_name("copy_sparse"),:libcholmod),
-        Ptr{C_Sparse{Tv}},
-            (Ptr{C_Sparse{Tv}}, Ptr{UInt8}),
-                A, common_struct[Threads.threadid()]))
-end
-function copy(A::Sparse{Tv}, stype::Integer, mode::Integer) where Tv<:VRealTypes
-    Sparse(ccall((@cholmod_name("copy"),:libcholmod),
-        Ptr{C_Sparse{Tv}},
-            (Ptr{C_Sparse{Tv}}, Cint, Cint, Ptr{UInt8}),
-                A, stype, mode, common_struct[Threads.threadid()]))
-end
-
-### cholmod_check.h ###
-function print_sparse(A::Sparse{Tv}, name::String) where Tv<:VTypes
-    isascii(name) || error("non-ASCII name: $name")
-    set_print_level(common_struct[Threads.threadid()], 3)
-    @isok ccall((@cholmod_name("print_sparse"),:libcholmod), Cint,
-            (Ptr{C_Sparse{Tv}}, Ptr{UInt8}, Ptr{UInt8}),
-                 A, name, common_struct[Threads.threadid()])
-    nothing
-end
-function print_factor(F::Factor{Tv}, name::String) where Tv<:VTypes
-    set_print_level(common_struct[Threads.threadid()], 3)
-    @isok ccall((@cholmod_name("print_factor"),:libcholmod), Cint,
-            (Ptr{C_Factor{Tv}}, Ptr{UInt8}, Ptr{UInt8}),
-                F, name, common_struct[Threads.threadid()])
-    nothing
-end
-
-### cholmod_matrixops.h ###
-function ssmult(A::Sparse{Tv}, B::Sparse{Tv}, stype::Integer,
-        values::Bool, sorted::Bool) where Tv<:VRealTypes
-    lA = unsafe_load(pointer(A))
-    lB = unsafe_load(pointer(B))
-    if lA.ncol != lB.nrow
-        throw(DimensionMismatch("inner matrix dimensions do not fit"))
-    end
-    Sparse(ccall((@cholmod_name("ssmult"),:libcholmod),
-        Ptr{C_Sparse{Tv}},
-            (Ptr{C_Sparse{Tv}}, Ptr{C_Sparse{Tv}}, Cint, Cint,
-                Cint, Ptr{UInt8}),
-             A, B, stype, values,
-                sorted, common_struct[Threads.threadid()]))
-end
-
-function norm_sparse(A::Sparse{Tv}, norm::Integer) where Tv<:VTypes
-    if norm != 0 && norm != 1
-        throw(ArgumentError("norm argument must be either 0 or 1"))
-    end
-    ccall((@cholmod_name("norm_sparse"), :libcholmod), Cdouble,
-            (Ptr{C_Sparse{Tv}}, Cint, Ptr{UInt8}),
-                A, norm, common_struct[Threads.threadid()])
-end
-
-function horzcat(A::Sparse{Tv}, B::Sparse{Tv}, values::Bool) where Tv<:VRealTypes
-    Sparse(ccall((@cholmod_name("horzcat"), :libcholmod),
-        Ptr{C_Sparse{Tv}},
-            (Ptr{C_Sparse{Tv}}, Ptr{C_Sparse{Tv}}, Cint, Ptr{UInt8}),
-             A, B, values, common_struct[Threads.threadid()]))
-end
-
-function scale!(S::Dense{Tv}, scale::Integer, A::Sparse{Tv}) where Tv<:VRealTypes
-    sS = unsafe_load(pointer(S))
-    sA = unsafe_load(pointer(A))
-    if sS.ncol != 1 && sS.nrow != 1
-        throw(DimensionMismatch("first argument must be a vector"))
-    end
-    if scale == SCALAR && sS.nrow != 1
-        throw(DimensionMismatch("scaling argument must have length one"))
-    elseif scale == ROW && sS.nrow*sS.ncol != sA.nrow
-        throw(DimensionMismatch("scaling vector has length $(sS.nrow*sS.ncol), " *
-            "but matrix has $(sA.nrow) rows."))
-    elseif scale == COL && sS.nrow*sS.ncol != sA.ncol
-        throw(DimensionMismatch("scaling vector has length $(sS.nrow*sS.ncol), " *
-            "but matrix has $(sA.ncol) columns"))
-    elseif scale == SYM
-        if sA.nrow != sA.ncol
-            throw(DimensionMismatch("matrix must be square"))
-        elseif sS.nrow*sS.ncol != sA.nrow
-            throw(DimensionMismatch("scaling vector has length $(sS.nrow*sS.ncol), " *
-                "but matrix has $(sA.ncol) columns and rows"))
-        end
-    end
-
-    sA = unsafe_load(pointer(A))
-    @isok ccall((@cholmod_name("scale"),:libcholmod), Cint,
-            (Ptr{C_Dense{Tv}}, Cint, Ptr{C_Sparse{Tv}}, Ptr{UInt8}),
-                S, scale, A, common_struct[Threads.threadid()])
-    A
-end
-
-function sdmult!(A::Sparse{Tv}, transpose::Bool,
-        α::Number, β::Number, X::Dense{Tv}, Y::Dense{Tv}) where Tv<:VTypes
-    m, n = size(A)
-    nc = transpose ? m : n
-    nr = transpose ? n : m
-    if nc != size(X, 1)
-        throw(DimensionMismatch("incompatible dimensions, $nc and $(size(X,1))"))
-    end
-    @isok ccall((@cholmod_name("sdmult"),:libcholmod), Cint,
-            (Ptr{C_Sparse{Tv}}, Cint,
-             Ref{ComplexF64}, Ref{ComplexF64},
-             Ptr{C_Dense{Tv}}, Ptr{C_Dense{Tv}}, Ptr{UInt8}),
-                A, transpose, α, β, X, Y, common_struct[Threads.threadid()])
-    Y
-end
-
-function vertcat(A::Sparse{Tv}, B::Sparse{Tv}, values::Bool) where Tv<:VRealTypes
-    Sparse(ccall((@cholmod_name("vertcat"), :libcholmod),
-            Ptr{C_Sparse{Tv}},
-            (Ptr{C_Sparse{Tv}}, Ptr{C_Sparse{Tv}}, Cint, Ptr{UInt8}),
-                A, B, values, common_struct[Threads.threadid()]))
-end
-
-function symmetry(A::Sparse{Tv}, option::Integer) where Tv<:VTypes
-    xmatched = Ref{SuiteSparse_long}()
-    pmatched = Ref{SuiteSparse_long}()
-    nzoffdiag = Ref{SuiteSparse_long}()
-    nzdiag = Ref{SuiteSparse_long}()
-    rv = ccall((@cholmod_name("symmetry"), :libcholmod), Cint,
-            (Ptr{C_Sparse{Tv}}, Cint, Ptr{SuiteSparse_long}, Ptr{SuiteSparse_long},
-                Ptr{SuiteSparse_long}, Ptr{SuiteSparse_long}, Ptr{UInt8}),
-                    A, option, xmatched, pmatched,
-                        nzoffdiag, nzdiag, common_struct[Threads.threadid()])
-    rv, xmatched[], pmatched[], nzoffdiag[], nzdiag[]
-end
-
-# cholmod_cholesky.h
-# For analyze, analyze_p, and factorize_p!, the Common argument must be
-# supplied in order to control if the factorization is LLt or LDLt
-function analyze(A::Sparse{Tv}, cmmn::Vector{UInt8}) where Tv<:VTypes
-    Factor(ccall((@cholmod_name("analyze"),:libcholmod),
-        Ptr{C_Factor{Tv}},
-            (Ptr{C_Sparse{Tv}}, Ptr{UInt8}),
-                A, cmmn))
-end
-function analyze_p(A::Sparse{Tv}, perm::Vector{SuiteSparse_long},
-                   cmmn::Vector{UInt8}) where Tv<:VTypes
-    length(perm) != size(A,1) && throw(BoundsError())
-    Factor(ccall((@cholmod_name("analyze_p"),:libcholmod),
-            Ptr{C_Factor{Tv}},
-            (Ptr{C_Sparse{Tv}}, Ptr{SuiteSparse_long}, Ptr{SuiteSparse_long},
-                Csize_t, Ptr{UInt8}),
-                A, perm, C_NULL, 0, cmmn))
-end
-function factorize!(A::Sparse{Tv}, F::Factor{Tv}, cmmn::Vector{UInt8}) where Tv<:VTypes
-    @isok ccall((@cholmod_name("factorize"),:libcholmod), Cint,
-        (Ptr{C_Sparse{Tv}}, Ptr{C_Factor{Tv}}, Ptr{UInt8}),
-            A, F, cmmn)
-    F
-end
-function factorize_p!(A::Sparse{Tv}, β::Real, F::Factor{Tv}, cmmn::Vector{UInt8}) where Tv<:VTypes
-    # note that β is passed as a complex number (double beta[2]),
-    # but the CHOLMOD manual says that only beta[0] (real part) is used
-    @isok ccall((@cholmod_name("factorize_p"),:libcholmod), Cint,
-        (Ptr{C_Sparse{Tv}}, Ref{ComplexF64}, Ptr{SuiteSparse_long}, Csize_t,
-         Ptr{C_Factor{Tv}}, Ptr{UInt8}),
-            A, β, C_NULL, 0, F, cmmn)
-    F
-end
-
-function solve(sys::Integer, F::Factor{Tv}, B::Dense{Tv}) where Tv<:VTypes
-    if size(F,1) != size(B,1)
-        throw(DimensionMismatch("LHS and RHS should have the same number of rows. " *
-            "LHS has $(size(F,1)) rows, but RHS has $(size(B,1)) rows."))
-    end
-    if !issuccess(F)
-        s = unsafe_load(pointer(F))
-        if s.is_ll == 1
-            throw(LinearAlgebra.PosDefException(s.minor))
-        else
-            throw(LinearAlgebra.ZeroPivotException(s.minor))
-        end
-    end
-    Dense(ccall((@cholmod_name("solve"),:libcholmod), Ptr{C_Dense{Tv}},
-            (Cint, Ptr{C_Factor{Tv}}, Ptr{C_Dense{Tv}}, Ptr{UInt8}),
-                sys, F, B, common_struct[Threads.threadid()]))
-end
-
-function spsolve(sys::Integer, F::Factor{Tv}, B::Sparse{Tv}) where Tv<:VTypes
-    if size(F,1) != size(B,1)
-        throw(DimensionMismatch("LHS and RHS should have the same number of rows. " *
-            "LHS has $(size(F,1)) rows, but RHS has $(size(B,1)) rows."))
-    end
-    Sparse(ccall((@cholmod_name("spsolve"),:libcholmod),
-        Ptr{C_Sparse{Tv}},
-            (Cint, Ptr{C_Factor{Tv}}, Ptr{C_Sparse{Tv}}, Ptr{UInt8}),
-                sys, F, B, common_struct[Threads.threadid()]))
-end
-
-# Autodetects the types
-function read_sparse(file::Libc.FILE, ::Type{SuiteSparse_long})
-    ptr = ccall((@cholmod_name("read_sparse"), :libcholmod),
-        Ptr{C_Sparse{Cvoid}},
-            (Ptr{Cvoid}, Ptr{UInt8}),
-                file.ptr, common_struct[Threads.threadid()])
-    if ptr == C_NULL
-        throw(ArgumentError("sparse matrix construction failed. Check that input file is valid."))
-    end
-    Sparse(ptr)
-end
-
-function read_sparse(file::IO, T)
-    cfile = Libc.FILE(file)
-    try return read_sparse(cfile, T)
-    finally close(cfile)
-    end
-end
-
-function get_perm(F::Factor)
-    s = unsafe_load(pointer(F))
-    p = unsafe_wrap(Array, s.Perm, s.n, own = false)
-    p .+ 1
-end
-get_perm(FC::FactorComponent) = get_perm(Factor(FC))
-
-#########################
-# High level interfaces #
-#########################
-
-# Conversion/construction
-function Dense{T}(A::StridedVecOrMat) where T<:VTypes
-    d = allocate_dense(size(A, 1), size(A, 2), stride(A, 2), T)
-    GC.@preserve d begin
-        s = unsafe_load(pointer(d))
-        for (i, c) in enumerate(eachindex(A))
-            unsafe_store!(s.x, A[c], i)
-        end
-    end
-    d
-end
-function Dense{T}(A::Union{Adjoint{<:Any, <:StridedVecOrMat}, Transpose{<:Any, <:StridedVecOrMat}}) where T<:VTypes
-    d = allocate_dense(size(A, 1), size(A, 2), size(A, 1), T)
-    GC.@preserve d begin
-        s = unsafe_load(pointer(d))
-        for (i, c) in enumerate(eachindex(A))
-            unsafe_store!(s.x, A[c], i)
-        end
-    end
-    d
-end
-function Dense(A::Union{StridedVecOrMat, Adjoint{<:Any, <:StridedVecOrMat}, Transpose{<:Any, <:StridedVecOrMat}})
-    T = promote_type(eltype(A), Float64)
-    return Dense{T}(A)
-end
-Dense(A::Sparse) = sparse_to_dense(A)
-
-# This constructior assumes zero based colptr and rowval
-function Sparse(m::Integer, n::Integer,
-        colptr0::Vector{SuiteSparse_long}, rowval0::Vector{SuiteSparse_long},
-        nzval::Vector{Tv}, stype) where Tv<:VTypes
-    # checks
-    ## length of input
-    if length(colptr0) <= n
-        throw(ArgumentError("length of colptr0 must be at least n + 1 = $(n + 1) but was $(length(colptr0))"))
-    end
-    if colptr0[n + 1] > length(rowval0)
-        throw(ArgumentError("length of rowval0 is $(length(rowval0)) but value of colptr0 requires length to be at least $(colptr0[n + 1])"))
-    end
-    if colptr0[n + 1] > length(nzval)
-        throw(ArgumentError("length of nzval is $(length(nzval)) but value of colptr0 requires length to be at least $(colptr0[n + 1])"))
-    end
-    ## columns are sorted
-    iss = true
-    for i = 2:length(colptr0)
-        if !issorted(view(rowval0, colptr0[i - 1] + 1:colptr0[i]))
-            iss = false
-            break
-        end
-    end
-
-    o = allocate_sparse(m, n, colptr0[n + 1], iss, true, stype, Tv)
-    s = unsafe_load(pointer(o))
-
-    unsafe_copyto!(s.p, pointer(colptr0), n + 1)
-    unsafe_copyto!(s.i, pointer(rowval0), colptr0[n + 1])
-    unsafe_copyto!(s.x, pointer(nzval) , colptr0[n + 1])
-
-    @isok check_sparse(o)
-
-    return o
-end
-
-function Sparse(m::Integer, n::Integer,
-        colptr0::Vector{SuiteSparse_long},
-        rowval0::Vector{SuiteSparse_long},
-        nzval::Vector{<:VTypes})
-    o = Sparse(m, n, colptr0, rowval0, nzval, 0)
-
-    # sort indices
-    sort!(o)
-
-    # check if array is symmetric and change stype if it is
-    if ishermitian(o)
-        change_stype!(o, -1)
-    end
-    o
-end
-
-function Sparse{Tv}(A::SparseMatrixCSC, stype::Integer) where Tv<:VTypes
-    ## Check length of input. This should never fail but see #20024
-    if length(getcolptr(A)) <= size(A, 2)
-        throw(ArgumentError("length of colptr must be at least size(A,2) + 1 = $(size(A, 2) + 1) but was $(length(getcolptr(A)))"))
-    end
-    if nnz(A) > length(rowvals(A))
-        throw(ArgumentError("length of rowval is $(length(rowvals(A))) but value of colptr requires length to be at least $(nnz(A))"))
-    end
-    if nnz(A) > length(nonzeros(A))
-        throw(ArgumentError("length of nzval is $(length(nonzeros(A))) but value of colptr requires length to be at least $(nnz(A))"))
-    end
-
-    o = allocate_sparse(size(A, 1), size(A, 2), nnz(A), true, true, stype, Tv)
-    s = unsafe_load(pointer(o))
-    for i = 1:(size(A, 2) + 1)
-        unsafe_store!(s.p, getcolptr(A)[i] - 1, i)
-    end
-    for i = 1:nnz(A)
-        unsafe_store!(s.i, rowvals(A)[i] - 1, i)
-    end
-    if Tv <: Complex && stype != 0
-        # Need to remove any non real elements in the diagonal because, in contrast to
-        # BLAS/LAPACK these are not ignored by CHOLMOD. If even tiny imaginary parts are
-        # present CHOLMOD will fail with a non-positive definite/zero pivot error.
-        for j = 1:size(A, 2)
-            for ip = getcolptr(A)[j]:getcolptr(A)[j + 1] - 1
-                v = nonzeros(A)[ip]
-                unsafe_store!(s.x, rowvals(A)[ip] == j ? Complex(real(v)) : v, ip)
-            end
-        end
-    elseif Tv == eltype(nonzeros(A))
-        unsafe_copyto!(s.x, pointer(nonzeros(A)), nnz(A))
-    else
-        for i = 1:nnz(A)
-            unsafe_store!(s.x, nonzeros(A)[i], i)
-        end
-    end
-
-    @isok check_sparse(o)
-
-    return o
-end
-
-# handle promotion
-function Sparse(A::SparseMatrixCSC{Tv,SuiteSparse_long}, stype::Integer) where {Tv}
-    T = promote_type(Tv, Float64)
-    return Sparse{T}(A, stype)
-end
-
-# convert SparseVectors into CHOLMOD Sparse types through a mx1 CSC matrix
-Sparse(A::SparseVector) = Sparse(SparseMatrixCSC(A))
-function Sparse(A::SparseMatrixCSC)
-    o = Sparse(A, 0)
-    # check if array is symmetric and change stype if it is
-    if ishermitian(o)
-        change_stype!(o, -1)
-    end
-    o
-end
-
-Sparse(A::Symmetric{Tv, SparseMatrixCSC{Tv,Ti}}) where {Tv<:Real, Ti} =
-    Sparse(A.data, A.uplo == 'L' ? -1 : 1)
-Sparse(A::Hermitian{Tv,SparseMatrixCSC{Tv,Ti}}) where {Tv, Ti} =
-    Sparse(A.data, A.uplo == 'L' ? -1 : 1)
-
-Sparse(A::Dense) = dense_to_sparse(A, SuiteSparse_long)
-Sparse(L::Factor) = factor_to_sparse!(copy(L))
-function Sparse(filename::String)
-    open(filename) do f
-        return read_sparse(f, SuiteSparse_long)
-    end
-end
-
-## conversion back to base Julia types
-function Matrix{T}(D::Dense{T}) where T
-    s = unsafe_load(pointer(D))
-    a = Matrix{T}(undef, s.nrow, s.ncol)
-    copyto!(a, D)
-end
-
-Base.copyto!(dest::Base.PermutedDimsArrays.PermutedDimsArray, src::Dense) = _copy!(dest, src) # ambig
-Base.copyto!(dest::Dense{T}, D::Dense{T}) where {T<:VTypes} = _copy!(dest, D)
-Base.copyto!(dest::AbstractArray{T}, D::Dense{T}) where {T<:VTypes} = _copy!(dest, D)
-Base.copyto!(dest::AbstractArray{T,2}, D::Dense{T}) where {T<:VTypes} = _copy!(dest, D)
-Base.copyto!(dest::AbstractArray, D::Dense) = _copy!(dest, D)
-
-function _copy!(dest::AbstractArray, D::Dense)
-    require_one_based_indexing(dest)
-    s = unsafe_load(pointer(D))
-    n = s.nrow*s.ncol
-    n <= length(dest) || throw(BoundsError(dest, n))
-    if s.d == s.nrow && isa(dest, Array)
-        unsafe_copyto!(pointer(dest), s.x, s.d*s.ncol)
-    else
-        k = 0
-        for j = 1:s.ncol
-            for i = 1:s.nrow
-                dest[k+=1] = unsafe_load(s.x, i + (j - 1)*s.d)
-            end
-        end
-    end
-    dest
-end
-Matrix(D::Dense{T}) where {T} = Matrix{T}(D)
-function Vector{T}(D::Dense{T}) where T
-    if size(D, 2) > 1
-        throw(DimensionMismatch("input must be a vector but had $(size(D, 2)) columns"))
-    end
-    copyto!(Vector{T}(undef, size(D, 1)), D)
-end
-Vector(D::Dense{T}) where {T} = Vector{T}(D)
-
-function SparseMatrixCSC{Tv,SuiteSparse_long}(A::Sparse{Tv}) where Tv
-    s = unsafe_load(pointer(A))
-    if s.stype != 0
-        throw(ArgumentError("matrix has stype != 0. Convert to matrix " *
-            "with stype == 0 before converting to SparseMatrixCSC"))
-    end
-
-    B = SparseMatrixCSC(s.nrow, s.ncol,
-        increment(unsafe_wrap(Array, s.p, (s.ncol + 1,), own = false)),
-        increment(unsafe_wrap(Array, s.i, (s.nzmax,), own = false)),
-        copy(unsafe_wrap(Array, s.x, (s.nzmax,), own = false)))
-
-    if s.sorted == 0
-        return SparseArrays.sortSparseMatrixCSC!(B)
-    else
-        return B
-    end
-end
-
-function Symmetric{Float64,SparseMatrixCSC{Float64,SuiteSparse_long}}(A::Sparse{Float64})
-    s = unsafe_load(pointer(A))
-    if !issymmetric(A)
-        throw(ArgumentError("matrix is not symmetric"))
-    end
-
-    B = Symmetric(SparseMatrixCSC(s.nrow, s.ncol,
-        increment(unsafe_wrap(Array, s.p, (s.ncol + 1,), own = false)),
-        increment(unsafe_wrap(Array, s.i, (s.nzmax,), own = false)),
-        copy(unsafe_wrap(Array, s.x, (s.nzmax,), own = false))), s.stype > 0 ? :U : :L)
-
-    if s.sorted == 0
-        return SparseArrays.sortSparseMatrixCSC!(B.data)
-    else
-        return B
-    end
-end
-convert(T::Type{Symmetric{Float64,SparseMatrixCSC{Float64,SuiteSparse_long}}}, A::Sparse{Float64}) = T(A)
-
-function Hermitian{Tv,SparseMatrixCSC{Tv,SuiteSparse_long}}(A::Sparse{Tv}) where Tv<:VTypes
-    s = unsafe_load(pointer(A))
-    if !ishermitian(A)
-        throw(ArgumentError("matrix is not Hermitian"))
-    end
-
-    B = Hermitian(SparseMatrixCSC(s.nrow, s.ncol,
-        increment(unsafe_wrap(Array, s.p, (s.ncol + 1,), own = false)),
-        increment(unsafe_wrap(Array, s.i, (s.nzmax,), own = false)),
-        copy(unsafe_wrap(Array, s.x, (s.nzmax,), own = false))), s.stype > 0 ? :U : :L)
-
-    if s.sorted == 0
-        return SparseArrays.sortSparseMatrixCSC!(B.data)
-    else
-        return B
-    end
-end
-convert(T::Type{Hermitian{Tv,SparseMatrixCSC{Tv,SuiteSparse_long}}}, A::Sparse{Tv}) where {Tv<:VTypes} = T(A)
-
-function sparse(A::Sparse{Float64}) # Notice! Cannot be type stable because of stype
-    s = unsafe_load(pointer(A))
-    if s.stype == 0
-        return SparseMatrixCSC{Float64,SuiteSparse_long}(A)
-    end
-    return Symmetric{Float64,SparseMatrixCSC{Float64,SuiteSparse_long}}(A)
-end
-function sparse(A::Sparse{ComplexF64}) # Notice! Cannot be type stable because of stype
-    s = unsafe_load(pointer(A))
-    if s.stype == 0
-        return SparseMatrixCSC{ComplexF64,SuiteSparse_long}(A)
-    end
-    return Hermitian{ComplexF64,SparseMatrixCSC{ComplexF64,SuiteSparse_long}}(A)
-end
-function sparse(F::Factor)
-    s = unsafe_load(pointer(F))
-    if s.is_ll != 0
-        L = Sparse(F)
-        A = sparse(L*L')
-    else
-        LD = sparse(F.LD)
-        L, d = getLd!(LD)
-        A = (L * Diagonal(d)) * L'
-    end
-    SparseArrays.sortSparseMatrixCSC!(A)
-    p = get_perm(F)
-    if p != [1:s.n;]
-        pinv = Vector{Int}(undef, length(p))
-        for k = 1:length(p)
-            pinv[p[k]] = k
-        end
-        A = A[pinv,pinv]
-    end
-    A
-end
-
-sparse(D::Dense) = sparse(Sparse(D))
-
-function sparse(FC::FactorComponent{Tv,:L}) where Tv
-    F = Factor(FC)
-    s = unsafe_load(pointer(F))
-    if s.is_ll == 0
-        throw(CHOLMODException("sparse: supported only for :LD on LDLt factorizations"))
-    end
-    sparse(Sparse(F))
-end
-sparse(FC::FactorComponent{Tv,:LD}) where {Tv} = sparse(Sparse(Factor(FC)))
-
-# Calculate the offset into the stype field of the cholmod_sparse_struct and
-# change the value
-let offset = fieldoffset(C_Sparse{Float64}, findfirst(name -> name === :stype, fieldnames(C_Sparse{Float64}))::Int)
-    global change_stype!
-    function change_stype!(A::Sparse, i::Integer)
-        unsafe_store!(convert(Ptr{Cint}, pointer(A)), i, div(offset, 4) + 1)
-        return A
-    end
-end
-
-free!(A::Dense)  = free!(pointer(A))
-free!(A::Sparse) = free!(pointer(A))
-free!(F::Factor) = free!(pointer(F))
-
-eltype(::Type{Dense{T}}) where {T<:VTypes} = T
-eltype(::Type{Factor{T}}) where {T<:VTypes} = T
-eltype(::Type{Sparse{T}}) where {T<:VTypes} = T
-
-nnz(F::Factor) = nnz(Sparse(F))
-
-function show(io::IO, F::Factor)
-    println(io, typeof(F))
-    showfactor(io, F)
-end
-
-function show(io::IO, FC::FactorComponent)
-    println(io, typeof(FC))
-    showfactor(io, Factor(FC))
-end
-
-function showfactor(io::IO, F::Factor)
-    s = unsafe_load(pointer(F))
-    print(io, """
-        type:    $(s.is_ll!=0 ? "LLt" : "LDLt")
-        method:  $(s.is_super!=0 ? "supernodal" : "simplicial")
-        maxnnz:  $(Int(s.nzmax))
-        nnz:     $(nnz(F))
-        success: $(s.minor == size(F, 1))
-        """)
-end
-
-# getindex not defined for these, so don't use the normal array printer
-show(io::IO, ::MIME"text/plain", FC::FactorComponent) = show(io, FC)
-show(io::IO, ::MIME"text/plain", F::Factor) = show(io, F)
-
-isvalid(A::Dense) = check_dense(A)
-isvalid(A::Sparse) = check_sparse(A)
-isvalid(A::Factor) = check_factor(A)
-
-function size(A::Union{Dense,Sparse})
-    s = unsafe_load(pointer(A))
-    return (Int(s.nrow), Int(s.ncol))
-end
-function size(F::Factor, i::Integer)
-    if i < 1
-        throw(ArgumentError("dimension must be positive"))
-    end
-    s = unsafe_load(pointer(F))
-    if i <= 2
-        return Int(s.n)
-    end
-    return 1
-end
-size(F::Factor) = (size(F, 1), size(F, 2))
-axes(A::Union{Dense,Sparse,Factor}) = map(Base.OneTo, size(A))
-
-IndexStyle(::Dense) = IndexLinear()
-
-size(FC::FactorComponent, i::Integer) = size(FC.F, i)
-size(FC::FactorComponent) = size(FC.F)
-
-adjoint(FC::FactorComponent{Tv,:L}) where {Tv} = FactorComponent{Tv,:U}(FC.F)
-adjoint(FC::FactorComponent{Tv,:U}) where {Tv} = FactorComponent{Tv,:L}(FC.F)
-adjoint(FC::FactorComponent{Tv,:PtL}) where {Tv} = FactorComponent{Tv,:UP}(FC.F)
-adjoint(FC::FactorComponent{Tv,:UP}) where {Tv} = FactorComponent{Tv,:PtL}(FC.F)
-adjoint(FC::FactorComponent{Tv,:D}) where {Tv} = FC
-adjoint(FC::FactorComponent{Tv,:LD}) where {Tv} = FactorComponent{Tv,:DU}(FC.F)
-adjoint(FC::FactorComponent{Tv,:DU}) where {Tv} = FactorComponent{Tv,:LD}(FC.F)
-adjoint(FC::FactorComponent{Tv,:PtLD}) where {Tv} = FactorComponent{Tv,:DUP}(FC.F)
-adjoint(FC::FactorComponent{Tv,:DUP}) where {Tv} = FactorComponent{Tv,:PtLD}(FC.F)
-
-function getindex(A::Dense, i::Integer)
-    s = unsafe_load(pointer(A))
-    0 < i <= s.nrow*s.ncol || throw(BoundsError())
-    unsafe_load(s.x, i)
-end
-
-IndexStyle(::Sparse) = IndexCartesian()
-function getindex(A::Sparse{T}, i0::Integer, i1::Integer) where T
-    s = unsafe_load(pointer(A))
-    !(1 <= i0 <= s.nrow && 1 <= i1 <= s.ncol) && throw(BoundsError())
-    s.stype < 0 && i0 < i1 && return conj(A[i1,i0])
-    s.stype > 0 && i0 > i1 && return conj(A[i1,i0])
-
-    r1 = Int(unsafe_load(s.p, i1) + 1)
-    r2 = Int(unsafe_load(s.p, i1 + 1))
-    (r1 > r2) && return zero(T)
-    r1 = Int(searchsortedfirst(unsafe_wrap(Array, s.i, (s.nzmax,), own = false),
-        i0 - 1, r1, r2, Base.Order.Forward))
-    ((r1 > r2) || (unsafe_load(s.i, r1) + 1 != i0)) ? zero(T) : unsafe_load(s.x, r1)
-end
-
-@inline function getproperty(F::Factor, sym::Symbol)
-    if sym === :p
-        return get_perm(F)
-    elseif sym === :ptr
-        return getfield(F, :ptr)
-    else
-        return FactorComponent(F, sym)
-    end
-end
-
-function getLd!(S::SparseMatrixCSC)
-    d = Vector{eltype(S)}(undef, size(S, 1))
-    fill!(d, 0)
-    col = 1
-    for k = 1:nnz(S)
-        while k >= getcolptr(S)[col+1]
-            col += 1
-        end
-        if rowvals(S)[k] == col
-            d[col] = nonzeros(S)[k]
-            nonzeros(S)[k] = 1
-        end
-    end
-    S, d
-end
-
-## Multiplication
-(*)(A::Sparse, B::Sparse) = ssmult(A, B, 0, true, true)
-(*)(A::Sparse, B::Dense) = sdmult!(A, false, 1., 0., B, zeros(size(A, 1), size(B, 2)))
-(*)(A::Sparse, B::VecOrMat) = (*)(A, Dense(B))
-
-function *(A::Sparse{Tv}, adjB::Adjoint{Tv,Sparse{Tv}}) where Tv<:VRealTypes
-    B = adjB.parent
-    if A !== B
-        aa1 = transpose_(B, 2)
-        ## result of ssmult will have stype==0, contain numerical values and be sorted
-        return ssmult(A, aa1, 0, true, true)
-    end
-
-    ## The A*A' case is handled by cholmod_aat. This routine requires
-    ## A->stype == 0 (storage of upper and lower parts). If necessary
-    ## the matrix A is first converted to stype == 0
-    s = unsafe_load(pointer(A))
-    fset = s.ncol == 0 ? SuiteSparse_long[] : SuiteSparse_long[0:s.ncol-1;]
-    if s.stype != 0
-        aa1 = copy(A, 0, 1)
-        return aat(aa1, fset, 1)
-    else
-        return aat(A, fset, 1)
-    end
-end
-
-function *(adjA::Adjoint{<:Any,<:Sparse}, B::Sparse)
-    A = adjA.parent
-    aa1 = transpose_(A, 2)
-    if A === B
-        return *(aa1, adjoint(aa1))
-    end
-    ## result of ssmult will have stype==0, contain numerical values and be sorted
-    return ssmult(aa1, B, 0, true, true)
-end
-
-*(adjA::Adjoint{<:Any,<:Sparse}, B::Dense) =
-    (A = adjA.parent; sdmult!(A, true, 1., 0., B, zeros(size(A, 2), size(B, 2))))
-*(adjA::Adjoint{<:Any,<:Sparse}, B::VecOrMat) =
-    (A = adjA.parent; *(adjoint(A), Dense(B)))
-
-
-## Factorization methods
-
-## Compute that symbolic factorization only
-function fact_(A::Sparse{<:VTypes}, cm::Array{UInt8};
-    perm::Union{Nothing,AbstractVector{SuiteSparse_long}}=nothing,
-    postorder::Bool=true, userperm_only::Bool=true)
-
-    sA = unsafe_load(pointer(A))
-    sA.stype == 0 && throw(ArgumentError("sparse matrix is not symmetric/Hermitian"))
-
-    if !postorder
-        unsafe_store!(common_postorder[Threads.threadid()], 0)
-    end
-
-    if perm === nothing || isempty(perm) # TODO: deprecate empty perm
-        F = analyze(A, cm)
-    else # user permutation provided
-        if userperm_only # use perm even if it is worse than AMD
-            unsafe_store!(common_nmethods[Threads.threadid()], 1)
-        end
-        F = analyze_p(A, SuiteSparse_long[p-1 for p in perm], cm)
-    end
-
-    return F
-end
-
-function cholesky!(F::Factor{Tv}, A::Sparse{Tv};
-                   shift::Real=0.0, check::Bool = true) where Tv
-    # Makes it an LLt
-    unsafe_store!(common_final_ll[Threads.threadid()], 1)
-
-    # Compute the numerical factorization
-    factorize_p!(A, shift, F, common_struct[Threads.threadid()])
-
-    check && (issuccess(F) || throw(LinearAlgebra.PosDefException(1)))
-    return F
-end
-
-"""
-    cholesky!(F::CHOLMOD.Factor, A::SparseMatrixCSC; shift = 0.0, check = true) -> CHOLMOD.Factor
-
-Compute the Cholesky (``LL'``) factorization of `A`, reusing the symbolic
-factorization `F`. `A` must be a [`SparseMatrixCSC`](@ref) or a [`Symmetric`](@ref)/
-[`Hermitian`](@ref) view of a `SparseMatrixCSC`. Note that even if `A` doesn't
-have the type tag, it must still be symmetric or Hermitian.
-
-See also [`cholesky`](@ref).
-
-!!! note
-    This method uses the CHOLMOD library from SuiteSparse, which only supports
-    doubles or complex doubles. Input matrices not of those element types will
-    be converted to `SparseMatrixCSC{Float64}` or `SparseMatrixCSC{ComplexF64}`
-    as appropriate.
-"""
-cholesky!(F::Factor, A::Union{SparseMatrixCSC{T},
-          SparseMatrixCSC{Complex{T}},
-          Symmetric{T,SparseMatrixCSC{T,SuiteSparse_long}},
-          Hermitian{Complex{T},SparseMatrixCSC{Complex{T},SuiteSparse_long}},
-          Hermitian{T,SparseMatrixCSC{T,SuiteSparse_long}}};
-          shift = 0.0, check::Bool = true) where {T<:Real} =
-    cholesky!(F, Sparse(A); shift = shift, check = check)
-
-function cholesky(A::Sparse; shift::Real=0.0, check::Bool = true,
-    perm::Union{Nothing,AbstractVector{SuiteSparse_long}}=nothing)
-
-    cm = defaults(common_struct[Threads.threadid()])
-    set_print_level(cm, 0)
-
-    # Compute the symbolic factorization
-    F = fact_(A, cm; perm = perm)
-
-    # Compute the numerical factorization
-    cholesky!(F, A; shift = shift, check = check)
-
-    return F
-end
-
-"""
-    cholesky(A::SparseMatrixCSC; shift = 0.0, check = true, perm = nothing) -> CHOLMOD.Factor
-
-Compute the Cholesky factorization of a sparse positive definite matrix `A`.
-`A` must be a [`SparseMatrixCSC`](@ref) or a [`Symmetric`](@ref)/[`Hermitian`](@ref)
-view of a `SparseMatrixCSC`. Note that even if `A` doesn't
-have the type tag, it must still be symmetric or Hermitian.
-If `perm` is not given, a fill-reducing permutation is used.
-`F = cholesky(A)` is most frequently used to solve systems of equations with `F\\b`,
-but also the methods [`diag`](@ref), [`det`](@ref), and
-[`logdet`](@ref) are defined for `F`.
-You can also extract individual factors from `F`, using `F.L`.
-However, since pivoting is on by default, the factorization is internally
-represented as `A == P'*L*L'*P` with a permutation matrix `P`;
-using just `L` without accounting for `P` will give incorrect answers.
-To include the effects of permutation,
-it's typically preferable to extract "combined" factors like `PtL = F.PtL`
-(the equivalent of `P'*L`) and `LtP = F.UP` (the equivalent of `L'*P`).
-
-When `check = true`, an error is thrown if the decomposition fails.
-When `check = false`, responsibility for checking the decomposition's
-validity (via [`issuccess`](@ref)) lies with the user.
-
-Setting the optional `shift` keyword argument computes the factorization of
-`A+shift*I` instead of `A`. If the `perm` argument is provided,
-it should be a permutation of `1:size(A,1)` giving the ordering to use
-(instead of CHOLMOD's default AMD ordering).
-
-# Examples
-
-In the following example, the fill-reducing permutation used is `[3, 2, 1]`.
-If `perm` is set to `1:3` to enforce no permutation, the number of nonzero
-elements in the factor is 6.
-```jldoctest
-julia> A = [2 1 1; 1 2 0; 1 0 2]
-3×3 Matrix{Int64}:
- 2  1  1
- 1  2  0
- 1  0  2
-
-julia> C = cholesky(sparse(A))
-SuiteSparse.CHOLMOD.Factor{Float64}
-type:    LLt
-method:  simplicial
-maxnnz:  5
-nnz:     5
-success: true
-
-julia> C.p
-3-element Vector{Int64}:
- 3
- 2
- 1
-
-julia> L = sparse(C.L);
-
-julia> Matrix(L)
-3×3 Matrix{Float64}:
- 1.41421   0.0       0.0
- 0.0       1.41421   0.0
- 0.707107  0.707107  1.0
-
-julia> L * L' ≈ A[C.p, C.p]
-true
-
-julia> P = sparse(1:3, C.p, ones(3))
-3×3 SparseMatrixCSC{Float64, Int64} with 3 stored entries:
-  ⋅    ⋅   1.0
-  ⋅   1.0   ⋅
- 1.0   ⋅    ⋅
-
-julia> P' * L * L' * P ≈ A
-true
-
-julia> C = cholesky(sparse(A), perm=1:3)
-SuiteSparse.CHOLMOD.Factor{Float64}
-type:    LLt
-method:  simplicial
-maxnnz:  6
-nnz:     6
-success: true
-
-julia> L = sparse(C.L);
-
-julia> Matrix(L)
-3×3 Matrix{Float64}:
- 1.41421    0.0       0.0
- 0.707107   1.22474   0.0
- 0.707107  -0.408248  1.1547
-
-julia> L * L' ≈ A
-true
-```
-
-!!! note
-    This method uses the CHOLMOD library from SuiteSparse, which only supports
-    doubles or complex doubles. Input matrices not of those element types will
-    be converted to `SparseMatrixCSC{Float64}` or `SparseMatrixCSC{ComplexF64}`
-    as appropriate.
-
-    Many other functions from CHOLMOD are wrapped but not exported from the
-    `Base.SparseArrays.CHOLMOD` module.
-"""
-cholesky(A::Union{SparseMatrixCSC{T}, SparseMatrixCSC{Complex{T}},
-    Symmetric{T,SparseMatrixCSC{T,SuiteSparse_long}},
-    Hermitian{Complex{T},SparseMatrixCSC{Complex{T},SuiteSparse_long}},
-    Hermitian{T,SparseMatrixCSC{T,SuiteSparse_long}}};
-    kws...) where {T<:Real} = cholesky(Sparse(A); kws...)
-
-
-function ldlt!(F::Factor{Tv}, A::Sparse{Tv};
-               shift::Real=0.0, check::Bool = true) where Tv
-    cm = defaults(common_struct[Threads.threadid()])
-    set_print_level(cm, 0)
-
-    # Makes it an LDLt
-    change_factor!(F, false, false, true, false)
-
-    # Compute the numerical factorization
-    factorize_p!(A, shift, F, cm)
-
-    check && (issuccess(F) || throw(LinearAlgebra.ZeroPivotException(1)))
-    return F
-end
-
-"""
-    ldlt!(F::CHOLMOD.Factor, A::SparseMatrixCSC; shift = 0.0, check = true) -> CHOLMOD.Factor
-
-Compute the ``LDL'`` factorization of `A`, reusing the symbolic factorization `F`.
-`A` must be a [`SparseMatrixCSC`](@ref) or a [`Symmetric`](@ref)/[`Hermitian`](@ref)
-view of a `SparseMatrixCSC`. Note that even if `A` doesn't
-have the type tag, it must still be symmetric or Hermitian.
-
-See also [`ldlt`](@ref).
-
-!!! note
-    This method uses the CHOLMOD library from SuiteSparse, which only supports
-    doubles or complex doubles. Input matrices not of those element types will
-    be converted to `SparseMatrixCSC{Float64}` or `SparseMatrixCSC{ComplexF64}`
-    as appropriate.
-"""
-ldlt!(F::Factor, A::Union{SparseMatrixCSC{T},
-    SparseMatrixCSC{Complex{T}},
-    Symmetric{T,SparseMatrixCSC{T,SuiteSparse_long}},
-    Hermitian{Complex{T},SparseMatrixCSC{Complex{T},SuiteSparse_long}},
-    Hermitian{T,SparseMatrixCSC{T,SuiteSparse_long}}};
-    shift = 0.0, check::Bool = true) where {T<:Real} =
-    ldlt!(F, Sparse(A), shift = shift, check = check)
-
-function ldlt(A::Sparse; shift::Real=0.0, check::Bool = true,
-    perm::Union{Nothing,AbstractVector{SuiteSparse_long}}=nothing)
-
-    cm = defaults(common_struct[Threads.threadid()])
-    set_print_level(cm, 0)
-
-    # Makes it an LDLt
-    unsafe_store!(common_final_ll[Threads.threadid()], 0)
-    # Really make sure it's an LDLt by avoiding supernodal factorization
-    unsafe_store!(common_supernodal[Threads.threadid()], 0)
-
-    # Compute the symbolic factorization
-    F = fact_(A, cm; perm = perm)
-
-    # Compute the numerical factorization
-    ldlt!(F, A; shift = shift, check = check)
-
-    return F
-end
-
-"""
-    ldlt(A::SparseMatrixCSC; shift = 0.0, check = true, perm=nothing) -> CHOLMOD.Factor
-
-Compute the ``LDL'`` factorization of a sparse matrix `A`.
-`A` must be a [`SparseMatrixCSC`](@ref) or a [`Symmetric`](@ref)/[`Hermitian`](@ref)
-view of a `SparseMatrixCSC`. Note that even if `A` doesn't
-have the type tag, it must still be symmetric or Hermitian.
-A fill-reducing permutation is used. `F = ldlt(A)` is most frequently
-used to solve systems of equations `A*x = b` with `F\\b`. The returned
-factorization object `F` also supports the methods [`diag`](@ref),
-[`det`](@ref), [`logdet`](@ref), and [`inv`](@ref).
-You can extract individual factors from `F` using `F.L`.
-However, since pivoting is on by default, the factorization is internally
-represented as `A == P'*L*D*L'*P` with a permutation matrix `P`;
-using just `L` without accounting for `P` will give incorrect answers.
-To include the effects of permutation, it is typically preferable to extract
-"combined" factors like `PtL = F.PtL` (the equivalent of
-`P'*L`) and `LtP = F.UP` (the equivalent of `L'*P`).
-The complete list of supported factors is `:L, :PtL, :D, :UP, :U, :LD, :DU, :PtLD, :DUP`.
-
-When `check = true`, an error is thrown if the decomposition fails.
-When `check = false`, responsibility for checking the decomposition's
-validity (via [`issuccess`](@ref)) lies with the user.
-
-Setting the optional `shift` keyword argument computes the factorization of
-`A+shift*I` instead of `A`. If the `perm` argument is provided,
-it should be a permutation of `1:size(A,1)` giving the ordering to use
-(instead of CHOLMOD's default AMD ordering).
-
-!!! note
-    This method uses the CHOLMOD library from SuiteSparse, which only supports
-    doubles or complex doubles. Input matrices not of those element types will
-    be converted to `SparseMatrixCSC{Float64}` or `SparseMatrixCSC{ComplexF64}`
-    as appropriate.
-
-    Many other functions from CHOLMOD are wrapped but not exported from the
-    `Base.SparseArrays.CHOLMOD` module.
-"""
-ldlt(A::Union{SparseMatrixCSC{T},SparseMatrixCSC{Complex{T}},
-    Symmetric{T,SparseMatrixCSC{T,SuiteSparse_long}},
-    Hermitian{Complex{T},SparseMatrixCSC{Complex{T},SuiteSparse_long}},
-    Hermitian{T,SparseMatrixCSC{T,SuiteSparse_long}}};
-    kws...) where {T<:Real} = ldlt(Sparse(A); kws...)
-
-## Rank updates
-
-"""
-    lowrankupdowndate!(F::CHOLMOD.Factor, C::Sparse, update::Cint)
-
-Update an `LDLt` or `LLt` Factorization `F` of `A` to a factorization of `A ± C*C'`.
-
-If sparsity preserving factorization is used, i.e. `L*L' == P*A*P'` then the new
-factor will be `L*L' == P*A*P' + C'*C`
-
-`update`: `Cint(1)` for `A + CC'`, `Cint(0)` for `A - CC'`
-"""
-function lowrankupdowndate!(F::Factor{Tv}, C::Sparse{Tv}, update::Cint) where Tv<:VTypes
-    lF = unsafe_load(pointer(F))
-    lC = unsafe_load(pointer(C))
-    if lF.n != lC.nrow
-        throw(DimensionMismatch("matrix dimensions do not fit"))
-    end
-    @isok ccall((@cholmod_name("updown"), :libcholmod), Cint,
-        (Cint, Ptr{C_Sparse{Tv}}, Ptr{C_Factor{Tv}}, Ptr{Cvoid}),
-        update, C, F, common_struct[Threads.threadid()])
-    F
-end
-
-#Helper functions for rank updates
-lowrank_reorder(V::AbstractArray,p) = Sparse(sparse(V[p,:]))
-lowrank_reorder(V::AbstractSparseArray,p) = Sparse(V[p,:])
-
-"""
-    lowrankupdate!(F::CHOLMOD.Factor, C::AbstractArray)
-
-Update an `LDLt` or `LLt` Factorization `F` of `A` to a factorization of `A + C*C'`.
-
-`LLt` factorizations are converted to `LDLt`.
-
-See also [`lowrankupdate`](@ref), [`lowrankdowndate`](@ref), [`lowrankdowndate!`](@ref).
-"""
-function lowrankupdate!(F::Factor{Tv}, V::AbstractArray{Tv}) where Tv<:VTypes
-    #Reorder and copy V to account for permutation
-    C = lowrank_reorder(V, get_perm(F))
-    lowrankupdowndate!(F, C, Cint(1))
-end
-
-"""
-    lowrankdowndate!(F::CHOLMOD.Factor, C::AbstractArray)
-
-Update an `LDLt` or `LLt` Factorization `F` of `A` to a factorization of `A - C*C'`.
-
-`LLt` factorizations are converted to `LDLt`.
-
-See also [`lowrankdowndate`](@ref), [`lowrankupdate`](@ref), [`lowrankupdate!`](@ref).
-"""
-function lowrankdowndate!(F::Factor{Tv}, V::AbstractArray{Tv}) where Tv<:VTypes
-    #Reorder and copy V to account for permutation
-    C = lowrank_reorder(V, get_perm(F))
-    lowrankupdowndate!(F, C, Cint(0))
-end
-
-"""
-    lowrankupdate(F::CHOLMOD.Factor, C::AbstractArray) -> FF::CHOLMOD.Factor
-
-Get an `LDLt` Factorization of `A + C*C'` given an `LDLt` or `LLt` factorization `F` of `A`.
-
-The returned factor is always an `LDLt` factorization.
-
-See also [`lowrankupdate!`](@ref), [`lowrankdowndate`](@ref), [`lowrankdowndate!`](@ref).
-"""
-lowrankupdate(F::Factor{Tv}, V::AbstractArray{Tv}) where {Tv<:VTypes} =
-    lowrankupdate!(copy(F), V)
-
-"""
-    lowrankupdate(F::CHOLMOD.Factor, C::AbstractArray) -> FF::CHOLMOD.Factor
-
-Get an `LDLt` Factorization of `A + C*C'` given an `LDLt` or `LLt` factorization `F` of `A`.
-
-The returned factor is always an `LDLt` factorization.
-
-See also [`lowrankdowndate!`](@ref), [`lowrankupdate`](@ref), [`lowrankupdate!`](@ref).
-"""
-lowrankdowndate(F::Factor{Tv}, V::AbstractArray{Tv}) where {Tv<:VTypes} =
-    lowrankdowndate!(copy(F), V)
-
-## Solvers
-
-for (T, f) in ((:Dense, :solve), (:Sparse, :spsolve))
-    @eval begin
-        # Solve Lx = b and L'x=b where A = L*L'
-        function (\)(L::FactorComponent{T,:L}, B::$T) where T
-            ($f)(CHOLMOD_L, Factor(L), B)
-        end
-        function (\)(L::FactorComponent{T,:U}, B::$T) where T
-            ($f)(CHOLMOD_Lt, Factor(L), B)
-        end
-        # Solve PLx = b and L'P'x=b where A = P*L*L'*P'
-        function (\)(L::FactorComponent{T,:PtL}, B::$T) where T
-            F = Factor(L)
-            ($f)(CHOLMOD_L, F, ($f)(CHOLMOD_P, F, B)) # Confusingly, CHOLMOD_P solves P'x = b
-        end
-        function (\)(L::FactorComponent{T,:UP}, B::$T) where T
-            F = Factor(L)
-            ($f)(CHOLMOD_Pt, F, ($f)(CHOLMOD_Lt, F, B))
-        end
-        # Solve various equations for A = L*D*L' and A = P*L*D*L'*P'
-        function (\)(L::FactorComponent{T,:D}, B::$T) where T
-            ($f)(CHOLMOD_D, Factor(L), B)
-        end
-        function (\)(L::FactorComponent{T,:LD}, B::$T) where T
-            ($f)(CHOLMOD_LD, Factor(L), B)
-        end
-        function (\)(L::FactorComponent{T,:DU}, B::$T) where T
-            ($f)(CHOLMOD_DLt, Factor(L), B)
-        end
-        function (\)(L::FactorComponent{T,:PtLD}, B::$T) where T
-            F = Factor(L)
-            ($f)(CHOLMOD_LD, F, ($f)(CHOLMOD_P, F, B))
-        end
-        function (\)(L::FactorComponent{T,:DUP}, B::$T) where T
-            F = Factor(L)
-            ($f)(CHOLMOD_Pt, F, ($f)(CHOLMOD_DLt, F, B))
-        end
-    end
-end
-
-SparseVecOrMat{Tv,Ti} = Union{SparseVector{Tv,Ti}, SparseMatrixCSC{Tv,Ti}}
-
-function (\)(L::FactorComponent, b::Vector)
-    reshape(Matrix(L\Dense(b)), length(b))
-end
-function (\)(L::FactorComponent, B::Matrix)
-    Matrix(L\Dense(B))
-end
-function (\)(L::FactorComponent, B::SparseVecOrMat)
-    sparse(L\Sparse(B,0))
-end
-(\)(L::FactorComponent, B::Adjoint{<:Any,<:SparseMatrixCSC}) = L \ copy(B)
-(\)(L::FactorComponent, B::Transpose{<:Any,<:SparseMatrixCSC}) = L \ copy(B)
-
-\(adjL::Adjoint{<:Any,<:FactorComponent}, B::Union{VecOrMat,SparseVecOrMat}) = (L = adjL.parent; adjoint(L)\B)
-
-(\)(L::Factor{T}, B::Dense{T}) where {T<:VTypes} = solve(CHOLMOD_A, L, B)
-# Explicit typevars are necessary to avoid ambiguities with defs in linalg/factorizations.jl
-# Likewise the two following explicit Vector and Matrix defs (rather than a single VecOrMat)
-(\)(L::Factor{T}, B::Vector{Complex{T}}) where {T<:Float64} = complex.(L\real(B), L\imag(B))
-(\)(L::Factor{T}, B::Matrix{Complex{T}}) where {T<:Float64} = complex.(L\real(B), L\imag(B))
-(\)(L::Factor{T}, B::Adjoint{<:Any, <:Matrix{Complex{T}}}) where {T<:Float64} = complex.(L\real(B), L\imag(B))
-(\)(L::Factor{T}, B::Transpose{<:Any, <:Matrix{Complex{T}}}) where {T<:Float64} = complex.(L\real(B), L\imag(B))
-
-(\)(L::Factor{T}, b::StridedVector) where {T<:VTypes} = Vector(L\Dense{T}(b))
-(\)(L::Factor{T}, B::StridedMatrix) where {T<:VTypes} = Matrix(L\Dense{T}(B))
-(\)(L::Factor{T}, B::Adjoint{<:Any, <:StridedMatrix}) where {T<:VTypes} = Matrix(L\Dense{T}(B))
-(\)(L::Factor{T}, B::Transpose{<:Any, <:StridedMatrix}) where {T<:VTypes} = Matrix(L\Dense{T}(B))
-
-(\)(L::Factor, B::Sparse) = spsolve(CHOLMOD_A, L, B)
-# When right hand side is sparse, we have to ensure that the rhs is not marked as symmetric.
-(\)(L::Factor, B::SparseMatrixCSC) = sparse(spsolve(CHOLMOD_A, L, Sparse(B, 0)))
-(\)(L::Factor, B::Adjoint{<:Any,<:SparseMatrixCSC}) = L \ copy(B)
-(\)(L::Factor, B::Transpose{<:Any,<:SparseMatrixCSC}) = L \ copy(B)
-(\)(L::Factor, B::SparseVector) = sparse(spsolve(CHOLMOD_A, L, Sparse(B)))
-
-\(adjL::Adjoint{<:Any,<:Factor}, B::Dense) = (L = adjL.parent; solve(CHOLMOD_A, L, B))
-\(adjL::Adjoint{<:Any,<:Factor}, B::Sparse) = (L = adjL.parent; spsolve(CHOLMOD_A, L, B))
-\(adjL::Adjoint{<:Any,<:Factor}, B::SparseVecOrMat) = (L = adjL.parent; \(adjoint(L), Sparse(B)))
-
-function \(adjL::Adjoint{<:Any,<:Factor}, b::StridedVector)
-    L = adjL.parent
-    return Vector(solve(CHOLMOD_A, L, Dense(b)))
-end
-function \(adjL::Adjoint{<:Any,<:Factor}, B::StridedMatrix)
-    L = adjL.parent
-    return Matrix(solve(CHOLMOD_A, L, Dense(B)))
-end
-
-const RealHermSymComplexHermF64SSL = Union{
-    Symmetric{Float64,SparseMatrixCSC{Float64,SuiteSparse_long}},
-    Hermitian{Float64,SparseMatrixCSC{Float64,SuiteSparse_long}},
-    Hermitian{ComplexF64,SparseMatrixCSC{ComplexF64,SuiteSparse_long}}}
-const StridedVecOrMatInclAdjAndTrans = Union{StridedVecOrMat, Adjoint{<:Any, <:StridedVecOrMat}, Transpose{<:Any, <:StridedVecOrMat}}
-function \(A::RealHermSymComplexHermF64SSL, B::StridedVecOrMatInclAdjAndTrans)
-    F = cholesky(A; check = false)
-    if issuccess(F)
-        return \(F, B)
-    else
-        ldlt!(F, A; check = false)
-        if issuccess(F)
-            return \(F, B)
-        else
-            return \(lu(SparseMatrixCSC{eltype(A), SuiteSparse_long}(A)), B)
-        end
-    end
-end
-function \(adjA::Adjoint{<:Any,<:RealHermSymComplexHermF64SSL}, B::StridedVecOrMatInclAdjAndTrans)
-    A = adjA.parent
-    F = cholesky(A; check = false)
-    if issuccess(F)
-        return \(adjoint(F), B)
-    else
-        ldlt!(F, A; check = false)
-        if issuccess(F)
-            return \(adjoint(F), B)
-        else
-            return \(adjoint(lu(SparseMatrixCSC{eltype(A), SuiteSparse_long}(A))), B)
-        end
-    end
-end
-
-## Other convenience methods
-function diag(F::Factor{Tv}) where Tv
-    f = unsafe_load(pointer(F))
-    fsuper = f.super
-    fpi = f.pi
-    res = Base.zeros(Tv, Int(f.n))
-    xv  = f.x
-    if f.is_super!=0
-        px = f.px
-        pos = 1
-        for i in 1:f.nsuper
-            base = unsafe_load(px, i) + 1
-            res[pos] = unsafe_load(xv, base)
-            pos += 1
-            for j in 1:unsafe_load(fsuper, i + 1) - unsafe_load(fsuper, i) - 1
-                res[pos] = unsafe_load(xv, base + j*(unsafe_load(fpi, i + 1) -
-                    unsafe_load(fpi, i) + 1))
-                pos += 1
-            end
-        end
-    else
-        c0 = f.p
-        r0 = f.i
-        xv = f.x
-        for j in 1:f.n
-            jj = unsafe_load(c0, j) + 1
-            @assert(unsafe_load(r0, jj) == j - 1)
-            res[j] = unsafe_load(xv, jj)
-        end
-    end
-    res
-end
-
-function logdet(F::Factor{Tv}) where Tv<:VTypes
-    f = unsafe_load(pointer(F))
-    res = zero(Tv)
-    for d in diag(F); res += log(abs(d)) end
-    f.is_ll != 0 ? 2res : res
-end
-
-det(L::Factor) = exp(logdet(L))
-
-function issuccess(F::Factor)
-    s = unsafe_load(pointer(F))
-    return s.minor == size(F, 1)
-end
-
-function isposdef(F::Factor)
-    if issuccess(F)
-        s = unsafe_load(pointer(F))
-        if s.is_ll == 1
-            return true
-        else
-            # try conversion to LLt
-            change_factor!(F, true, s.is_super, true, s.is_monotonic)
-            b = issuccess(F)
-            # convert back
-            change_factor!(F, false, s.is_super, true, s.is_monotonic)
-            return b
-        end
-    else
-        return false
-    end
-end
-
-function ishermitian(A::Sparse{Float64})
-    s = unsafe_load(pointer(A))
-    if s.stype != 0
-        return true
-    else
-        i = symmetry(A, 1)[1]
-        if i < 0
-            throw(CHOLMODException("negative value returned from CHOLMOD's symmetry function. This
-                is either because the indices are not sorted or because of a memory error"))
-        end
-        return i == MM_SYMMETRIC || i == MM_SYMMETRIC_POSDIAG
-    end
-end
-function ishermitian(A::Sparse{ComplexF64})
-    s = unsafe_load(pointer(A))
-    if s.stype != 0
-        return true
-    else
-        i = symmetry(A, 1)[1]
-        if i < 0
-            throw(CHOLMODException("negative value returned from CHOLMOD's symmetry function. This
-                is either because the indices are not sorted or because of a memory error"))
-        end
-        return i == MM_HERMITIAN || i == MM_HERMITIAN_POSDIAG
-    end
-end
-
-(*)(A::Symmetric{Float64,SparseMatrixCSC{Float64,Ti}},
-    B::SparseVecOrMat{Float64,Ti}) where {Ti} = sparse(Sparse(A)*Sparse(B))
-(*)(A::Hermitian{ComplexF64,SparseMatrixCSC{ComplexF64,Ti}},
-    B::SparseVecOrMat{ComplexF64,Ti}) where {Ti} = sparse(Sparse(A)*Sparse(B))
-(*)(A::Hermitian{Float64,SparseMatrixCSC{Float64,Ti}},
-    B::SparseVecOrMat{Float64,Ti}) where {Ti} = sparse(Sparse(A)*Sparse(B))
-
-(*)(A::SparseVecOrMat{Float64,Ti},
-    B::Symmetric{Float64,SparseMatrixCSC{Float64,Ti}}) where {Ti} = sparse(Sparse(A)*Sparse(B))
-(*)(A::SparseVecOrMat{ComplexF64,Ti},
-    B::Hermitian{ComplexF64,SparseMatrixCSC{ComplexF64,Ti}}) where {Ti} = sparse(Sparse(A)*Sparse(B))
-(*)(A::SparseVecOrMat{Float64,Ti},
-    B::Hermitian{Float64,SparseMatrixCSC{Float64,Ti}}) where {Ti} = sparse(Sparse(A)*Sparse(B))
-
-end #module
diff --git a/stdlib/SuiteSparse/src/cholmod_h.jl b/stdlib/SuiteSparse/src/cholmod_h.jl
deleted file mode 100644
index 26bb046b45dfca..00000000000000
--- a/stdlib/SuiteSparse/src/cholmod_h.jl
+++ /dev/null
@@ -1,79 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-## CHOLMOD
-const TRUE  = Int32(1)
-const FALSE = Int32(0)
-
-## itype defines the types of integer used:
-const INT     = Int32(0)  # all integer arrays are int
-const INTLONG = Int32(1)  # most are int, some are SuiteSparse_long
-const LONG    = Int32(2)  # all integer arrays are SuiteSparse_long
-
-## dtype defines what the numerical type is (double or float):
-const DOUBLE = Int32(0)        # all numerical values are double
-const SINGLE = Int32(1)        # all numerical values are float
-dtyp(::Type{Float32}) = SINGLE
-dtyp(::Type{Float64}) = DOUBLE
-dtyp(::Type{ComplexF32}) = SINGLE
-dtyp(::Type{ComplexF64}) = DOUBLE
-
-## xtype defines the kind of numerical values used:
-const PATTERN = Int32(0)       # pattern only, no numerical values
-const REAL    = Int32(1)       # a real matrix
-const COMPLEX = Int32(2)       # a complex matrix (ANSI C99 compatible)
-const ZOMPLEX = Int32(3)       # a complex matrix (MATLAB compatible)
-xtyp(::Type{Float32})    = REAL
-xtyp(::Type{Float64})    = REAL
-xtyp(::Type{ComplexF32}) = COMPLEX
-xtyp(::Type{ComplexF64}) = COMPLEX
-
-## Scaling modes, selected by the scale input parameter:
-const SCALAR = Int32(0)        # A = s*A
-const ROW    = Int32(1)        # A = diag(s)*A
-const COL    = Int32(2)        # A = A*diag(s)
-const SYM    = Int32(3)        # A = diag(s)*A*diag(s)
-
-## Types of systems to solve
-const CHOLMOD_A    = Int32(0)          # solve Ax=b
-const CHOLMOD_LDLt = Int32(1)          # solve LDL'x=b
-const CHOLMOD_LD   = Int32(2)          # solve LDx=b
-const CHOLMOD_DLt  = Int32(3)          # solve DL'x=b
-const CHOLMOD_L    = Int32(4)          # solve Lx=b
-const CHOLMOD_Lt   = Int32(5)          # solve L'x=b
-const CHOLMOD_D    = Int32(6)          # solve Dx=b
-const CHOLMOD_P    = Int32(7)          # permute x=Px
-const CHOLMOD_Pt   = Int32(8)          # permute x=P'x
-
-# Symmetry types
-const EMPTY                 =-1
-const MM_RECTANGULAR        = 1
-const MM_UNSYMMETRIC        = 2
-const MM_SYMMETRIC          = 3
-const MM_HERMITIAN          = 4
-const MM_SKEW_SYMMETRIC     = 5
-const MM_SYMMETRIC_POSDIAG  = 6
-const MM_HERMITIAN_POSDIAG  = 7
-
-# check the size of SuiteSparse_long
-if Int(ccall((:jl_cholmod_sizeof_long, :libsuitesparse_wrapper),Csize_t,())) == 4
-    const SuiteSparse_long = Int32
-    const IndexTypes = (:Int32,)
-    const ITypes = Union{Int32}
-else
-    const SuiteSparse_long = Int64
-    const IndexTypes = (:Int32, :Int64)
-    const ITypes = Union{Int32, Int64}
-end
-ityp(::Type{SuiteSparse_long}) = LONG
-
-
-const VTypes = Union{ComplexF64, Float64}
-const VRealTypes = Union{Float64}
-
-struct CHOLMODException <: Exception
-    msg::AbstractString
-end
-
-macro isok(A)
-    :($(esc(A)) == TRUE || throw(CHOLMODException("")))
-end
diff --git a/stdlib/SuiteSparse/src/deprecated.jl b/stdlib/SuiteSparse/src/deprecated.jl
deleted file mode 100644
index ee28d60dc44066..00000000000000
--- a/stdlib/SuiteSparse/src/deprecated.jl
+++ /dev/null
@@ -1 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
diff --git a/stdlib/SuiteSparse/src/spqr.jl b/stdlib/SuiteSparse/src/spqr.jl
deleted file mode 100644
index e94fa73994d03e..00000000000000
--- a/stdlib/SuiteSparse/src/spqr.jl
+++ /dev/null
@@ -1,416 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module SPQR
-
-import Base: \
-using Base: require_one_based_indexing
-using LinearAlgebra
-
-# ordering options */
-const ORDERING_FIXED   = Int32(0)
-const ORDERING_NATURAL = Int32(1)
-const ORDERING_COLAMD  = Int32(2)
-const ORDERING_GIVEN   = Int32(3) # only used for C/C++ interface
-const ORDERING_CHOLMOD = Int32(4) # CHOLMOD best-effort (COLAMD, METIS,...)
-const ORDERING_AMD     = Int32(5) # AMD(A'*A)
-const ORDERING_METIS   = Int32(6) # metis(A'*A)
-const ORDERING_DEFAULT = Int32(7) # SuiteSparseQR default ordering
-const ORDERING_BEST    = Int32(8) # try COLAMD, AMD, and METIS; pick best
-const ORDERING_BESTAMD = Int32(9) # try COLAMD and AMD; pick best#
-const ORDERINGS = [ORDERING_FIXED, ORDERING_NATURAL, ORDERING_COLAMD, ORDERING_CHOLMOD,
-                   ORDERING_AMD, ORDERING_METIS, ORDERING_DEFAULT, ORDERING_BEST,
-                   ORDERING_BESTAMD]
-
-# Let [m n] = size of the matrix after pruning singletons.  The default
-# ordering strategy is to use COLAMD if m <= 2*n.  Otherwise, AMD(A'A) is
-# tried.  If there is a high fill-in with AMD then try METIS(A'A) and take
-# the best of AMD and METIS. METIS is not tried if it isn't installed.
-
-using SparseArrays
-using SparseArrays: getcolptr
-using ..SuiteSparse.CHOLMOD
-using ..SuiteSparse.CHOLMOD: change_stype!, free!
-
-function _qr!(ordering::Integer, tol::Real, econ::Integer, getCTX::Integer,
-        A::Sparse{Tv},
-        Bsparse::Union{Sparse{Tv}                      , Ptr{Cvoid}} = C_NULL,
-        Bdense::Union{Dense{Tv}                        , Ptr{Cvoid}} = C_NULL,
-        Zsparse::Union{Ref{Ptr{CHOLMOD.C_Sparse{Tv}}}  , Ptr{Cvoid}} = C_NULL,
-        Zdense::Union{Ref{Ptr{CHOLMOD.C_Dense{Tv}}}    , Ptr{Cvoid}} = C_NULL,
-        R::Union{Ref{Ptr{CHOLMOD.C_Sparse{Tv}}}        , Ptr{Cvoid}} = C_NULL,
-        E::Union{Ref{Ptr{CHOLMOD.SuiteSparse_long}}    , Ptr{Cvoid}} = C_NULL,
-        H::Union{Ref{Ptr{CHOLMOD.C_Sparse{Tv}}}        , Ptr{Cvoid}} = C_NULL,
-        HPinv::Union{Ref{Ptr{CHOLMOD.SuiteSparse_long}}, Ptr{Cvoid}} = C_NULL,
-        HTau::Union{Ref{Ptr{CHOLMOD.C_Dense{Tv}}}      , Ptr{Cvoid}} = C_NULL) where {Tv<:CHOLMOD.VTypes}
-
-    ordering ∈ ORDERINGS || error("unknown ordering $ordering")
-
-    AA   = unsafe_load(pointer(A))
-    m, n = AA.nrow, AA.ncol
-    rnk  = ccall((:SuiteSparseQR_C, :libspqr), CHOLMOD.SuiteSparse_long,
-        (Cint, Cdouble, CHOLMOD.SuiteSparse_long, Cint,
-         Ptr{CHOLMOD.C_Sparse{Tv}}, Ptr{CHOLMOD.C_Sparse{Tv}}, Ptr{CHOLMOD.C_Dense{Tv}},
-         Ptr{Ptr{CHOLMOD.C_Sparse{Tv}}}, Ptr{Ptr{CHOLMOD.C_Dense{Tv}}}, Ptr{Ptr{CHOLMOD.C_Sparse{Tv}}},
-         Ptr{Ptr{CHOLMOD.SuiteSparse_long}}, Ptr{Ptr{CHOLMOD.C_Sparse{Tv}}}, Ptr{Ptr{CHOLMOD.SuiteSparse_long}},
-         Ptr{Ptr{CHOLMOD.C_Dense{Tv}}}, Ptr{Cvoid}),
-        ordering,       # all, except 3:given treated as 0:fixed
-        tol,            # columns with 2-norm <= tol treated as 0
-        econ,           # e = max(min(m,econ),rank(A))
-        getCTX,         # 0: Z=C (e-by-k), 1: Z=C', 2: Z=X (e-by-k)
-        A,              # m-by-n sparse matrix to factorize
-        Bsparse,        # sparse m-by-k B
-        Bdense,         # dense  m-by-k B
-        # /* outputs: */
-        Zsparse,        # sparse Z
-        Zdense,         # dense Z
-        R,              # e-by-n sparse matrix */
-        E,              # size n column perm, NULL if identity */
-        H,              # m-by-nh Householder vectors
-        HPinv,          # size m row permutation
-        HTau,           # 1-by-nh Householder coefficients
-        CHOLMOD.common_struct[Threads.threadid()]) # /* workspace and parameters */
-
-    if rnk < 0
-        error("Sparse QR factorization failed")
-    end
-
-    e = E[]
-    if e == C_NULL
-        _E = Vector{CHOLMOD.SuiteSparse_long}()
-    else
-        _E = Vector{CHOLMOD.SuiteSparse_long}(undef, n)
-        for i in 1:n
-            @inbounds _E[i] = unsafe_load(e, i) + 1
-        end
-        # Free memory allocated by SPQR. This call will make sure that the
-        # correct deallocator function is called and that the memory count in
-        # the common struct is updated
-        ccall((:cholmod_l_free, :libcholmod), Cvoid,
-            (Csize_t, Cint, Ptr{CHOLMOD.SuiteSparse_long}, Ptr{Cvoid}),
-            n, sizeof(CHOLMOD.SuiteSparse_long), e, CHOLMOD.common_struct[Threads.threadid()])
-    end
-    hpinv = HPinv[]
-    if hpinv == C_NULL
-        _HPinv = Vector{CHOLMOD.SuiteSparse_long}()
-    else
-        _HPinv = Vector{CHOLMOD.SuiteSparse_long}(undef, m)
-        for i in 1:m
-            @inbounds _HPinv[i] = unsafe_load(hpinv, i) + 1
-        end
-        # Free memory allocated by SPQR. This call will make sure that the
-        # correct deallocator function is called and that the memory count in
-        # the common struct is updated
-        ccall((:cholmod_l_free, :libcholmod), Cvoid,
-            (Csize_t, Cint, Ptr{CHOLMOD.SuiteSparse_long}, Ptr{Cvoid}),
-            m, sizeof(CHOLMOD.SuiteSparse_long), hpinv, CHOLMOD.common_struct[Threads.threadid()])
-    end
-
-    return rnk, _E, _HPinv
-end
-
-# Struct for storing sparse QR from SPQR such that
-# A[invperm(rpivinv), cpiv] = (I - factors[:,1]*τ[1]*factors[:,1]')*...*(I - factors[:,k]*τ[k]*factors[:,k]')*R
-# with k = size(factors, 2).
-struct QRSparse{Tv,Ti} <: LinearAlgebra.Factorization{Tv}
-    factors::SparseMatrixCSC{Tv,Ti}
-    τ::Vector{Tv}
-    R::SparseMatrixCSC{Tv,Ti}
-    cpiv::Vector{Ti}
-    rpivinv::Vector{Ti}
-end
-
-Base.size(F::QRSparse) = (size(F.factors, 1), size(F.R, 2))
-function Base.size(F::QRSparse, i::Integer)
-    if i == 1
-        return size(F.factors, 1)
-    elseif i == 2
-        return size(F.R, 2)
-    elseif i > 2
-        return 1
-    else
-        throw(ArgumentError("second argument must be positive"))
-    end
-end
-Base.axes(F::QRSparse) = map(Base.OneTo, size(F))
-
-struct QRSparseQ{Tv<:CHOLMOD.VTypes,Ti<:Integer} <: LinearAlgebra.AbstractQ{Tv}
-    factors::SparseMatrixCSC{Tv,Ti}
-    τ::Vector{Tv}
-    n::Int # Number of columns in original matrix
-end
-
-Base.size(Q::QRSparseQ) = (size(Q.factors, 1), size(Q.factors, 1))
-Base.axes(Q::QRSparseQ) = map(Base.OneTo, size(Q))
-
-Matrix{T}(Q::QRSparseQ) where {T} = lmul!(Q, Matrix{T}(I, size(Q, 1), min(size(Q, 1), Q.n)))
-
-# From SPQR manual p. 6
-_default_tol(A::SparseMatrixCSC) =
-    20*sum(size(A))*eps(real(eltype(A)))*maximum(norm(view(A, :, i)) for i in 1:size(A, 2))
-
-"""
-    qr(A::SparseMatrixCSC; tol=_default_tol(A), ordering=ORDERING_DEFAULT) -> QRSparse
-
-Compute the `QR` factorization of a sparse matrix `A`. Fill-reducing row and column permutations
-are used such that `F.R = F.Q'*A[F.prow,F.pcol]`. The main application of this type is to
-solve least squares or underdetermined problems with [`\\`](@ref). The function calls the C library SPQR.
-
-!!! note
-    `qr(A::SparseMatrixCSC)` uses the SPQR library that is part of SuiteSparse.
-    As this library only supports sparse matrices with [`Float64`](@ref) or
-    `ComplexF64` elements, as of Julia v1.4 `qr` converts `A` into a copy that is
-    of type `SparseMatrixCSC{Float64}` or `SparseMatrixCSC{ComplexF64}` as appropriate.
-
-# Examples
-```jldoctest
-julia> A = sparse([1,2,3,4], [1,1,2,2], [1.0,1.0,1.0,1.0])
-4×2 SparseMatrixCSC{Float64, Int64} with 4 stored entries:
- 1.0   ⋅
- 1.0   ⋅
-  ⋅   1.0
-  ⋅   1.0
-
-julia> qr(A)
-SuiteSparse.SPQR.QRSparse{Float64, Int64}
-Q factor:
-4×4 SuiteSparse.SPQR.QRSparseQ{Float64, Int64}:
- -0.707107   0.0        0.0       -0.707107
-  0.0       -0.707107  -0.707107   0.0
-  0.0       -0.707107   0.707107   0.0
- -0.707107   0.0        0.0        0.707107
-R factor:
-2×2 SparseMatrixCSC{Float64, Int64} with 2 stored entries:
- -1.41421    ⋅
-   ⋅       -1.41421
-Row permutation:
-4-element Vector{Int64}:
- 1
- 3
- 4
- 2
-Column permutation:
-2-element Vector{Int64}:
- 1
- 2
-```
-"""
-function LinearAlgebra.qr(A::SparseMatrixCSC{Tv}; tol=_default_tol(A), ordering=ORDERING_DEFAULT) where {Tv <: CHOLMOD.VTypes}
-    R     = Ref{Ptr{CHOLMOD.C_Sparse{Tv}}}()
-    E     = Ref{Ptr{CHOLMOD.SuiteSparse_long}}()
-    H     = Ref{Ptr{CHOLMOD.C_Sparse{Tv}}}()
-    HPinv = Ref{Ptr{CHOLMOD.SuiteSparse_long}}()
-    HTau  = Ref{Ptr{CHOLMOD.C_Dense{Tv}}}(C_NULL)
-
-    # SPQR doesn't accept symmetric matrices so we explicitly set the stype
-    r, p, hpinv = _qr!(ordering, tol, 0, 0, Sparse(A, 0),
-        C_NULL, C_NULL, C_NULL, C_NULL,
-        R, E, H, HPinv, HTau)
-
-    R_ = SparseMatrixCSC(Sparse(R[]))
-    return QRSparse(SparseMatrixCSC(Sparse(H[])),
-                    vec(Array(CHOLMOD.Dense(HTau[]))),
-                    SparseMatrixCSC(min(size(A)...),
-                                    size(R_, 2),
-                                    getcolptr(R_),
-                                    rowvals(R_),
-                                    nonzeros(R_)),
-                    p, hpinv)
-end
-LinearAlgebra.qr(A::SparseMatrixCSC{<:Union{Float16,Float32}}; tol=_default_tol(A)) =
-    qr(convert(SparseMatrixCSC{Float64}, A); tol=tol)
-LinearAlgebra.qr(A::SparseMatrixCSC{<:Union{ComplexF16,ComplexF32}}; tol=_default_tol(A)) =
-    qr(convert(SparseMatrixCSC{ComplexF64}, A); tol=tol)
-LinearAlgebra.qr(A::Union{SparseMatrixCSC{T},SparseMatrixCSC{Complex{T}}};
-   tol=_default_tol(A)) where {T<:AbstractFloat} =
-    throw(ArgumentError(string("matrix type ", typeof(A), "not supported. ",
-    "Try qr(convert(SparseMatrixCSC{Float64/ComplexF64, Int}, A)) for ",
-    "sparse floating point QR using SPQR or qr(Array(A)) for generic ",
-    "dense QR.")))
-LinearAlgebra.qr(A::SparseMatrixCSC; tol=_default_tol(A)) = qr(float(A); tol=tol)
-
-function LinearAlgebra.lmul!(Q::QRSparseQ, A::StridedVecOrMat)
-    if size(A, 1) != size(Q, 1)
-        throw(DimensionMismatch("size(Q) = $(size(Q)) but size(A) = $(size(A))"))
-    end
-    for l in size(Q.factors, 2):-1:1
-        τl = -Q.τ[l]
-        h = view(Q.factors, :, l)
-        for j in 1:size(A, 2)
-            a = view(A, :, j)
-            LinearAlgebra.axpy!(τl*dot(h, a), h, a)
-        end
-    end
-    return A
-end
-
-function LinearAlgebra.rmul!(A::StridedMatrix, Q::QRSparseQ)
-    if size(A, 2) != size(Q, 1)
-        throw(DimensionMismatch("size(Q) = $(size(Q)) but size(A) = $(size(A))"))
-    end
-    tmp = similar(A, size(A, 1))
-    for l in 1:size(Q.factors, 2)
-        τl = -Q.τ[l]
-        h = view(Q.factors, :, l)
-        LinearAlgebra.mul!(tmp, A, h)
-        LinearAlgebra.lowrankupdate!(A, tmp, h, τl)
-    end
-    return A
-end
-
-function LinearAlgebra.lmul!(adjQ::Adjoint{<:Any,<:QRSparseQ}, A::StridedVecOrMat)
-    Q = adjQ.parent
-    if size(A, 1) != size(Q, 1)
-        throw(DimensionMismatch("size(Q) = $(size(Q)) but size(A) = $(size(A))"))
-    end
-    for l in 1:size(Q.factors, 2)
-        τl = -Q.τ[l]
-        h = view(Q.factors, :, l)
-        for j in 1:size(A, 2)
-            a = view(A, :, j)
-            LinearAlgebra.axpy!(τl'*dot(h, a), h, a)
-        end
-    end
-    return A
-end
-
-function LinearAlgebra.rmul!(A::StridedMatrix, adjQ::Adjoint{<:Any,<:QRSparseQ})
-    Q = adjQ.parent
-    if size(A, 2) != size(Q, 1)
-        throw(DimensionMismatch("size(Q) = $(size(Q)) but size(A) = $(size(A))"))
-    end
-    tmp = similar(A, size(A, 1))
-    for l in size(Q.factors, 2):-1:1
-        τl = -Q.τ[l]
-        h = view(Q.factors, :, l)
-        LinearAlgebra.mul!(tmp, A, h)
-        LinearAlgebra.lowrankupdate!(A, tmp, h, τl')
-    end
-    return A
-end
-
-@inline function Base.getproperty(F::QRSparse, d::Symbol)
-    if d === :Q
-        return QRSparseQ(F.factors, F.τ, size(F, 2))
-    elseif d === :prow
-        return invperm(F.rpivinv)
-    elseif d === :pcol
-        return F.cpiv
-    else
-        getfield(F, d)
-    end
-end
-
-function Base.propertynames(F::QRSparse, private::Bool=false)
-    public = (:R, :Q, :prow, :pcol)
-    private ? ((public ∪ fieldnames(typeof(F)))...,) : public
-end
-
-function Base.show(io::IO, mime::MIME{Symbol("text/plain")}, F::QRSparse)
-    summary(io, F); println(io)
-    println(io, "Q factor:")
-    show(io, mime, F.Q)
-    println(io, "\nR factor:")
-    show(io, mime, F.R)
-    println(io, "\nRow permutation:")
-    show(io, mime, F.prow)
-    println(io, "\nColumn permutation:")
-    show(io, mime, F.pcol)
-end
-
-# With a real lhs and complex rhs with the same precision, we can reinterpret
-# the complex rhs as a real rhs with twice the number of columns
-#
-# This definition is similar to the definition in factorization.jl except that
-# here we have to use \ instead of ldiv! because of limitations in SPQR
-
-## Two helper methods
-_ret_size(F::QRSparse, b::AbstractVector) = (size(F, 2),)
-_ret_size(F::QRSparse, B::AbstractMatrix) = (size(F, 2), size(B, 2))
-
-LinearAlgebra.rank(F::QRSparse) = reduce(max, view(rowvals(F.R), 1:nnz(F.R)), init = eltype(rowvals(F.R))(0))
-LinearAlgebra.rank(S::SparseMatrixCSC) = rank(qr(S))
-
-function (\)(F::QRSparse{T}, B::VecOrMat{Complex{T}}) where T<:LinearAlgebra.BlasReal
-# |z1|z3|  reinterpret  |x1|x2|x3|x4|  transpose  |x1|y1|  reshape  |x1|y1|x3|y3|
-# |z2|z4|      ->       |y1|y2|y3|y4|     ->      |x2|y2|     ->    |x2|y2|x4|y4|
-#                                                 |x3|y3|
-#                                                 |x4|y4|
-    require_one_based_indexing(F, B)
-    c2r = reshape(copy(transpose(reinterpret(T, reshape(B, (1, length(B)))))), size(B, 1), 2*size(B, 2))
-    x = F\c2r
-
-# |z1|z3|  reinterpret  |x1|x2|x3|x4|  transpose  |x1|y1|  reshape  |x1|y1|x3|y3|
-# |z2|z4|      <-       |y1|y2|y3|y4|     <-      |x2|y2|     <-    |x2|y2|x4|y4|
-#                                                 |x3|y3|
-#                                                 |x4|y4|
-    return collect(reshape(reinterpret(Complex{T}, copy(transpose(reshape(x, (length(x) >> 1), 2)))), _ret_size(F, B)))
-end
-
-function _ldiv_basic(F::QRSparse, B::StridedVecOrMat)
-    if size(F, 1) != size(B, 1)
-        throw(DimensionMismatch("size(F) = $(size(F)) but size(B) = $(size(B))"))
-    end
-
-    # The rank of F equal might be reduced
-    rnk = rank(F)
-
-    # allocate an array for the return value large enough to hold B and X
-    # For overdetermined problem, B is larger than X and vice versa
-    X   = similar(B, ntuple(i -> i == 1 ? max(size(F, 2), size(B, 1)) : size(B, 2), Val(ndims(B))))
-
-    # Fill will zeros. These will eventually become the zeros in the basic solution
-    # fill!(X, 0)
-    # Apply left permutation to the solution and store in X
-    for j in 1:size(B, 2)
-        for i in 1:length(F.rpivinv)
-            @inbounds X[F.rpivinv[i], j] = B[i, j]
-        end
-    end
-
-    # Make a view into X corresponding to the size of B
-    X0 = view(X, 1:size(B, 1), :)
-
-    # Apply Q' to B
-    LinearAlgebra.lmul!(adjoint(F.Q), X0)
-
-    # Zero out to get basic solution
-    X[rnk + 1:end, :] .= 0
-
-    # Solve R*X = B
-    LinearAlgebra.ldiv!(UpperTriangular(F.R[Base.OneTo(rnk), Base.OneTo(rnk)]),
-                        view(X0, Base.OneTo(rnk), :))
-
-    # Apply right permutation and extract solution from X
-    # NB: cpiv == [] if SPQR was called with ORDERING_FIXED
-    if length(F.cpiv) == 0
-      return getindex(X, ntuple(i -> i == 1 ? (1:size(F,2)) : :, Val(ndims(B)))...)
-    end
-    return getindex(X, ntuple(i -> i == 1 ? invperm(F.cpiv) : :, Val(ndims(B)))...)
-end
-
-(\)(F::QRSparse{T}, B::StridedVecOrMat{T}) where {T} = _ldiv_basic(F, B)
-"""
-    (\\)(F::QRSparse, B::StridedVecOrMat)
-
-Solve the least squares problem ``\\min\\|Ax - b\\|^2`` or the linear system of equations
-``Ax=b`` when `F` is the sparse QR factorization of ``A``. A basic solution is returned
-when the problem is underdetermined.
-
-# Examples
-```jldoctest
-julia> A = sparse([1,2,4], [1,1,1], [1.0,1.0,1.0], 4, 2)
-4×2 SparseMatrixCSC{Float64, Int64} with 3 stored entries:
- 1.0   ⋅
- 1.0   ⋅
-  ⋅    ⋅
- 1.0   ⋅
-
-julia> qr(A)\\fill(1.0, 4)
-2-element Vector{Float64}:
- 1.0
- 0.0
-```
-"""
-(\)(F::QRSparse, B::StridedVecOrMat) = F\convert(AbstractArray{eltype(F)}, B)
-
-end # module
diff --git a/stdlib/SuiteSparse/src/umfpack.jl b/stdlib/SuiteSparse/src/umfpack.jl
deleted file mode 100644
index a6e0cf54d4b23a..00000000000000
--- a/stdlib/SuiteSparse/src/umfpack.jl
+++ /dev/null
@@ -1,643 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module UMFPACK
-
-export UmfpackLU
-
-import Base: (\), getproperty, show, size
-using LinearAlgebra
-import LinearAlgebra: Factorization, det, lu, lu!, ldiv!
-
-using SparseArrays
-using SparseArrays: getcolptr
-import SparseArrays: nnz
-
-import Serialization: AbstractSerializer, deserialize
-
-import ..increment, ..increment!, ..decrement, ..decrement!
-
-include("umfpack_h.jl")
-struct MatrixIllConditionedException <: Exception
-    msg::AbstractString
-end
-
-function umferror(status::Integer)
-    if status==UMFPACK_OK
-        return
-    elseif status==UMFPACK_WARNING_singular_matrix
-        throw(LinearAlgebra.SingularException(0))
-    elseif status==UMFPACK_WARNING_determinant_underflow
-        throw(MatrixIllConditionedException("the determinant is nonzero but underflowed"))
-    elseif status==UMFPACK_WARNING_determinant_overflow
-        throw(MatrixIllConditionedException("the determinant overflowed"))
-    elseif status==UMFPACK_ERROR_out_of_memory
-        throw(OutOfMemoryError())
-    elseif status==UMFPACK_ERROR_invalid_Numeric_object
-        throw(ArgumentError("invalid UMFPack numeric object"))
-    elseif status==UMFPACK_ERROR_invalid_Symbolic_object
-        throw(ArgumentError("invalid UMFPack symbolic object"))
-    elseif status==UMFPACK_ERROR_argument_missing
-        throw(ArgumentError("a required argument to UMFPack is missing"))
-    elseif status==UMFPACK_ERROR_n_nonpositive
-        throw(ArgumentError("the number of rows or columns of the matrix must be greater than zero"))
-    elseif status==UMFPACK_ERROR_invalid_matrix
-        throw(ArgumentError("invalid matrix"))
-    elseif status==UMFPACK_ERROR_different_pattern
-        throw(ArgumentError("pattern of the matrix changed"))
-    elseif status==UMFPACK_ERROR_invalid_system
-        throw(ArgumentError("invalid sys argument provided to UMFPack solver"))
-    elseif status==UMFPACK_ERROR_invalid_permutation
-        throw(ArgumentError("invalid permutation"))
-    elseif status==UMFPACK_ERROR_file_IO
-        throw(ErrorException("error saving / loading UMFPack decomposition"))
-    elseif status==UMFPACK_ERROR_ordering_failed
-        throw(ErrorException("the ordering method failed"))
-    elseif status==UMFPACK_ERROR_internal_error
-        throw(ErrorException("an internal error has occurred, of unknown cause"))
-    else
-        throw(ErrorException("unknown UMFPack error code: $status"))
-    end
-end
-
-macro isok(A)
-    :(umferror($(esc(A))))
-end
-
-# check the size of SuiteSparse_long
-if Int(ccall((:jl_cholmod_sizeof_long,:libsuitesparse_wrapper),Csize_t,())) == 4
-    const UmfpackIndexTypes = (:Int32,)
-    const UMFITypes = Int32
-else
-    const UmfpackIndexTypes = (:Int32, :Int64)
-    const UMFITypes = Union{Int32, Int64}
-end
-
-const UMFVTypes = Union{Float64,ComplexF64}
-
-## UMFPACK
-
-# the control and info arrays
-const umf_ctrl = Vector{Float64}(undef, UMFPACK_CONTROL)
-ccall((:umfpack_dl_defaults,:libumfpack), Cvoid, (Ptr{Float64},), umf_ctrl)
-const umf_info = Vector{Float64}(undef, UMFPACK_INFO)
-
-function show_umf_ctrl(level::Real = 2.0)
-    old_prt::Float64 = umf_ctrl[1]
-    umf_ctrl[1] = Float64(level)
-    ccall((:umfpack_dl_report_control, :libumfpack), Cvoid, (Ptr{Float64},), umf_ctrl)
-    umf_ctrl[1] = old_prt
-end
-
-function show_umf_info(level::Real = 2.0)
-    old_prt::Float64 = umf_ctrl[1]
-    umf_ctrl[1] = Float64(level)
-    ccall((:umfpack_dl_report_info, :libumfpack), Cvoid,
-          (Ptr{Float64}, Ptr{Float64}), umf_ctrl, umf_info)
-    umf_ctrl[1] = old_prt
-end
-
-## Should this type be immutable?
-mutable struct UmfpackLU{Tv<:UMFVTypes,Ti<:UMFITypes} <: Factorization{Tv}
-    symbolic::Ptr{Cvoid}
-    numeric::Ptr{Cvoid}
-    m::Int
-    n::Int
-    colptr::Vector{Ti}                  # 0-based column pointers
-    rowval::Vector{Ti}                  # 0-based row indices
-    nzval::Vector{Tv}
-    status::Int
-end
-
-Base.adjoint(F::UmfpackLU) = Adjoint(F)
-Base.transpose(F::UmfpackLU) = Transpose(F)
-
-"""
-    lu(A::SparseMatrixCSC; check = true) -> F::UmfpackLU
-
-Compute the LU factorization of a sparse matrix `A`.
-
-For sparse `A` with real or complex element type, the return type of `F` is
-`UmfpackLU{Tv, Ti}`, with `Tv` = [`Float64`](@ref) or `ComplexF64` respectively and
-`Ti` is an integer type ([`Int32`](@ref) or [`Int64`](@ref)).
-
-When `check = true`, an error is thrown if the decomposition fails.
-When `check = false`, responsibility for checking the decomposition's
-validity (via [`issuccess`](@ref)) lies with the user.
-
-The individual components of the factorization `F` can be accessed by indexing:
-
-| Component | Description                         |
-|:----------|:------------------------------------|
-| `L`       | `L` (lower triangular) part of `LU` |
-| `U`       | `U` (upper triangular) part of `LU` |
-| `p`       | right permutation `Vector`          |
-| `q`       | left permutation `Vector`           |
-| `Rs`      | `Vector` of scaling factors         |
-| `:`       | `(L,U,p,q,Rs)` components           |
-
-The relation between `F` and `A` is
-
-`F.L*F.U == (F.Rs .* A)[F.p, F.q]`
-
-`F` further supports the following functions:
-
-- [`\\`](@ref)
-- [`cond`](@ref)
-- [`det`](@ref)
-
-!!! note
-    `lu(A::SparseMatrixCSC)` uses the UMFPACK library that is part of
-    SuiteSparse. As this library only supports sparse matrices with [`Float64`](@ref) or
-    `ComplexF64` elements, `lu` converts `A` into a copy that is of type
-    `SparseMatrixCSC{Float64}` or `SparseMatrixCSC{ComplexF64}` as appropriate.
-"""
-function lu(S::SparseMatrixCSC{<:UMFVTypes,<:UMFITypes}; check::Bool = true)
-    zerobased = getcolptr(S)[1] == 0
-    res = UmfpackLU(C_NULL, C_NULL, size(S, 1), size(S, 2),
-                    zerobased ? copy(getcolptr(S)) : decrement(getcolptr(S)),
-                    zerobased ? copy(rowvals(S)) : decrement(rowvals(S)),
-                    copy(nonzeros(S)), 0)
-    finalizer(umfpack_free_symbolic, res)
-    umfpack_numeric!(res)
-    check && (issuccess(res) || throw(LinearAlgebra.SingularException(0)))
-    return res
-end
-lu(A::SparseMatrixCSC{<:Union{Float16,Float32},Ti};
-   check::Bool = true) where {Ti<:UMFITypes} =
-    lu(convert(SparseMatrixCSC{Float64,Ti}, A); check = check)
-lu(A::SparseMatrixCSC{<:Union{ComplexF16,ComplexF32},Ti};
-   check::Bool = true) where {Ti<:UMFITypes} =
-    lu(convert(SparseMatrixCSC{ComplexF64,Ti}, A); check = check)
-lu(A::Union{SparseMatrixCSC{T},SparseMatrixCSC{Complex{T}}};
-   check::Bool = true) where {T<:AbstractFloat} =
-    throw(ArgumentError(string("matrix type ", typeof(A), "not supported. ",
-    "Try lu(convert(SparseMatrixCSC{Float64/ComplexF64,Int}, A)) for ",
-    "sparse floating point LU using UMFPACK or lu(Array(A)) for generic ",
-    "dense LU.")))
-lu(A::SparseMatrixCSC; check::Bool = true) = lu(float(A); check = check)
-
-"""
-    lu!(F::UmfpackLU, A::SparseMatrixCSC; check=true) -> F::UmfpackLU
-
-Compute the LU factorization of a sparse matrix `A`, reusing the symbolic
-factorization of an already existing LU factorization stored in `F`. The
-sparse matrix `A` must have an identical nonzero pattern as the matrix used
-to create the LU factorization `F`, otherwise an error is thrown.
-
-When `check = true`, an error is thrown if the decomposition fails.
-When `check = false`, responsibility for checking the decomposition's
-validity (via [`issuccess`](@ref)) lies with the user.
-
-!!! note
-    `lu!(F::UmfpackLU, A::SparseMatrixCSC)` uses the UMFPACK library that is part of
-    SuiteSparse. As this library only supports sparse matrices with [`Float64`](@ref) or
-    `ComplexF64` elements, `lu!` converts `A` into a copy that is of type
-    `SparseMatrixCSC{Float64}` or `SparseMatrixCSC{ComplexF64}` as appropriate.
-
-!!! compat "Julia 1.5"
-    `lu!` for `UmfpackLU` requires at least Julia 1.5.
-
-# Examples
-```jldoctest
-julia> A = sparse(Float64[1.0 2.0; 0.0 3.0]);
-
-julia> F = lu(A);
-
-julia> B = sparse(Float64[1.0 1.0; 0.0 1.0]);
-
-julia> lu!(F, B);
-
-julia> F \\ ones(2)
-2-element Vector{Float64}:
- 0.0
- 1.0
-```
-"""
-function lu!(F::UmfpackLU, S::SparseMatrixCSC{<:UMFVTypes,<:UMFITypes}; check::Bool=true)
-    zerobased = getcolptr(S)[1] == 0
-    F.m = size(S, 1)
-    F.n = size(S, 2)
-    F.colptr = zerobased ? copy(getcolptr(S)) : decrement(getcolptr(S))
-    F.rowval = zerobased ? copy(rowvals(S)) : decrement(rowvals(S))
-    F.nzval = copy(nonzeros(S))
-
-    umfpack_numeric!(F, reuse_numeric = false)
-    check && (issuccess(F) || throw(LinearAlgebra.SingularException(0)))
-    return F
-end
-lu!(F::UmfpackLU, A::SparseMatrixCSC{<:Union{Float16,Float32},Ti};
-   check::Bool = true) where {Ti<:UMFITypes} =
-    lu!(F, convert(SparseMatrixCSC{Float64,Ti}, A); check = check)
-lu!(F::UmfpackLU, A::SparseMatrixCSC{<:Union{ComplexF16,ComplexF32},Ti};
-   check::Bool = true) where {Ti<:UMFITypes} =
-    lu!(F, convert(SparseMatrixCSC{ComplexF64,Ti}, A); check = check)
-lu!(F::UmfpackLU, A::Union{SparseMatrixCSC{T},SparseMatrixCSC{Complex{T}}};
-   check::Bool = true) where {T<:AbstractFloat} =
-    throw(ArgumentError(string("matrix type ", typeof(A), "not supported.")))
-lu!(F::UmfpackLU, A::SparseMatrixCSC; check::Bool = true) = lu!(F, float(A); check = check)
-
-size(F::UmfpackLU) = (F.m, F.n)
-function size(F::UmfpackLU, dim::Integer)
-    if dim < 1
-        throw(ArgumentError("size: dimension $dim out of range"))
-    elseif dim == 1
-        return Int(F.m)
-    elseif dim == 2
-        return Int(F.n)
-    else
-        return 1
-    end
-end
-
-function show(io::IO, mime::MIME{Symbol("text/plain")}, F::UmfpackLU)
-    if F.numeric != C_NULL
-        if issuccess(F)
-            summary(io, F); println(io)
-            println(io, "L factor:")
-            show(io, mime, F.L)
-            println(io, "\nU factor:")
-            show(io, mime, F.U)
-        else
-            print(io, "Failed factorization of type $(typeof(F))")
-        end
-    end
-end
-
-function deserialize(s::AbstractSerializer, t::Type{UmfpackLU{Tv,Ti}}) where {Tv,Ti}
-    symbolic = deserialize(s)
-    numeric  = deserialize(s)
-    m        = deserialize(s)
-    n        = deserialize(s)
-    colptr   = deserialize(s)
-    rowval   = deserialize(s)
-    nzval    = deserialize(s)
-    status   = deserialize(s)
-    obj      = UmfpackLU{Tv,Ti}(symbolic, numeric, m, n, colptr, rowval, nzval, status)
-
-    finalizer(umfpack_free_symbolic, obj)
-
-    return obj
-end
-
-## Wrappers for UMFPACK functions
-
-# generate the name of the C function according to the value and integer types
-umf_nm(nm,Tv,Ti) = "umfpack_" * (Tv === :Float64 ? "d" : "z") * (Ti === :Int64 ? "l_" : "i_") * nm
-
-for itype in UmfpackIndexTypes
-    sym_r = umf_nm("symbolic", :Float64, itype)
-    sym_c = umf_nm("symbolic", :ComplexF64, itype)
-    num_r = umf_nm("numeric", :Float64, itype)
-    num_c = umf_nm("numeric", :ComplexF64, itype)
-    sol_r = umf_nm("solve", :Float64, itype)
-    sol_c = umf_nm("solve", :ComplexF64, itype)
-    det_r = umf_nm("get_determinant", :Float64, itype)
-    det_z = umf_nm("get_determinant", :ComplexF64, itype)
-    lunz_r = umf_nm("get_lunz", :Float64, itype)
-    lunz_z = umf_nm("get_lunz", :ComplexF64, itype)
-    get_num_r = umf_nm("get_numeric", :Float64, itype)
-    get_num_z = umf_nm("get_numeric", :ComplexF64, itype)
-    @eval begin
-        function umfpack_symbolic!(U::UmfpackLU{Float64,$itype})
-            if U.symbolic != C_NULL return U end
-            tmp = Vector{Ptr{Cvoid}}(undef, 1)
-            @isok ccall(($sym_r, :libumfpack), $itype,
-                        ($itype, $itype, Ptr{$itype}, Ptr{$itype}, Ptr{Float64}, Ptr{Cvoid},
-                         Ptr{Float64}, Ptr{Float64}),
-                        U.m, U.n, U.colptr, U.rowval, U.nzval, tmp,
-                        umf_ctrl, umf_info)
-            U.symbolic = tmp[1]
-            return U
-        end
-        function umfpack_symbolic!(U::UmfpackLU{ComplexF64,$itype})
-            if U.symbolic != C_NULL return U end
-            tmp = Vector{Ptr{Cvoid}}(undef, 1)
-            @isok ccall(($sym_c, :libumfpack), $itype,
-                        ($itype, $itype, Ptr{$itype}, Ptr{$itype}, Ptr{Float64}, Ptr{Float64}, Ptr{Cvoid},
-                         Ptr{Float64}, Ptr{Float64}),
-                        U.m, U.n, U.colptr, U.rowval, real(U.nzval), imag(U.nzval), tmp,
-                        umf_ctrl, umf_info)
-            U.symbolic = tmp[1]
-            return U
-        end
-        function umfpack_numeric!(U::UmfpackLU{Float64,$itype}; reuse_numeric = true)
-            if (reuse_numeric && U.numeric != C_NULL) return U end
-            if U.symbolic == C_NULL umfpack_symbolic!(U) end
-            tmp = Vector{Ptr{Cvoid}}(undef, 1)
-            status = ccall(($num_r, :libumfpack), $itype,
-                           (Ptr{$itype}, Ptr{$itype}, Ptr{Float64}, Ptr{Cvoid}, Ptr{Cvoid},
-                            Ptr{Float64}, Ptr{Float64}),
-                           U.colptr, U.rowval, U.nzval, U.symbolic, tmp,
-                           umf_ctrl, umf_info)
-            U.status = status
-            if status != UMFPACK_WARNING_singular_matrix
-                umferror(status)
-            end
-            U.numeric != C_NULL && umfpack_free_numeric(U)
-            U.numeric = tmp[1]
-            return U
-        end
-        function umfpack_numeric!(U::UmfpackLU{ComplexF64,$itype}; reuse_numeric = true)
-            if (reuse_numeric && U.numeric != C_NULL) return U end
-            if U.symbolic == C_NULL umfpack_symbolic!(U) end
-            tmp = Vector{Ptr{Cvoid}}(undef, 1)
-            status = ccall(($num_c, :libumfpack), $itype,
-                           (Ptr{$itype}, Ptr{$itype}, Ptr{Float64}, Ptr{Float64}, Ptr{Cvoid}, Ptr{Cvoid},
-                            Ptr{Float64}, Ptr{Float64}),
-                           U.colptr, U.rowval, real(U.nzval), imag(U.nzval), U.symbolic, tmp,
-                           umf_ctrl, umf_info)
-            U.status = status
-            if status != UMFPACK_WARNING_singular_matrix
-                umferror(status)
-            end
-            U.numeric != C_NULL && umfpack_free_numeric(U)
-            U.numeric = tmp[1]
-            return U
-        end
-        function solve!(x::StridedVector{Float64}, lu::UmfpackLU{Float64,$itype}, b::StridedVector{Float64}, typ::Integer)
-            if x === b
-                throw(ArgumentError("output array must not be aliased with input array"))
-            end
-            if stride(x, 1) != 1 || stride(b, 1) != 1
-                throw(ArgumentError("in and output vectors must have unit strides"))
-            end
-            umfpack_numeric!(lu)
-            (size(b,1) == lu.m) && (size(b) == size(x)) || throw(DimensionMismatch())
-            @isok ccall(($sol_r, :libumfpack), $itype,
-                ($itype, Ptr{$itype}, Ptr{$itype}, Ptr{Float64},
-                 Ptr{Float64}, Ptr{Float64}, Ptr{Cvoid}, Ptr{Float64},
-                 Ptr{Float64}),
-                typ, lu.colptr, lu.rowval, lu.nzval,
-                x, b, lu.numeric, umf_ctrl,
-                umf_info)
-            return x
-        end
-        function solve!(x::StridedVector{ComplexF64}, lu::UmfpackLU{ComplexF64,$itype}, b::StridedVector{ComplexF64}, typ::Integer)
-            if x === b
-                throw(ArgumentError("output array must not be aliased with input array"))
-            end
-            if stride(x, 1) != 1 || stride(b, 1) != 1
-                throw(ArgumentError("in and output vectors must have unit strides"))
-            end
-            umfpack_numeric!(lu)
-            (size(b, 1) == lu.m) && (size(b) == size(x)) || throw(DimensionMismatch())
-            n = size(b, 1)
-            @isok ccall(($sol_c, :libumfpack), $itype,
-                        ($itype, Ptr{$itype}, Ptr{$itype}, Ptr{Float64},
-                         Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Ptr{Float64},
-                         Ptr{Float64}, Ptr{Cvoid}, Ptr{Float64}, Ptr{Float64}),
-                        typ, lu.colptr, lu.rowval, lu.nzval,
-                        C_NULL, x, C_NULL, b,
-                        C_NULL, lu.numeric, umf_ctrl, umf_info)
-            return x
-        end
-        function det(lu::UmfpackLU{Float64,$itype})
-            mx = Ref{Float64}()
-            @isok ccall(($det_r,:libumfpack), $itype,
-                           (Ptr{Float64},Ptr{Float64},Ptr{Cvoid},Ptr{Float64}),
-                           mx, C_NULL, lu.numeric, umf_info)
-            mx[]
-        end
-        function det(lu::UmfpackLU{ComplexF64,$itype})
-            mx = Ref{Float64}()
-            mz = Ref{Float64}()
-            @isok ccall(($det_z,:libumfpack), $itype,
-                        (Ptr{Float64},Ptr{Float64},Ptr{Float64},Ptr{Cvoid},Ptr{Float64}),
-                        mx, mz, C_NULL, lu.numeric, umf_info)
-            complex(mx[], mz[])
-        end
-        function umf_lunz(lu::UmfpackLU{Float64,$itype})
-            lnz = Ref{$itype}()
-            unz = Ref{$itype}()
-            n_row = Ref{$itype}()
-            n_col = Ref{$itype}()
-            nz_diag = Ref{$itype}()
-            @isok ccall(($lunz_r,:libumfpack), $itype,
-                           (Ptr{$itype},Ptr{$itype},Ptr{$itype},Ptr{$itype},Ptr{$itype},Ptr{Cvoid}),
-                           lnz, unz, n_row, n_col, nz_diag, lu.numeric)
-            (lnz[], unz[], n_row[], n_col[], nz_diag[])
-        end
-        function umf_lunz(lu::UmfpackLU{ComplexF64,$itype})
-            lnz = Ref{$itype}()
-            unz = Ref{$itype}()
-            n_row = Ref{$itype}()
-            n_col = Ref{$itype}()
-            nz_diag = Ref{$itype}()
-            @isok ccall(($lunz_z,:libumfpack), $itype,
-                           (Ptr{$itype},Ptr{$itype},Ptr{$itype},Ptr{$itype},Ptr{$itype},Ptr{Cvoid}),
-                           lnz, unz, n_row, n_col, nz_diag, lu.numeric)
-            (lnz[], unz[], n_row[], n_col[], nz_diag[])
-        end
-        function umf_extract(lu::UmfpackLU{Float64,$itype})
-            umfpack_numeric!(lu)        # ensure the numeric decomposition exists
-            (lnz, unz, n_row, n_col, nz_diag) = umf_lunz(lu)
-            Lp = Vector{$itype}(undef, n_row + 1)
-            Lj = Vector{$itype}(undef, lnz) # L is returned in CSR (compressed sparse row) format
-            Lx = Vector{Float64}(undef, lnz)
-            Up = Vector{$itype}(undef, n_col + 1)
-            Ui = Vector{$itype}(undef, unz)
-            Ux = Vector{Float64}(undef, unz)
-            P  = Vector{$itype}(undef, n_row)
-            Q  = Vector{$itype}(undef, n_col)
-            Rs = Vector{Float64}(undef, n_row)
-            @isok ccall(($get_num_r,:libumfpack), $itype,
-                        (Ptr{$itype},Ptr{$itype},Ptr{Float64},
-                         Ptr{$itype},Ptr{$itype},Ptr{Float64},
-                         Ptr{$itype},Ptr{$itype},Ptr{Cvoid},
-                         Ref{$itype},Ptr{Float64},Ptr{Cvoid}),
-                        Lp,Lj,Lx,
-                        Up,Ui,Ux,
-                        P, Q, C_NULL,
-                        0, Rs, lu.numeric)
-            (copy(transpose(SparseMatrixCSC(min(n_row, n_col), n_row, increment!(Lp), increment!(Lj), Lx))),
-             SparseMatrixCSC(min(n_row, n_col), n_col, increment!(Up), increment!(Ui), Ux),
-             increment!(P), increment!(Q), Rs)
-        end
-        function umf_extract(lu::UmfpackLU{ComplexF64,$itype})
-            umfpack_numeric!(lu)        # ensure the numeric decomposition exists
-            (lnz, unz, n_row, n_col, nz_diag) = umf_lunz(lu)
-            Lp = Vector{$itype}(undef, n_row + 1)
-            Lj = Vector{$itype}(undef, lnz) # L is returned in CSR (compressed sparse row) format
-            Lx = Vector{Float64}(undef, lnz)
-            Lz = Vector{Float64}(undef, lnz)
-            Up = Vector{$itype}(undef, n_col + 1)
-            Ui = Vector{$itype}(undef, unz)
-            Ux = Vector{Float64}(undef, unz)
-            Uz = Vector{Float64}(undef, unz)
-            P  = Vector{$itype}(undef, n_row)
-            Q  = Vector{$itype}(undef, n_col)
-            Rs = Vector{Float64}(undef, n_row)
-            @isok ccall(($get_num_z,:libumfpack), $itype,
-                        (Ptr{$itype},Ptr{$itype},Ptr{Float64},Ptr{Float64},
-                         Ptr{$itype},Ptr{$itype},Ptr{Float64},Ptr{Float64},
-                         Ptr{$itype},Ptr{$itype},Ptr{Cvoid}, Ptr{Cvoid},
-                         Ref{$itype},Ptr{Float64},Ptr{Cvoid}),
-                        Lp,Lj,Lx,Lz,
-                        Up,Ui,Ux,Uz,
-                        P, Q, C_NULL, C_NULL,
-                        0, Rs, lu.numeric)
-            (copy(transpose(SparseMatrixCSC(min(n_row, n_col), n_row, increment!(Lp), increment!(Lj), complex.(Lx, Lz)))),
-             SparseMatrixCSC(min(n_row, n_col), n_col, increment!(Up), increment!(Ui), complex.(Ux, Uz)),
-             increment!(P), increment!(Q), Rs)
-        end
-    end
-end
-
-function nnz(lu::UmfpackLU)
-    lnz, unz, = umf_lunz(lu)
-    return Int(lnz + unz)
-end
-
-LinearAlgebra.issuccess(lu::UmfpackLU) = lu.status == UMFPACK_OK
-
-### Solve with Factorization
-
-import LinearAlgebra.ldiv!
-
-ldiv!(lu::UmfpackLU{T}, B::StridedVecOrMat{T}) where {T<:UMFVTypes} =
-    ldiv!(B, lu, copy(B))
-ldiv!(translu::Transpose{T,<:UmfpackLU{T}}, B::StridedVecOrMat{T}) where {T<:UMFVTypes} =
-    (lu = translu.parent; ldiv!(B, transpose(lu), copy(B)))
-ldiv!(adjlu::Adjoint{T,<:UmfpackLU{T}}, B::StridedVecOrMat{T}) where {T<:UMFVTypes} =
-    (lu = adjlu.parent; ldiv!(B, adjoint(lu), copy(B)))
-ldiv!(lu::UmfpackLU{Float64}, B::StridedVecOrMat{<:Complex}) =
-    ldiv!(B, lu, copy(B))
-ldiv!(translu::Transpose{Float64,<:UmfpackLU{Float64}}, B::StridedVecOrMat{<:Complex}) =
-    (lu = translu.parent; ldiv!(B, transpose(lu), copy(B)))
-ldiv!(adjlu::Adjoint{Float64,<:UmfpackLU{Float64}}, B::StridedVecOrMat{<:Complex}) =
-    (lu = adjlu.parent; ldiv!(B, adjoint(lu), copy(B)))
-
-ldiv!(X::StridedVecOrMat{T}, lu::UmfpackLU{T}, B::StridedVecOrMat{T}) where {T<:UMFVTypes} =
-    _Aq_ldiv_B!(X, lu, B, UMFPACK_A)
-ldiv!(X::StridedVecOrMat{T}, translu::Transpose{T,<:UmfpackLU{T}}, B::StridedVecOrMat{T}) where {T<:UMFVTypes} =
-    (lu = translu.parent; _Aq_ldiv_B!(X, lu, B, UMFPACK_Aat))
-ldiv!(X::StridedVecOrMat{T}, adjlu::Adjoint{T,<:UmfpackLU{T}}, B::StridedVecOrMat{T}) where {T<:UMFVTypes} =
-    (lu = adjlu.parent; _Aq_ldiv_B!(X, lu, B, UMFPACK_At))
-ldiv!(X::StridedVecOrMat{Tb}, lu::UmfpackLU{Float64}, B::StridedVecOrMat{Tb}) where {Tb<:Complex} =
-    _Aq_ldiv_B!(X, lu, B, UMFPACK_A)
-ldiv!(X::StridedVecOrMat{Tb}, translu::Transpose{Float64,<:UmfpackLU{Float64}}, B::StridedVecOrMat{Tb}) where {Tb<:Complex} =
-    (lu = translu.parent; _Aq_ldiv_B!(X, lu, B, UMFPACK_Aat))
-ldiv!(X::StridedVecOrMat{Tb}, adjlu::Adjoint{Float64,<:UmfpackLU{Float64}}, B::StridedVecOrMat{Tb}) where {Tb<:Complex} =
-    (lu = adjlu.parent; _Aq_ldiv_B!(X, lu, B, UMFPACK_At))
-
-function _Aq_ldiv_B!(X::StridedVecOrMat, lu::UmfpackLU, B::StridedVecOrMat, transposeoptype)
-    if size(X, 2) != size(B, 2)
-        throw(DimensionMismatch("input and output arrays must have same number of columns"))
-    end
-    _AqldivB_kernel!(X, lu, B, transposeoptype)
-    return X
-end
-function _AqldivB_kernel!(x::StridedVector{T}, lu::UmfpackLU{T},
-                          b::StridedVector{T}, transposeoptype) where T<:UMFVTypes
-    solve!(x, lu, b, transposeoptype)
-end
-function _AqldivB_kernel!(X::StridedMatrix{T}, lu::UmfpackLU{T},
-                          B::StridedMatrix{T}, transposeoptype) where T<:UMFVTypes
-    for col in 1:size(X, 2)
-        solve!(view(X, :, col), lu, view(B, :, col), transposeoptype)
-    end
-end
-function _AqldivB_kernel!(x::StridedVector{Tb}, lu::UmfpackLU{Float64},
-                          b::StridedVector{Tb}, transposeoptype) where Tb<:Complex
-    r, i = similar(b, Float64), similar(b, Float64)
-    solve!(r, lu, Vector{Float64}(real(b)), transposeoptype)
-    solve!(i, lu, Vector{Float64}(imag(b)), transposeoptype)
-    map!(complex, x, r, i)
-end
-function _AqldivB_kernel!(X::StridedMatrix{Tb}, lu::UmfpackLU{Float64},
-                          B::StridedMatrix{Tb}, transposeoptype) where Tb<:Complex
-    r = similar(B, Float64, size(B, 1))
-    i = similar(B, Float64, size(B, 1))
-    for j in 1:size(B, 2)
-        solve!(r, lu, Vector{Float64}(real(view(B, :, j))), transposeoptype)
-        solve!(i, lu, Vector{Float64}(imag(view(B, :, j))), transposeoptype)
-        map!(complex, view(X, :, j), r, i)
-    end
-end
-
-
-@inline function getproperty(lu::UmfpackLU, d::Symbol)
-    if d === :L || d === :U || d === :p || d === :q || d === :Rs || d === :(:)
-        # Guard the call to umf_extract behaind a branch to avoid infinite recursion
-        L, U, p, q, Rs = umf_extract(lu)
-        if d === :L
-            return L
-        elseif d === :U
-            return U
-        elseif d === :p
-            return p
-        elseif d === :q
-            return q
-        elseif d === :Rs
-            return Rs
-        elseif d === :(:)
-            return (L, U, p, q, Rs)
-        end
-    else
-        getfield(lu, d)
-    end
-end
-
-for Tv in (:Float64, :ComplexF64), Ti in UmfpackIndexTypes
-    f = Symbol(umf_nm("free_symbolic", Tv, Ti))
-    @eval begin
-        function ($f)(symb::Ptr{Cvoid})
-            tmp = [symb]
-            ccall(($(string(f)), :libumfpack), Cvoid, (Ptr{Cvoid},), tmp)
-        end
-
-        function umfpack_free_symbolic(lu::UmfpackLU{$Tv,$Ti})
-            if lu.symbolic == C_NULL return lu end
-            umfpack_free_numeric(lu)
-            ($f)(lu.symbolic)
-            lu.symbolic = C_NULL
-            return lu
-        end
-    end
-
-    f = Symbol(umf_nm("free_numeric", Tv, Ti))
-    @eval begin
-        function ($f)(num::Ptr{Cvoid})
-            tmp = [num]
-            ccall(($(string(f)), :libumfpack), Cvoid, (Ptr{Cvoid},), tmp)
-        end
-        function umfpack_free_numeric(lu::UmfpackLU{$Tv,$Ti})
-            if lu.numeric == C_NULL return lu end
-            ($f)(lu.numeric)
-            lu.numeric = C_NULL
-            return lu
-        end
-    end
-end
-
-function umfpack_report_symbolic(symb::Ptr{Cvoid}, level::Real)
-    old_prl::Float64 = umf_ctrl[UMFPACK_PRL]
-    umf_ctrl[UMFPACK_PRL] = Float64(level)
-    @isok ccall((:umfpack_dl_report_symbolic, :libumfpack), Int,
-                (Ptr{Cvoid}, Ptr{Float64}), symb, umf_ctrl)
-    umf_ctrl[UMFPACK_PRL] = old_prl
-end
-
-umfpack_report_symbolic(symb::Ptr{Cvoid}) = umfpack_report_symbolic(symb, 4.)
-
-function umfpack_report_symbolic(lu::UmfpackLU, level::Real)
-    umfpack_report_symbolic(umfpack_symbolic!(lu).symbolic, level)
-end
-
-umfpack_report_symbolic(lu::UmfpackLU) = umfpack_report_symbolic(lu.symbolic,4.)
-function umfpack_report_numeric(num::Ptr{Cvoid}, level::Real)
-    old_prl::Float64 = umf_ctrl[UMFPACK_PRL]
-    umf_ctrl[UMFPACK_PRL] = Float64(level)
-    @isok ccall((:umfpack_dl_report_numeric, :libumfpack), Int,
-                (Ptr{Cvoid}, Ptr{Float64}), num, umf_ctrl)
-    umf_ctrl[UMFPACK_PRL] = old_prl
-end
-
-umfpack_report_numeric(num::Ptr{Cvoid}) = umfpack_report_numeric(num, 4.)
-function umfpack_report_numeric(lu::UmfpackLU, level::Real)
-    umfpack_report_numeric(umfpack_numeric!(lu).numeric, level)
-end
-
-umfpack_report_numeric(lu::UmfpackLU) = umfpack_report_numeric(lu,4.)
-
-end # UMFPACK module
diff --git a/stdlib/SuiteSparse/src/umfpack_h.jl b/stdlib/SuiteSparse/src/umfpack_h.jl
deleted file mode 100644
index 985f9387fcc756..00000000000000
--- a/stdlib/SuiteSparse/src/umfpack_h.jl
+++ /dev/null
@@ -1,43 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-## UMFPACK
-
-## Type of solve
-const UMFPACK_A     =  0     # Ax=b
-const UMFPACK_At    =  1     # adjoint(A)x=b
-const UMFPACK_Aat   =  2     # transpose(A)x=b
-const UMFPACK_Pt_L  =  3     # adjoint(P)Lx=b
-const UMFPACK_L     =  4     # Lx=b
-const UMFPACK_Lt_P  =  5     # adjoint(L)Px=b
-const UMFPACK_Lat_P =  6     # transpose(L)Px=b
-const UMFPACK_Lt    =  7     # adjoint(L)x=b
-const UMFPACK_Lat   =  8     # transpose(L)x=b
-const UMFPACK_U_Qt  =  9     # U*adjoint(Q)x=b
-const UMFPACK_U     =  10    # Ux=b
-const UMFPACK_Q_Ut  =  11    # Q*adjoint(U)x=b
-const UMFPACK_Q_Uat =  12    # Q*transpose(U)x=b
-const UMFPACK_Ut    =  13    # adjoint(U)x=b
-const UMFPACK_Uat   =  14    # transpose(U)x=b
-
-## Sizes of Control and Info arrays for returning information from solver
-const UMFPACK_INFO = 90
-const UMFPACK_CONTROL = 20
-const UMFPACK_PRL = 1
-
-## Status codes
-const UMFPACK_OK = 0
-const UMFPACK_WARNING_singular_matrix       = 1
-const UMFPACK_WARNING_determinant_underflow = 2
-const UMFPACK_WARNING_determinant_overflow  = 3
-const UMFPACK_ERROR_out_of_memory           = -1
-const UMFPACK_ERROR_invalid_Numeric_object  = -3
-const UMFPACK_ERROR_invalid_Symbolic_object = -4
-const UMFPACK_ERROR_argument_missing        = -5
-const UMFPACK_ERROR_n_nonpositive           = -6
-const UMFPACK_ERROR_invalid_matrix          = -8
-const UMFPACK_ERROR_different_pattern       = -11
-const UMFPACK_ERROR_invalid_system          = -13
-const UMFPACK_ERROR_invalid_permutation     = -15
-const UMFPACK_ERROR_internal_error          = -911
-const UMFPACK_ERROR_file_IO                 = -17
-const UMFPACK_ERROR_ordering_failed         = -18
diff --git a/stdlib/SuiteSparse/test/cholmod.jl b/stdlib/SuiteSparse/test/cholmod.jl
deleted file mode 100644
index 020c7129cc56f2..00000000000000
--- a/stdlib/SuiteSparse/test/cholmod.jl
+++ /dev/null
@@ -1,896 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-using SuiteSparse.CHOLMOD
-using SuiteSparse
-using DelimitedFiles
-using Test
-using Random
-using Serialization
-using LinearAlgebra:
-    I, cholesky, cholesky!, det, diag, eigmax, ishermitian, isposdef, issuccess,
-    issymmetric, ldlt, ldlt!, logdet, norm, opnorm, Diagonal, Hermitian, Symmetric,
-    PosDefException, ZeroPivotException
-using SparseArrays
-using SparseArrays: getcolptr
-
-# CHOLMOD tests
-Random.seed!(123)
-
-@testset "based on deps/SuiteSparse-4.0.2/CHOLMOD/Demo/" begin
-
-# chm_rdsp(joinpath(Sys.BINDIR, "../../deps/SuiteSparse-4.0.2/CHOLMOD/Demo/Matrix/bcsstk01.tri"))
-# because the file may not exist in binary distributions and when a system suitesparse library
-# is used
-
-## Result from C program
-## ---------------------------------- cholmod_demo:
-## norm (A,inf) = 3.57095e+09
-## norm (A,1)   = 3.57095e+09
-## CHOLMOD sparse:  A:  48-by-48, nz 224, upper.  OK
-## CHOLMOD dense:   B:  48-by-1,   OK
-## bnorm 1.97917
-## Analyze: flop 6009 lnz 489
-## Factorizing A
-## CHOLMOD factor:  L:  48-by-48  simplicial, LDL'. nzmax 489.  nz 489  OK
-## Ordering: AMD     fl/lnz       12.3  lnz/anz        2.2
-## ints in L: 782, doubles in L: 489
-## factor flops 6009 nnz(L)             489 (w/no amalgamation)
-## nnz(A*A'):             224
-## flops / nnz(L):      12.3
-## nnz(L) / nnz(A):      2.2
-## analyze cputime:        0.0000
-## factor  cputime:         0.0000 mflop:      0.0
-## solve   cputime:         0.0000 mflop:      0.0
-## overall cputime:         0.0000 mflop:      0.0
-## peak memory usage:            0 (MB)
-## residual  2.5e-19 (|Ax-b|/(|A||x|+|b|))
-## residual  1.3e-19 (|Ax-b|/(|A||x|+|b|)) after iterative refinement
-## rcond     9.5e-06
-
-    n = 48
-    A = CHOLMOD.Sparse(n, n,
-        CHOLMOD.SuiteSparse_long[0,1,2,3,6,9,12,15,18,20,25,30,34,36,39,43,47,52,58,
-        62,67,71,77,84,90,93,95,98,103,106,110,115,119,123,130,136,142,146,150,155,
-        161,167,174,182,189,197,207,215,224], # zero-based column pointers
-        CHOLMOD.SuiteSparse_long[0,1,2,1,2,3,0,2,4,0,1,5,0,4,6,1,3,7,2,8,1,3,7,8,9,
-        0,4,6,8,10,5,6,7,11,6,12,7,11,13,8,10,13,14,9,13,14,15,8,10,12,14,16,7,11,
-        12,13,16,17,0,12,16,18,1,5,13,15,19,2,4,14,20,3,13,15,19,20,21,2,4,12,16,18,
-        20,22,1,5,17,18,19,23,0,5,24,1,25,2,3,26,2,3,25,26,27,4,24,28,0,5,24,29,6,
-        11,24,28,30,7,25,27,31,8,9,26,32,8,9,25,27,31,32,33,10,24,28,30,32,34,6,11,
-        29,30,31,35,12,17,30,36,13,31,35,37,14,15,32,34,38,14,15,33,37,38,39,16,32,
-        34,36,38,40,12,17,31,35,36,37,41,12,16,17,18,23,36,40,42,13,14,15,19,37,39,
-        43,13,14,15,20,21,38,43,44,13,14,15,20,21,37,39,43,44,45,12,16,17,22,36,40,
-        42,46,12,16,17,18,23,41,42,46,47],
-        [2.83226851852e6,1.63544753086e6,1.72436728395e6,-2.0e6,-2.08333333333e6,
-        1.00333333333e9,1.0e6,-2.77777777778e6,1.0675e9,2.08333333333e6,
-        5.55555555555e6,1.53533333333e9,-3333.33333333,-1.0e6,2.83226851852e6,
-        -6666.66666667,2.0e6,1.63544753086e6,-1.68e6,1.72436728395e6,-2.0e6,4.0e8,
-        2.0e6,-2.08333333333e6,1.00333333333e9,1.0e6,2.0e8,-1.0e6,-2.77777777778e6,
-        1.0675e9,-2.0e6,2.08333333333e6,5.55555555555e6,1.53533333333e9,-2.8e6,
-        2.8360994695e6,-30864.1975309,-5.55555555555e6,1.76741074446e6,
-        -15432.0987654,2.77777777778e6,517922.131816,3.89003806848e6,
-        -3.33333333333e6,4.29857058902e6,-2.6349902747e6,1.97572063531e9,
-        -2.77777777778e6,3.33333333333e8,-2.14928529451e6,2.77777777778e6,
-        1.52734651547e9,5.55555555555e6,6.66666666667e8,2.35916180402e6,
-        -5.55555555555e6,-1.09779731332e8,1.56411143711e9,-2.8e6,-3333.33333333,
-        1.0e6,2.83226851852e6,-30864.1975309,-5.55555555555e6,-6666.66666667,
-        -2.0e6,1.63544753086e6,-15432.0987654,2.77777777778e6,-1.68e6,
-        1.72436728395e6,-3.33333333333e6,2.0e6,4.0e8,-2.0e6,-2.08333333333e6,
-        1.00333333333e9,-2.77777777778e6,3.33333333333e8,-1.0e6,2.0e8,1.0e6,
-        2.77777777778e6,1.0675e9,5.55555555555e6,6.66666666667e8,-2.0e6,
-        2.08333333333e6,-5.55555555555e6,1.53533333333e9,-28935.1851852,
-        -2.08333333333e6,60879.6296296,-1.59791666667e6,3.37291666667e6,
-        -28935.1851852,2.08333333333e6,2.41171296296e6,-2.08333333333e6,
-        1.0e8,-2.5e6,-416666.666667,1.5e9,-833333.333333,1.25e6,5.01833333333e8,
-        2.08333333333e6,1.0e8,416666.666667,5.025e8,-28935.1851852,
-        -2.08333333333e6,-4166.66666667,-1.25e6,3.98587962963e6,-1.59791666667e6,
-        -8333.33333333,2.5e6,3.41149691358e6,-28935.1851852,2.08333333333e6,
-        -2.355e6,2.43100308642e6,-2.08333333333e6,1.0e8,-2.5e6,5.0e8,2.5e6,
-        -416666.666667,1.50416666667e9,-833333.333333,1.25e6,2.5e8,-1.25e6,
-        -3.47222222222e6,1.33516666667e9,2.08333333333e6,1.0e8,-2.5e6,
-        416666.666667,6.94444444444e6,2.16916666667e9,-28935.1851852,
-        -2.08333333333e6,-3.925e6,3.98587962963e6,-1.59791666667e6,
-        -38580.2469136,-6.94444444444e6,3.41149691358e6,-28935.1851852,
-        2.08333333333e6,-19290.1234568,3.47222222222e6,2.43100308642e6,
-        -2.08333333333e6,1.0e8,-4.16666666667e6,2.5e6,-416666.666667,
-        1.50416666667e9,-833333.333333,-3.47222222222e6,4.16666666667e8,
-        -1.25e6,3.47222222222e6,1.33516666667e9,2.08333333333e6,1.0e8,
-        6.94444444445e6,8.33333333333e8,416666.666667,-6.94444444445e6,
-        2.16916666667e9,-3830.95098171,1.14928529451e6,-275828.470683,
-        -28935.1851852,-2.08333333333e6,-4166.66666667,1.25e6,64710.5806113,
-        -131963.213599,-517922.131816,-2.29857058902e6,-1.59791666667e6,
-        -8333.33333333,-2.5e6,3.50487988027e6,-517922.131816,-2.16567078453e6,
-        551656.941366,-28935.1851852,2.08333333333e6,-2.355e6,517922.131816,
-        4.57738374749e6,2.29857058902e6,-551656.941367,4.8619365099e8,
-        -2.08333333333e6,1.0e8,2.5e6,5.0e8,-4.79857058902e6,134990.2747,
-        2.47238730198e9,-1.14928529451e6,2.29724661236e8,-5.57173510779e7,
-        -833333.333333,-1.25e6,2.5e8,2.39928529451e6,9.61679848804e8,275828.470683,
-        -5.57173510779e7,1.09411960038e7,2.08333333333e6,1.0e8,-2.5e6,
-        140838.195984,-1.09779731332e8,5.31278103775e8], 1)
-    @test CHOLMOD.norm_sparse(A, 0) ≈ 3.570948074697437e9
-    @test CHOLMOD.norm_sparse(A, 1) ≈ 3.570948074697437e9
-    @test_throws ArgumentError CHOLMOD.norm_sparse(A, 2)
-    @test CHOLMOD.isvalid(A)
-
-    x = fill(1., n)
-    b = A*x
-
-    chma = ldlt(A)                      # LDL' form
-    @test CHOLMOD.isvalid(chma)
-    @test unsafe_load(pointer(chma)).is_ll == 0    # check that it is in fact an LDLt
-    @test chma\b ≈ x
-    @test nnz(ldlt(A, perm=1:size(A,1))) > nnz(chma)
-    @test size(chma) == size(A)
-    chmal = CHOLMOD.FactorComponent(chma, :L)
-    @test size(chmal) == size(A)
-    @test size(chmal, 1) == size(A, 1)
-
-    chma = cholesky(A)                      # LL' form
-    @test CHOLMOD.isvalid(chma)
-    @test unsafe_load(pointer(chma)).is_ll == 1    # check that it is in fact an LLt
-    @test chma\b ≈ x
-    @test nnz(chma) == 489
-    @test nnz(cholesky(A, perm=1:size(A,1))) > nnz(chma)
-    @test size(chma) == size(A)
-    chmal = CHOLMOD.FactorComponent(chma, :L)
-    @test size(chmal) == size(A)
-    @test size(chmal, 1) == size(A, 1)
-
-    @testset "eltype" begin
-        @test eltype(Dense(fill(1., 3))) == Float64
-        @test eltype(A) == Float64
-        @test eltype(chma) == Float64
-    end
-end
-
-@testset "lp_afiro example" begin
-    afiro = CHOLMOD.Sparse(27, 51,
-        CHOLMOD.SuiteSparse_long[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,
-        23,25,27,29,33,37,41,45,47,49,51,53,55,57,59,63,65,67,69,71,75,79,83,87,89,
-        91,93,95,97,99,101,102],
-        CHOLMOD.SuiteSparse_long[2,3,6,7,8,9,12,13,16,17,18,19,20,21,22,23,24,25,26,
-        0,1,2,23,0,3,0,21,1,25,4,5,6,24,4,5,7,24,4,5,8,24,4,5,9,24,6,20,7,20,8,20,9,
-        20,3,4,4,22,5,26,10,11,12,21,10,13,10,23,10,20,11,25,14,15,16,22,14,15,17,
-        22,14,15,18,22,14,15,19,22,16,20,17,20,18,20,19,20,13,15,15,24,14,26,15],
-        [1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,
-        1.0,-1.0,-1.06,1.0,0.301,1.0,-1.0,1.0,-1.0,1.0,1.0,-1.0,-1.06,1.0,0.301,
-        -1.0,-1.06,1.0,0.313,-1.0,-0.96,1.0,0.313,-1.0,-0.86,1.0,0.326,-1.0,2.364,
-        -1.0,2.386,-1.0,2.408,-1.0,2.429,1.4,1.0,1.0,-1.0,1.0,1.0,-1.0,-0.43,1.0,
-        0.109,1.0,-1.0,1.0,-1.0,1.0,-1.0,1.0,1.0,-0.43,1.0,1.0,0.109,-0.43,1.0,1.0,
-        0.108,-0.39,1.0,1.0,0.108,-0.37,1.0,1.0,0.107,-1.0,2.191,-1.0,2.219,-1.0,
-        2.249,-1.0,2.279,1.4,-1.0,1.0,-1.0,1.0,1.0,1.0], 0)
-    afiro2 = CHOLMOD.aat(afiro, CHOLMOD.SuiteSparse_long[0:50;], CHOLMOD.SuiteSparse_long(1))
-    CHOLMOD.change_stype!(afiro2, -1)
-    chmaf = cholesky(afiro2)
-    y = afiro'*fill(1., size(afiro,1))
-    sol = chmaf\(afiro*y) # least squares solution
-    @test CHOLMOD.isvalid(sol)
-    pred = afiro'*sol
-    @test norm(afiro * (convert(Matrix, y) - convert(Matrix, pred))) < 1e-8
-end
-
-@testset "Issue 9160" begin
-    local A, B
-    A = sprand(10, 10, 0.1)
-    A = convert(SparseMatrixCSC{Float64,CHOLMOD.SuiteSparse_long}, A)
-    cmA = CHOLMOD.Sparse(A)
-
-    B = sprand(10, 10, 0.1)
-    B = convert(SparseMatrixCSC{Float64,CHOLMOD.SuiteSparse_long}, B)
-    cmB = CHOLMOD.Sparse(B)
-
-    # Ac_mul_B
-    @test sparse(cmA'*cmB) ≈ A'*B
-
-    # A_mul_Bc
-    @test sparse(cmA*cmB') ≈ A*B'
-
-    # A_mul_Ac
-    @test sparse(cmA*cmA') ≈ A*A'
-
-    # Ac_mul_A
-    @test sparse(cmA'*cmA) ≈ A'*A
-
-    # A_mul_Ac for symmetric A
-    A = 0.5*(A + copy(A'))
-    cmA = CHOLMOD.Sparse(A)
-    @test sparse(cmA*cmA') ≈ A*A'
-end
-
-@testset "Issue #9915" begin
-    sparseI = sparse(1.0I, 2, 2)
-    @test sparseI \ sparseI == sparseI
-end
-
-@testset "test Sparse constructor Symmetric and Hermitian input (and issymmetric and ishermitian)" begin
-    ACSC = sprandn(10, 10, 0.3) + I
-    @test issymmetric(Sparse(Symmetric(ACSC, :L)))
-    @test issymmetric(Sparse(Symmetric(ACSC, :U)))
-    @test ishermitian(Sparse(Hermitian(complex(ACSC), :L)))
-    @test ishermitian(Sparse(Hermitian(complex(ACSC), :U)))
-end
-
-@testset "test Sparse constructor for C_Sparse{Cvoid} (and read_sparse)" begin
-    mktempdir() do temp_dir
-        testfile = joinpath(temp_dir, "tmp.mtx")
-
-        writedlm(testfile, ["%%MatrixMarket matrix coordinate real symmetric","3 3 4","1 1 1","2 2 1","3 2 0.5","3 3 1"])
-        @test sparse(CHOLMOD.Sparse(testfile)) == [1 0 0;0 1 0.5;0 0.5 1]
-        rm(testfile)
-
-        writedlm(testfile, ["%%MatrixMarket matrix coordinate complex Hermitian",
-                        "3 3 4","1 1 1.0 0.0","2 2 1.0 0.0","3 2 0.5 0.5","3 3 1.0 0.0"])
-        @test sparse(CHOLMOD.Sparse(testfile)) == [1 0 0;0 1 0.5-0.5im;0 0.5+0.5im 1]
-        rm(testfile)
-
-        writedlm(testfile, ["%%MatrixMarket matrix coordinate real symmetric","%3 3 4","1 1 1","2 2 1","3 2 0.5","3 3 1"])
-        @test_throws ArgumentError sparse(CHOLMOD.Sparse(testfile))
-        rm(testfile)
-    end
-end
-
-@testset "test that Sparse(Ptr) constructor throws the right places" begin
-    @test_throws ArgumentError CHOLMOD.Sparse(convert(Ptr{CHOLMOD.C_Sparse{Float64}}, C_NULL))
-    @test_throws ArgumentError CHOLMOD.Sparse(convert(Ptr{CHOLMOD.C_Sparse{Cvoid}}, C_NULL))
-end
-
-## The struct pointer must be constructed by the library constructor and then modified afterwards to checks that the method throws
-@testset "illegal dtype (for now but should be supported at some point)" begin
-    p = ccall((:cholmod_l_allocate_sparse, :libcholmod), Ptr{CHOLMOD.C_Sparse{Cvoid}},
-        (Csize_t, Csize_t, Csize_t, Cint, Cint, Cint, Cint, Ptr{Cvoid}),
-        1, 1, 1, true, true, 0, CHOLMOD.REAL, CHOLMOD.common_struct[Threads.threadid()])
-    puint = convert(Ptr{UInt32}, p)
-    unsafe_store!(puint, CHOLMOD.SINGLE, 3*div(sizeof(Csize_t), 4) + 5*div(sizeof(Ptr{Cvoid}), 4) + 4)
-    @test_throws CHOLMOD.CHOLMODException CHOLMOD.Sparse(p)
-end
-
-@testset "illegal dtype" begin
-    p = ccall((:cholmod_l_allocate_sparse, :libcholmod), Ptr{CHOLMOD.C_Sparse{Cvoid}},
-        (Csize_t, Csize_t, Csize_t, Cint, Cint, Cint, Cint, Ptr{Cvoid}),
-        1, 1, 1, true, true, 0, CHOLMOD.REAL, CHOLMOD.common_struct[Threads.threadid()])
-    puint = convert(Ptr{UInt32}, p)
-    unsafe_store!(puint, 5, 3*div(sizeof(Csize_t), 4) + 5*div(sizeof(Ptr{Cvoid}), 4) + 4)
-    @test_throws CHOLMOD.CHOLMODException CHOLMOD.Sparse(p)
-end
-
-@testset "illegal xtype" begin
-    p = ccall((:cholmod_l_allocate_sparse, :libcholmod), Ptr{CHOLMOD.C_Sparse{Cvoid}},
-        (Csize_t, Csize_t, Csize_t, Cint, Cint, Cint, Cint, Ptr{Cvoid}),
-        1, 1, 1, true, true, 0, CHOLMOD.REAL, CHOLMOD.common_struct[Threads.threadid()])
-    puint = convert(Ptr{UInt32}, p)
-    unsafe_store!(puint, 3, 3*div(sizeof(Csize_t), 4) + 5*div(sizeof(Ptr{Cvoid}), 4) + 3)
-    @test_throws CHOLMOD.CHOLMODException CHOLMOD.Sparse(p)
-end
-
-@testset "illegal itype I" begin
-    p = ccall((:cholmod_l_allocate_sparse, :libcholmod), Ptr{CHOLMOD.C_Sparse{Cvoid}},
-        (Csize_t, Csize_t, Csize_t, Cint, Cint, Cint, Cint, Ptr{Cvoid}),
-        1, 1, 1, true, true, 0, CHOLMOD.REAL, CHOLMOD.common_struct[Threads.threadid()])
-    puint = convert(Ptr{UInt32}, p)
-    unsafe_store!(puint, CHOLMOD.INTLONG, 3*div(sizeof(Csize_t), 4) + 5*div(sizeof(Ptr{Cvoid}), 4) + 2)
-    @test_throws CHOLMOD.CHOLMODException CHOLMOD.Sparse(p)
-end
-
-@testset "illegal itype II" begin
-    p = ccall((:cholmod_l_allocate_sparse, :libcholmod), Ptr{CHOLMOD.C_Sparse{Cvoid}},
-        (Csize_t, Csize_t, Csize_t, Cint, Cint, Cint, Cint, Ptr{Cvoid}),
-        1, 1, 1, true, true, 0, CHOLMOD.REAL, CHOLMOD.common_struct[Threads.threadid()])
-    puint = convert(Ptr{UInt32}, p)
-    unsafe_store!(puint,  5, 3*div(sizeof(Csize_t), 4) + 5*div(sizeof(Ptr{Cvoid}), 4) + 2)
-    @test_throws CHOLMOD.CHOLMODException CHOLMOD.Sparse(p)
-end
-
-# Test Dense wrappers (only Float64 supported a present)
-
-@testset "High level interface" for elty in (Float64, ComplexF64)
-    local A, b
-    if elty == Float64
-        A = randn(5, 5)
-        b = randn(5)
-    else
-        A = complex.(randn(5, 5), randn(5, 5))
-        b = complex.(randn(5), randn(5))
-    end
-    ADense = CHOLMOD.Dense(A)
-    bDense = CHOLMOD.Dense(b)
-
-    @test_throws BoundsError ADense[6, 1]
-    @test_throws BoundsError ADense[1, 6]
-    @test copy(ADense) == ADense
-    @test CHOLMOD.norm_dense(ADense, 1) ≈ opnorm(A, 1)
-    @test CHOLMOD.norm_dense(ADense, 0) ≈ opnorm(A, Inf)
-    @test_throws ArgumentError CHOLMOD.norm_dense(ADense, 2)
-    @test_throws ArgumentError CHOLMOD.norm_dense(ADense, 3)
-
-    @test CHOLMOD.norm_dense(bDense, 2) ≈ norm(b)
-    @test CHOLMOD.check_dense(bDense)
-
-    AA = CHOLMOD.eye(3)
-    unsafe_store!(convert(Ptr{Csize_t}, pointer(AA)), 2, 1) # change size, but not stride, of Dense
-    @test convert(Matrix, AA) == Matrix(I, 2, 3)
-end
-
-@testset "Low level interface" begin
-    @test isa(CHOLMOD.zeros(3, 3, Float64), CHOLMOD.Dense{Float64})
-    @test isa(CHOLMOD.zeros(3, 3), CHOLMOD.Dense{Float64})
-    @test isa(CHOLMOD.zeros(3, 3, Float64), CHOLMOD.Dense{Float64})
-    @test isa(CHOLMOD.ones(3, 3), CHOLMOD.Dense{Float64})
-    @test isa(CHOLMOD.eye(3, 4, Float64), CHOLMOD.Dense{Float64})
-    @test isa(CHOLMOD.eye(3, 4), CHOLMOD.Dense{Float64})
-    @test isa(CHOLMOD.eye(3), CHOLMOD.Dense{Float64})
-    @test isa(copy(CHOLMOD.eye(3)), CHOLMOD.Dense{Float64})
-end
-
-# Test Sparse and Factor
-@testset "test free!" begin
-    p = ccall((:cholmod_l_allocate_sparse, :libcholmod), Ptr{CHOLMOD.C_Sparse{Float64}},
-        (Csize_t, Csize_t, Csize_t, Cint, Cint, Cint, Cint, Ptr{Cvoid}),
-        1, 1, 1, true, true, 0, CHOLMOD.REAL, CHOLMOD.common_struct[Threads.threadid()])
-    @test CHOLMOD.free!(p)
-end
-
-@testset "Core functionality" for elty in (Float64, ComplexF64)
-    A1 = sparse([1:5; 1], [1:5; 2], elty == Float64 ? randn(6) : complex.(randn(6), randn(6)))
-    A2 = sparse([1:5; 1], [1:5; 2], elty == Float64 ? randn(6) : complex.(randn(6), randn(6)))
-    A1pd = A1'A1
-    A1Sparse = CHOLMOD.Sparse(A1)
-    A2Sparse = CHOLMOD.Sparse(A2)
-    A1pdSparse = CHOLMOD.Sparse(
-        size(A1pd, 1),
-        size(A1pd, 2),
-        SuiteSparse.decrement(getcolptr(A1pd)),
-        SuiteSparse.decrement(rowvals(A1pd)),
-        nonzeros(A1pd))
-
-    ## High level interface
-    @test isa(CHOLMOD.Sparse(3, 3, [0,1,3,4], [0,2,1,2], fill(1., 4)), CHOLMOD.Sparse) # Sparse doesn't require columns to be sorted
-    @test_throws BoundsError A1Sparse[6, 1]
-    @test_throws BoundsError A1Sparse[1, 6]
-    @test sparse(A1Sparse) == A1
-    for i = 1:size(A1, 1)
-        A1[i, i] = real(A1[i, i])
-    end #Construct Hermitian matrix properly
-    @test CHOLMOD.sparse(CHOLMOD.Sparse(Hermitian(A1, :L))) == Hermitian(A1, :L)
-    @test CHOLMOD.sparse(CHOLMOD.Sparse(Hermitian(A1, :U))) == Hermitian(A1, :U)
-    @test_throws ArgumentError convert(SparseMatrixCSC{elty,Int}, A1pdSparse)
-    if elty <: Real
-        @test_throws ArgumentError convert(Symmetric{Float64,SparseMatrixCSC{Float64,Int}}, A1Sparse)
-    else
-        @test_throws ArgumentError convert(Hermitian{ComplexF64,SparseMatrixCSC{ComplexF64,Int}}, A1Sparse)
-    end
-    @test copy(A1Sparse) == A1Sparse
-    @test size(A1Sparse, 3) == 1
-    if elty <: Real # multiplication only defined for real matrices in CHOLMOD
-        @test A1Sparse*A2Sparse ≈ A1*A2
-        @test_throws DimensionMismatch CHOLMOD.Sparse(A1[:,1:4])*A2Sparse
-        @test A1Sparse'A2Sparse ≈ A1'A2
-        @test A1Sparse*A2Sparse' ≈ A1*A2'
-
-        @test A1Sparse*A1Sparse ≈ A1*A1
-        @test A1Sparse'A1Sparse ≈ A1'A1
-        @test A1Sparse*A1Sparse' ≈ A1*A1'
-
-        @test A1pdSparse*A1pdSparse ≈ A1pd*A1pd
-        @test A1pdSparse'A1pdSparse ≈ A1pd'A1pd
-        @test A1pdSparse*A1pdSparse' ≈ A1pd*A1pd'
-
-        @test_throws DimensionMismatch A1Sparse*CHOLMOD.eye(4, 5, elty)
-    end
-
-    # Factor
-    @test_throws ArgumentError cholesky(A1)
-    @test_throws ArgumentError cholesky(A1)
-    @test_throws ArgumentError cholesky(A1, shift=1.0)
-    @test_throws ArgumentError ldlt(A1)
-    @test_throws ArgumentError ldlt(A1, shift=1.0)
-    C = A1 + copy(adjoint(A1))
-    λmaxC = eigmax(Array(C))
-    b = fill(1., size(A1, 1))
-    @test_throws PosDefException cholesky(C - 2λmaxC*I)
-    @test_throws PosDefException cholesky(C, shift=-2λmaxC)
-    @test_throws ZeroPivotException ldlt(C - C[1,1]*I)
-    @test_throws ZeroPivotException ldlt(C, shift=-real(C[1,1]))
-    @test !isposdef(cholesky(C - 2λmaxC*I; check = false))
-    @test !isposdef(cholesky(C, shift=-2λmaxC; check = false))
-    @test !issuccess(ldlt(C - C[1,1]*I; check = false))
-    @test !issuccess(ldlt(C, shift=-real(C[1,1]); check = false))
-    F = cholesky(A1pd)
-    tmp = IOBuffer()
-    show(tmp, F)
-    @test tmp.size > 0
-    @test isa(CHOLMOD.Sparse(F), CHOLMOD.Sparse{elty})
-    @test_throws DimensionMismatch F\CHOLMOD.Dense(fill(elty(1), 4))
-    @test_throws DimensionMismatch F\CHOLMOD.Sparse(sparse(fill(elty(1), 4)))
-    b = fill(1., 5)
-    bT = fill(elty(1), 5)
-    @test F'\bT ≈ Array(A1pd)'\b
-    @test F'\sparse(bT) ≈ Array(A1pd)'\b
-    @test transpose(F)\bT ≈ conj(A1pd)'\bT
-    @test F\CHOLMOD.Sparse(sparse(bT)) ≈ A1pd\b
-    @test logdet(F) ≈ logdet(Array(A1pd))
-    @test det(F) == exp(logdet(F))
-    let # to test supernodal, we must use a larger matrix
-        Ftmp = sprandn(100, 100, 0.1)
-        Ftmp = Ftmp'Ftmp + I
-        @test logdet(cholesky(Ftmp)) ≈ logdet(Array(Ftmp))
-    end
-    @test logdet(ldlt(A1pd)) ≈ logdet(Array(A1pd))
-    @test isposdef(A1pd)
-    @test !isposdef(A1)
-    @test !isposdef(A1 + copy(A1') |> t -> t - 2eigmax(Array(t))*I)
-
-    if elty <: Real
-        @test CHOLMOD.issymmetric(Sparse(A1pd, 0))
-        @test CHOLMOD.Sparse(cholesky(Symmetric(A1pd, :L))) == CHOLMOD.Sparse(cholesky(A1pd))
-        F1 = CHOLMOD.Sparse(cholesky(Symmetric(A1pd, :L), shift=2))
-        F2 = CHOLMOD.Sparse(cholesky(A1pd, shift=2))
-        @test F1 == F2
-        @test CHOLMOD.Sparse(ldlt(Symmetric(A1pd, :L))) == CHOLMOD.Sparse(ldlt(A1pd))
-        F1 = CHOLMOD.Sparse(ldlt(Symmetric(A1pd, :L), shift=2))
-        F2 = CHOLMOD.Sparse(ldlt(A1pd, shift=2))
-        @test F1 == F2
-    else
-        @test !CHOLMOD.issymmetric(Sparse(A1pd, 0))
-        @test CHOLMOD.ishermitian(Sparse(A1pd, 0))
-        @test CHOLMOD.Sparse(cholesky(Hermitian(A1pd, :L))) == CHOLMOD.Sparse(cholesky(A1pd))
-        F1 = CHOLMOD.Sparse(cholesky(Hermitian(A1pd, :L), shift=2))
-        F2 = CHOLMOD.Sparse(cholesky(A1pd, shift=2))
-        @test F1 == F2
-        @test CHOLMOD.Sparse(ldlt(Hermitian(A1pd, :L))) == CHOLMOD.Sparse(ldlt(A1pd))
-        F1 = CHOLMOD.Sparse(ldlt(Hermitian(A1pd, :L), shift=2))
-        F2 = CHOLMOD.Sparse(ldlt(A1pd, shift=2))
-        @test F1 == F2
-    end
-
-    ### cholesky!/ldlt!
-    F = cholesky(A1pd)
-    CHOLMOD.change_factor!(F, false, false, true, true)
-    @test unsafe_load(pointer(F)).is_ll == 0
-    CHOLMOD.change_factor!(F, true, false, true, true)
-    @test CHOLMOD.Sparse(cholesky!(copy(F), A1pd)) ≈ CHOLMOD.Sparse(F) # surprisingly, this can cause small ulp size changes so we cannot test exact equality
-    @test size(F, 2) == 5
-    @test size(F, 3) == 1
-    @test_throws ArgumentError size(F, 0)
-
-    F = cholesky(A1pdSparse, shift=2)
-    @test isa(CHOLMOD.Sparse(F), CHOLMOD.Sparse{elty})
-    @test CHOLMOD.Sparse(cholesky!(copy(F), A1pd, shift=2.0)) ≈ CHOLMOD.Sparse(F) # surprisingly, this can cause small ulp size changes so we cannot test exact equality
-
-    F = ldlt(A1pd)
-    @test isa(CHOLMOD.Sparse(F), CHOLMOD.Sparse{elty})
-    @test CHOLMOD.Sparse(ldlt!(copy(F), A1pd)) ≈ CHOLMOD.Sparse(F) # surprisingly, this can cause small ulp size changes so we cannot test exact equality
-
-    F = ldlt(A1pdSparse, shift=2)
-    @test isa(CHOLMOD.Sparse(F), CHOLMOD.Sparse{elty})
-    @test CHOLMOD.Sparse(ldlt!(copy(F), A1pd, shift=2.0)) ≈ CHOLMOD.Sparse(F) # surprisingly, this can cause small ulp size changes so we cannot test exact equality
-
-    @test isa(CHOLMOD.factor_to_sparse!(F), CHOLMOD.Sparse)
-    @test_throws CHOLMOD.CHOLMODException CHOLMOD.factor_to_sparse!(F)
-
-    ## Low level interface
-    @test CHOLMOD.nnz(A1Sparse) == nnz(A1)
-    @test CHOLMOD.speye(5, 5, elty) == Matrix(I, 5, 5)
-    @test CHOLMOD.spzeros(5, 5, 5, elty) == zeros(elty, 5, 5)
-    if elty <: Real
-        @test CHOLMOD.copy(A1Sparse, 0, 1) == A1Sparse
-        @test CHOLMOD.horzcat(A1Sparse, A2Sparse, true) == [A1 A2]
-        @test CHOLMOD.vertcat(A1Sparse, A2Sparse, true) == [A1; A2]
-        svec = fill(elty(1), 1)
-        @test CHOLMOD.scale!(CHOLMOD.Dense(svec), CHOLMOD.SCALAR, A1Sparse) == A1Sparse
-        svec = fill(elty(1), 5)
-        @test_throws DimensionMismatch CHOLMOD.scale!(CHOLMOD.Dense(svec), CHOLMOD.SCALAR, A1Sparse)
-        @test CHOLMOD.scale!(CHOLMOD.Dense(svec), CHOLMOD.ROW, A1Sparse) == A1Sparse
-        @test_throws DimensionMismatch CHOLMOD.scale!(CHOLMOD.Dense([svec; 1]), CHOLMOD.ROW, A1Sparse)
-        @test CHOLMOD.scale!(CHOLMOD.Dense(svec), CHOLMOD.COL, A1Sparse) == A1Sparse
-        @test_throws DimensionMismatch CHOLMOD.scale!(CHOLMOD.Dense([svec; 1]), CHOLMOD.COL, A1Sparse)
-        @test CHOLMOD.scale!(CHOLMOD.Dense(svec), CHOLMOD.SYM, A1Sparse) == A1Sparse
-        @test_throws DimensionMismatch CHOLMOD.scale!(CHOLMOD.Dense([svec; 1]), CHOLMOD.SYM, A1Sparse)
-        @test_throws DimensionMismatch CHOLMOD.scale!(CHOLMOD.Dense(svec), CHOLMOD.SYM, CHOLMOD.Sparse(A1[:,1:4]))
-    else
-        @test_throws MethodError CHOLMOD.copy(A1Sparse, 0, 1) == A1Sparse
-        @test_throws MethodError CHOLMOD.horzcat(A1Sparse, A2Sparse, true) == [A1 A2]
-        @test_throws MethodError CHOLMOD.vertcat(A1Sparse, A2Sparse, true) == [A1; A2]
-    end
-
-    if elty <: Real
-        @test CHOLMOD.ssmult(A1Sparse, A2Sparse, 0, true, true) ≈ A1*A2
-        @test CHOLMOD.aat(A1Sparse, [0:size(A1,2)-1;], 1) ≈ A1*A1'
-        @test CHOLMOD.aat(A1Sparse, [0:1;], 1) ≈ A1[:,1:2]*A1[:,1:2]'
-        @test CHOLMOD.copy(A1Sparse, 0, 1) == A1Sparse
-    end
-
-    @test CHOLMOD.Sparse(CHOLMOD.Dense(A1Sparse)) == A1Sparse
-end
-
-@testset "extract factors" begin
-    Af = float([4 12 -16; 12 37 -43; -16 -43 98])
-    As = sparse(Af)
-    Lf = float([2 0 0; 6 1 0; -8 5 3])
-    LDf = float([4 0 0; 3 1 0; -4 5 9])  # D is stored along the diagonal
-    L_f = float([1 0 0; 3 1 0; -4 5 1])  # L by itself in LDLt of Af
-    D_f = float([4 0 0; 0 1 0; 0 0 9])
-    p = [2,3,1]
-    p_inv = [3,1,2]
-
-    @testset "cholesky, no permutation" begin
-        Fs = cholesky(As, perm=[1:3;])
-        @test Fs.p == [1:3;]
-        @test sparse(Fs.L) ≈ Lf
-        @test sparse(Fs) ≈ As
-        b = rand(3)
-        @test Fs\b ≈ Af\b
-        @test Fs.UP\(Fs.PtL\b) ≈ Af\b
-        @test Fs.L\b ≈ Lf\b
-        @test Fs.U\b ≈ Lf'\b
-        @test Fs.L'\b ≈ Lf'\b
-        @test Fs.U'\b ≈ Lf\b
-        @test Fs.PtL\b ≈ Lf\b
-        @test Fs.UP\b ≈ Lf'\b
-        @test Fs.PtL'\b ≈ Lf'\b
-        @test Fs.UP'\b ≈ Lf\b
-        @test_throws CHOLMOD.CHOLMODException Fs.D
-        @test_throws CHOLMOD.CHOLMODException Fs.LD
-        @test_throws CHOLMOD.CHOLMODException Fs.DU
-        @test_throws CHOLMOD.CHOLMODException Fs.PLD
-        @test_throws CHOLMOD.CHOLMODException Fs.DUPt
-    end
-
-    @testset "cholesky, with permutation" begin
-        Fs = cholesky(As, perm=p)
-        @test Fs.p == p
-        Afp = Af[p,p]
-        Lfp = cholesky(Afp).L
-        Ls = sparse(Fs.L)
-        @test Ls ≈ Lfp
-        @test Ls * Ls' ≈ Afp
-        P = sparse(1:3, Fs.p, ones(3))
-        @test P' * Ls * Ls' * P ≈ As
-        @test sparse(Fs) ≈ As
-        b = rand(3)
-        @test Fs\b ≈ Af\b
-        @test Fs.UP\(Fs.PtL\b) ≈ Af\b
-        @test Fs.L\b ≈ Lfp\b
-        @test Fs.U'\b ≈ Lfp\b
-        @test Fs.U\b ≈ Lfp'\b
-        @test Fs.L'\b ≈ Lfp'\b
-        @test Fs.PtL\b ≈ Lfp\b[p]
-        @test Fs.UP\b ≈ (Lfp'\b)[p_inv]
-        @test Fs.PtL'\b ≈ (Lfp'\b)[p_inv]
-        @test Fs.UP'\b ≈ Lfp\b[p]
-        @test_throws CHOLMOD.CHOLMODException Fs.PL
-        @test_throws CHOLMOD.CHOLMODException Fs.UPt
-        @test_throws CHOLMOD.CHOLMODException Fs.D
-        @test_throws CHOLMOD.CHOLMODException Fs.LD
-        @test_throws CHOLMOD.CHOLMODException Fs.DU
-        @test_throws CHOLMOD.CHOLMODException Fs.PLD
-        @test_throws CHOLMOD.CHOLMODException Fs.DUPt
-    end
-
-    @testset "ldlt, no permutation" begin
-        Fs = ldlt(As, perm=[1:3;])
-        @test Fs.p == [1:3;]
-        @test sparse(Fs.LD) ≈ LDf
-        @test sparse(Fs) ≈ As
-        b = rand(3)
-        @test Fs\b ≈ Af\b
-        @test Fs.UP\(Fs.PtLD\b) ≈ Af\b
-        @test Fs.DUP\(Fs.PtL\b) ≈ Af\b
-        @test Fs.L\b ≈ L_f\b
-        @test Fs.U\b ≈ L_f'\b
-        @test Fs.L'\b ≈ L_f'\b
-        @test Fs.U'\b ≈ L_f\b
-        @test Fs.PtL\b ≈ L_f\b
-        @test Fs.UP\b ≈ L_f'\b
-        @test Fs.PtL'\b ≈ L_f'\b
-        @test Fs.UP'\b ≈ L_f\b
-        @test Fs.D\b ≈ D_f\b
-        @test Fs.D'\b ≈ D_f\b
-        @test Fs.LD\b ≈ D_f\(L_f\b)
-        @test Fs.DU'\b ≈ D_f\(L_f\b)
-        @test Fs.LD'\b ≈ L_f'\(D_f\b)
-        @test Fs.DU\b ≈ L_f'\(D_f\b)
-        @test Fs.PtLD\b ≈ D_f\(L_f\b)
-        @test Fs.DUP'\b ≈ D_f\(L_f\b)
-        @test Fs.PtLD'\b ≈ L_f'\(D_f\b)
-        @test Fs.DUP\b ≈ L_f'\(D_f\b)
-    end
-
-    @testset "ldlt, with permutation" begin
-        Fs = ldlt(As, perm=p)
-        @test Fs.p == p
-        @test sparse(Fs) ≈ As
-        b = rand(3)
-        Asp = As[p,p]
-        LDp = sparse(ldlt(Asp, perm=[1,2,3]).LD)
-        # LDp = sparse(Fs.LD)
-        Lp, dp = SuiteSparse.CHOLMOD.getLd!(copy(LDp))
-        Dp = sparse(Diagonal(dp))
-        @test Fs\b ≈ Af\b
-        @test Fs.UP\(Fs.PtLD\b) ≈ Af\b
-        @test Fs.DUP\(Fs.PtL\b) ≈ Af\b
-        @test Fs.L\b ≈ Lp\b
-        @test Fs.U\b ≈ Lp'\b
-        @test Fs.L'\b ≈ Lp'\b
-        @test Fs.U'\b ≈ Lp\b
-        @test Fs.PtL\b ≈ Lp\b[p]
-        @test Fs.UP\b ≈ (Lp'\b)[p_inv]
-        @test Fs.PtL'\b ≈ (Lp'\b)[p_inv]
-        @test Fs.UP'\b ≈ Lp\b[p]
-        @test Fs.LD\b ≈ Dp\(Lp\b)
-        @test Fs.DU'\b ≈ Dp\(Lp\b)
-        @test Fs.LD'\b ≈ Lp'\(Dp\b)
-        @test Fs.DU\b ≈ Lp'\(Dp\b)
-        @test Fs.PtLD\b ≈ Dp\(Lp\b[p])
-        @test Fs.DUP'\b ≈ Dp\(Lp\b[p])
-        @test Fs.PtLD'\b ≈ (Lp'\(Dp\b))[p_inv]
-        @test Fs.DUP\b ≈ (Lp'\(Dp\b))[p_inv]
-        @test_throws CHOLMOD.CHOLMODException Fs.DUPt
-        @test_throws CHOLMOD.CHOLMODException Fs.PLD
-    end
-
-    @testset "Element promotion and type inference" begin
-        @inferred cholesky(As)\fill(1, size(As, 1))
-        @inferred ldlt(As)\fill(1, size(As, 1))
-    end
-end
-
-@testset "Issue 11745 - row and column pointers were not sorted in sparse(Factor)" begin
-    A = Float64[10 1 1 1; 1 10 0 0; 1 0 10 0; 1 0 0 10]
-    @test sparse(cholesky(sparse(A))) ≈ A
-end
-GC.gc()
-
-@testset "Issue 11747 - Wrong show method defined for FactorComponent" begin
-    v = cholesky(sparse(Float64[ 10 1 1 1; 1 10 0 0; 1 0 10 0; 1 0 0 10])).L
-    for s in (sprint(show, MIME("text/plain"), v), sprint(show, v))
-        @test occursin("method:  simplicial", s)
-        @test !occursin("#undef", s)
-    end
-end
-
-@testset "Issue 14076" begin
-    @test cholesky(sparse([1,2,3,4], [1,2,3,4], Float32[1,4,16,64]))\[1,4,16,64] == fill(1, 4)
-end
-
-@testset "Issue 29367" begin
-    if Int != Int32
-        @test_throws MethodError cholesky(sparse(Int32[1,2,3,4], Int32[1,2,3,4], Float64[1,4,16,64]))
-        @test_throws MethodError cholesky(sparse(Int32[1,2,3,4], Int32[1,2,3,4], Float32[1,4,16,64]))
-        @test_throws MethodError ldlt(sparse(Int32[1,2,3,4], Int32[1,2,3,4], Float64[1,4,16,64]))
-        @test_throws MethodError ldlt(sparse(Int32[1,2,3,4], Int32[1,2,3,4], Float32[1,4,16,64]))
-    end
-end
-
-@testset "Issue 14134" begin
-    A = CHOLMOD.Sparse(sprandn(10,5,0.1) + I |> t -> t't)
-    b = IOBuffer()
-    serialize(b, A)
-    seekstart(b)
-    Anew = deserialize(b)
-    @test_throws ArgumentError show(Anew)
-    @test_throws ArgumentError size(Anew)
-    @test_throws ArgumentError Anew[1]
-    @test_throws ArgumentError Anew[2,1]
-    F = cholesky(A)
-    serialize(b, F)
-    seekstart(b)
-    Fnew = deserialize(b)
-    @test_throws ArgumentError Fnew\fill(1., 5)
-    @test_throws ArgumentError show(Fnew)
-    @test_throws ArgumentError size(Fnew)
-    @test_throws ArgumentError diag(Fnew)
-    @test_throws ArgumentError logdet(Fnew)
-end
-
-@testset "Issue #28985" begin
-    @test typeof(cholesky(sparse(I, 4, 4))'\rand(4)) == Array{Float64, 1}
-    @test typeof(cholesky(sparse(I, 4, 4))'\rand(4,1)) == Array{Float64, 2}
-end
-
-@testset "Issue with promotion during conversion to CHOLMOD.Dense" begin
-    @test CHOLMOD.Dense(fill(1, 5)) == fill(1, 5, 1)
-    @test CHOLMOD.Dense(fill(1f0, 5)) == fill(1, 5, 1)
-    @test CHOLMOD.Dense(fill(1f0 + 0im, 5, 2)) == fill(1, 5, 2)
-end
-
-@testset "Further issue with promotion #14894" begin
-    x = fill(1., 5)
-    @test cholesky(sparse(Float16(1)I, 5, 5))\x == x
-    @test cholesky(Symmetric(sparse(Float16(1)I, 5, 5)))\x == x
-    @test cholesky(Hermitian(sparse(Complex{Float16}(1)I, 5, 5)))\x == x
-    @test_throws TypeError cholesky(sparse(BigFloat(1)I, 5, 5))
-    @test_throws TypeError cholesky(Symmetric(sparse(BigFloat(1)I, 5, 5)))
-    @test_throws TypeError cholesky(Hermitian(sparse(Complex{BigFloat}(1)I, 5, 5)))
-end
-
-@testset "test \\ for Factor and StridedVecOrMat" begin
-    x = rand(5)
-    A = cholesky(sparse(Diagonal(x.\1)))
-    @test A\view(fill(1.,10),1:2:10) ≈ x
-    @test A\view(Matrix(1.0I, 5, 5), :, :) ≈ Matrix(Diagonal(x))
-end
-
-@testset "Test \\ for Factor and SparseVecOrMat" begin
-    sparseI = sparse(1.0I, 100, 100)
-    sparseb = sprandn(100, 0.5)
-    sparseB = sprandn(100, 100, 0.5)
-    chI = cholesky(sparseI)
-    @test chI \ sparseb ≈ sparseb
-    @test chI \ sparseB ≈ sparseB
-    @test chI \ sparseI ≈ sparseI
-end
-
-@testset "Real factorization and complex rhs" begin
-    A = sprandn(5, 5, 0.4) |> t -> t't + I
-    B = complex.(randn(5, 2), randn(5, 2))
-    @test cholesky(A)\B ≈ A\B
-end
-
-@testset "Make sure that ldlt performs an LDLt (Issue #19032)" begin
-    m, n = 400, 500
-    A = sprandn(m, n, .2)
-    M = [I copy(A'); A -I]
-    b = M * fill(1., m+n)
-    F = ldlt(M)
-    s = unsafe_load(pointer(F))
-    @test s.is_super == 0
-    @test F\b ≈ fill(1., m+n)
-    F2 = cholesky(M; check = false)
-    @test !issuccess(F2)
-    ldlt!(F2, M)
-    @test issuccess(F2)
-    @test F2\b ≈ fill(1., m+n)
-end
-
-@testset "Test that imaginary parts in Hermitian{T,SparseMatrixCSC{T}} are ignored" begin
-    A = sparse([1,2,3,4,1], [1,2,3,4,2], [complex(2.0,1),2,2,2,1])
-    Fs = cholesky(Hermitian(A))
-    Fd = cholesky(Hermitian(Array(A)))
-    @test sparse(Fs) ≈ Hermitian(A)
-    @test Fs\fill(1., 4) ≈ Fd\fill(1., 4)
-end
-
-@testset "\\ '\\ and transpose(...)\\" begin
-    # Test that \ and '\ and transpose(...)\ work for Symmetric and Hermitian. This is just
-    # a dispatch exercise so it doesn't matter that the complex matrix has
-    # zero imaginary parts
-    Apre = sprandn(10, 10, 0.2) - I
-    for A in (Symmetric(Apre), Hermitian(Apre),
-              Symmetric(Apre + 10I), Hermitian(Apre + 10I),
-              Hermitian(complex(Apre)), Hermitian(complex(Apre) + 10I))
-        local A, x, b
-        x = fill(1., 10)
-        b = A*x
-        @test x ≈ A\b
-        @test transpose(A)\b ≈ A'\b
-    end
-end
-
-@testset "Check that Symmetric{SparseMatrixCSC} can be constructed from CHOLMOD.Sparse" begin
-    Int === Int32 && Random.seed!(124)
-    A = sprandn(10, 10, 0.1)
-    B = CHOLMOD.Sparse(A)
-    C = B'B
-    # Change internal representation to symmetric (upper/lower)
-    o = fieldoffset(CHOLMOD.C_Sparse{eltype(C)}, findall(fieldnames(CHOLMOD.C_Sparse{eltype(C)}) .== :stype)[1])
-    for uplo in (1, -1)
-        unsafe_store!(Ptr{Int8}(pointer(C)), uplo, Int(o) + 1)
-        @test convert(Symmetric{Float64,SparseMatrixCSC{Float64,Int}}, C) ≈ Symmetric(A'A)
-    end
-end
-
-@testset "Check inputs to Sparse. Related to #20024" for A_ in (
-    SparseMatrixCSC(2, 2, [1, 2, 3], CHOLMOD.SuiteSparse_long[1,2], Float64[]),
-    SparseMatrixCSC(2, 2, [1, 2, 3], CHOLMOD.SuiteSparse_long[1,2], Float64[1.0]))
-    args = (size(A_)..., getcolptr(A_) .- 1, rowvals(A_) .- 1, nonzeros(A_))
-    @test_throws ArgumentError CHOLMOD.Sparse(args...)
-    @test_throws ArgumentError CHOLMOD.Sparse(A_)
-end
-
-@testset "sparse right multiplication of Symmetric and Hermitian matrices #21431" begin
-    S = sparse(1.0I, 2, 2)
-    @test issparse(S*S*S)
-    for T in (Symmetric, Hermitian)
-        @test issparse(S*T(S)*S)
-        @test issparse(S*(T(S)*S))
-        @test issparse((S*T(S))*S)
-    end
-end
-
-@testset "Test sparse low rank update for cholesky decomposion" begin
-    A = SparseMatrixCSC{Float64,CHOLMOD.SuiteSparse_long}(10, 5, [1,3,6,8,10,13], [6,7,1,2,9,3,5,1,7,6,7,9],
-        [-0.138843, 2.99571, -0.556814, 0.669704, -1.39252, 1.33814,
-        1.02371, -0.502384, 1.10686, 0.262229, -1.6935, 0.525239])
-    AtA = A'*A
-    C0 = [1., 2., 0, 0, 0]
-    # Test both cholesky and LDLt with and without automatic permutations
-    for F in (cholesky(AtA), cholesky(AtA, perm=1:5), ldlt(AtA), ldlt(AtA, perm=1:5))
-        local F
-        x0 = F\(b = fill(1., 5))
-        #Test both sparse/dense and vectors/matrices
-        for Ctest in (C0, sparse(C0), [C0 2*C0], sparse([C0 2*C0]))
-            local x, C, F1
-            C = copy(Ctest)
-            F1 = copy(F)
-            x = (AtA+C*C')\b
-
-            #Test update
-            F11 = CHOLMOD.lowrankupdate(F1, C)
-            @test Array(sparse(F11)) ≈ AtA+C*C'
-            @test F11\b ≈ x
-            #Make sure we get back the same factor again
-            F10 = CHOLMOD.lowrankdowndate(F11, C)
-            @test Array(sparse(F10)) ≈ AtA
-            @test F10\b ≈ x0
-
-            #Test in-place update
-            CHOLMOD.lowrankupdate!(F1, C)
-            @test Array(sparse(F1)) ≈ AtA+C*C'
-            @test F1\b ≈ x
-            #Test in-place downdate
-            CHOLMOD.lowrankdowndate!(F1, C)
-            @test Array(sparse(F1)) ≈ AtA
-            @test F1\b ≈ x0
-
-            @test C == Ctest    #Make sure C didn't change
-        end
-    end
-end
-
-@testset "Issue #22335" begin
-    local A, F
-    A = sparse(1.0I, 3, 3)
-    @test issuccess(cholesky(A))
-    A[3, 3] = -1
-    F = cholesky(A; check = false)
-    @test !issuccess(F)
-    @test issuccess(ldlt!(F, A))
-    A[3, 3] = 1
-    @test A[:, 3:-1:1]\fill(1., 3) == [1, 1, 1]
-end
-
-@testset "Non-positive definite matrices" begin
-    A = sparse(Float64[1 2; 2 1])
-    B = sparse(ComplexF64[1 2; 2 1])
-    for M in (A, B, Symmetric(A), Hermitian(B))
-        F = cholesky(M; check = false)
-        @test_throws PosDefException cholesky(M)
-        @test_throws PosDefException cholesky!(F, M)
-        @test !issuccess(cholesky(M; check = false))
-        @test !issuccess(cholesky!(F, M; check = false))
-    end
-    A = sparse(Float64[0 0; 0 0])
-    B = sparse(ComplexF64[0 0; 0 0])
-    for M in (A, B, Symmetric(A), Hermitian(B))
-        F = ldlt(M; check = false)
-        @test_throws ZeroPivotException ldlt(M)
-        @test_throws ZeroPivotException ldlt!(F, M)
-        @test !issuccess(ldlt(M; check = false))
-        @test !issuccess(ldlt!(F, M; check = false))
-    end
-end
-
-@testset "Issues #27860 & #28363" begin
-    for typeA in (Float64, ComplexF64), typeB in (Float64, ComplexF64), transform in (identity, adjoint, transpose)
-        A = sparse(typeA[2.0 0.1; 0.1 2.0])
-        B = randn(typeB, 2, 2)
-        @test A \ transform(B) ≈ cholesky(A) \ transform(B) ≈ Matrix(A) \ transform(B)
-        C = randn(typeA, 2, 2)
-        sC = sparse(C)
-        sF = typeA <: Real ? cholesky(Symmetric(A)) : cholesky(Hermitian(A))
-        @test cholesky(A) \ transform(sC) ≈ Matrix(A) \ transform(C)
-        @test sF.PtL \ transform(A) ≈ sF.PtL \ Matrix(transform(A))
-    end
-end
-
-@testset "Issue #33365" begin
-    A = Sparse(spzeros(0, 0))
-    @test A * A' == A
-    @test A' * A == A
-    B = Sparse(spzeros(0, 4))
-    @test B * B' == Sparse(spzeros(0, 0))
-    @test B' * B == Sparse(spzeros(4, 4))
-    C = Sparse(spzeros(3, 0))
-    @test C * C' == Sparse(spzeros(3, 3))
-    @test C' * C == Sparse(spzeros(0, 0))
-end
diff --git a/stdlib/SuiteSparse/test/runtests.jl b/stdlib/SuiteSparse/test/runtests.jl
deleted file mode 100644
index cde54e9488818b..00000000000000
--- a/stdlib/SuiteSparse/test/runtests.jl
+++ /dev/null
@@ -1,30 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-using Test, Random
-using SuiteSparse, LinearAlgebra, SparseArrays
-
-if Base.USE_GPL_LIBS
-    include("umfpack.jl")
-    include("cholmod.jl")
-    include("spqr.jl")
-
-    # Test multithreaded execution
-    let p, cmd = `$(Base.julia_cmd()) --depwarn=error --startup-file=no threads.jl`
-        # test both nthreads==1 and nthreads>1. spawn a process to test whichever
-        # case we are not running currently.
-        other_nthreads = Threads.nthreads() == 1 ? 4 : 1
-        p = run(
-                pipeline(
-                    setenv(
-                        cmd,
-                        "JULIA_NUM_THREADS" => other_nthreads,
-                        dir=@__DIR__()),
-                    stdout = stdout,
-                    stderr = stderr),
-                wait = false)
-        include("threads.jl")
-        if !success(p)
-            error("SuiteSparse threads test failed with nthreads == $other_nthreads")
-        end
-    end
-end
diff --git a/stdlib/SuiteSparse/test/spqr.jl b/stdlib/SuiteSparse/test/spqr.jl
deleted file mode 100644
index d1802c7ccc3b3b..00000000000000
--- a/stdlib/SuiteSparse/test/spqr.jl
+++ /dev/null
@@ -1,136 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-using SuiteSparse.SPQR
-using SuiteSparse.CHOLMOD
-using SuiteSparse
-using LinearAlgebra: I, istriu, norm, qr, rank, rmul!, lmul!, Adjoint, Transpose
-using SparseArrays: sparse, sprandn, spzeros, SparseMatrixCSC
-
-@testset "Sparse QR" begin
-m, n = 100, 10
-nn = 100
-
-@test size(qr(sprandn(m, n, 0.1)).Q) == (m, m)
-
-@testset "element type of A: $eltyA" for eltyA in (Float64, ComplexF64)
-    if eltyA <: Real
-        A = sparse([1:n; rand(1:m, nn - n)], [1:n; rand(1:n, nn - n)], randn(nn), m, n)
-    else
-        A = sparse([1:n; rand(1:m, nn - n)], [1:n; rand(1:n, nn - n)], complex.(randn(nn), randn(nn)), m, n)
-    end
-
-    F = qr(A)
-    @test size(F) == (m,n)
-    @test size(F, 1) == m
-    @test size(F, 2) == n
-    @test size(F, 3) == 1
-    @test_throws ArgumentError size(F, 0)
-
-    @testset "getindex" begin
-        @test istriu(F.R)
-        @test isperm(F.pcol)
-        @test isperm(F.prow)
-        @test_throws ErrorException F.T
-    end
-
-    @testset "apply Q" begin
-        Q = F.Q
-        Imm = Matrix{Float64}(I, m, m)
-        @test Q' * (Q*Imm) ≈ Imm
-        @test (Imm*Q) * Q' ≈ Imm
-
-        # test that Q'Pl*A*Pr = R
-        R0 = Q'*Array(A[F.prow, F.pcol])
-        @test R0[1:n, :] ≈ F.R
-        @test norm(R0[n + 1:end, :], 1) < 1e-12
-
-        offsizeA = Matrix{Float64}(I, m+1, m+1)
-        @test_throws DimensionMismatch lmul!(Q, offsizeA)
-        @test_throws DimensionMismatch lmul!(adjoint(Q), offsizeA)
-        @test_throws DimensionMismatch rmul!(offsizeA, Q)
-        @test_throws DimensionMismatch rmul!(offsizeA, adjoint(Q))
-    end
-
-    @testset "element type of B: $eltyB" for eltyB in (Int, Float64, ComplexF64)
-        if eltyB == Int
-            B = rand(1:10, m, 2)
-        elseif eltyB <: Real
-            B = randn(m, 2)
-        else
-            B = complex.(randn(m, 2), randn(m, 2))
-        end
-
-        @inferred A\B
-        @test A\B[:,1] ≈ Array(A)\B[:,1]
-        @test A\B ≈ Array(A)\B
-        @test_throws DimensionMismatch A\B[1:m-1,:]
-        C, x = A[1:9, :], fill(eltyB(1), 9)
-        @test C*(C\x) ≈ x # Underdetermined system
-    end
-
-    # Make sure that conversion to Sparse doesn't use SuiteSparse's symmetric flag
-    @test qr(SparseMatrixCSC{eltyA}(I, 5, 5)) \ fill(eltyA(1), 5) == fill(1, 5)
-end
-
-@testset "basic solution of rank deficient ls" begin
-    A = sprandn(m, 5, 0.9)*sprandn(5, n, 0.9)
-    b = randn(m)
-    xs = A\b
-    xd = Array(A)\b
-
-    # check that basic solution has more zeros
-    @test count(!iszero, xs) < count(!iszero, xd)
-    @test A*xs ≈ A*xd
-end
-
-@testset "Issue 26367" begin
-    A = sparse([0.0 1 0 0; 0 0 0 0])
-    @test Matrix(qr(A).Q) == Matrix(qr(Matrix(A)).Q) == Matrix(I, 2, 2)
-end
-
-@testset "Issue 26368" begin
-    A = sparse([0.0 1 0 0; 0 0 0 0])
-    F = qr(A)
-    @test F.Q*F.R == A[F.prow,F.pcol]
-end
-
-@testset "select ordering overdetermined" begin
-     A = sparse([1:n; rand(1:m, nn - n)], [1:n; rand(1:n, nn - n)], randn(nn), m, n)
-     b = randn(m)
-     xref = Array(A) \ b
-     for ordering ∈ SuiteSparse.SPQR.ORDERINGS
-         QR = qr(A, ordering=ordering)
-         x = QR \ b
-         @test x ≈ xref
-     end
-     @test_throws ErrorException qr(A, ordering=Int32(10))
-end
-
-@testset "select ordering underdetermined" begin
-     A = sparse([1:n; rand(1:n, nn - n)], [1:n; rand(1:m, nn - n)], randn(nn), n, m)
-     b = A * ones(m)
-     for ordering ∈ SuiteSparse.SPQR.ORDERINGS
-         QR = qr(A, ordering=ordering)
-         x = QR \ b
-         # x ≂̸ Array(A) \ b; LAPACK returns a min-norm x while SPQR returns a basic x
-         @test A * x ≈ b
-     end
-     @test_throws ErrorException qr(A, ordering=Int32(10))
-end
-
-@testset "propertynames of QRSparse" begin
-    A = sparse([0.0 1 0 0; 0 0 0 0])
-    F = qr(A)
-    @test propertynames(F) == (:R, :Q, :prow, :pcol)
-    @test propertynames(F, true) == (:R, :Q, :prow, :pcol, :factors, :τ, :cpiv, :rpivinv)
-end
-
-@testset "rank" begin
-    S = sprandn(10, 5, 1.0)*sprandn(5, 10, 1.0)
-    @test rank(qr(S)) == 5
-    @test rank(S) == 5
-    @test all(iszero, (rank(qr(spzeros(10, i))) for i in 1:10))
-    @test all(iszero, (rank(spzeros(10, i)) for i in 1:10))
-end
-
-end
diff --git a/stdlib/SuiteSparse/test/threads.jl b/stdlib/SuiteSparse/test/threads.jl
deleted file mode 100644
index 29e97500dc9885..00000000000000
--- a/stdlib/SuiteSparse/test/threads.jl
+++ /dev/null
@@ -1,22 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-using Test, LinearAlgebra, SparseArrays
-
-@testset "threaded SuiteSparse tests" begin
-    A = sprandn(200, 200, 0.2)
-    b = rand(200)
-
-    function test(n::Integer)
-        _A = A[1:n, 1:n]
-        _b = b[1:n]
-        x = qr(_A) \ _b
-        return norm(x)
-    end
-
-    res_threads = zeros(100)
-    Threads.@threads for i in 1:100
-        res_threads[i] = test(i + 100)
-    end
-
-    @test res_threads ≈ [test(i + 100) for i in 1:100]
-end
diff --git a/stdlib/SuiteSparse/test/umfpack.jl b/stdlib/SuiteSparse/test/umfpack.jl
deleted file mode 100644
index a4f749f1ce58b5..00000000000000
--- a/stdlib/SuiteSparse/test/umfpack.jl
+++ /dev/null
@@ -1,240 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-using SuiteSparse.UMFPACK
-using SuiteSparse
-using SuiteSparse: increment!
-using Serialization
-using LinearAlgebra:
-    I, det, issuccess, ldiv!, lu, lu!, Adjoint, Transpose, SingularException, Diagonal
-using SparseArrays: nnz, sparse, sprand, sprandn, SparseMatrixCSC
-
-@testset "UMFPACK wrappers" begin
-    se33 = sparse(1.0I, 3, 3)
-    do33 = fill(1., 3)
-    @test isequal(se33 \ do33, do33)
-
-    # based on deps/Suitesparse-4.0.2/UMFPACK/Demo/umfpack_di_demo.c
-
-    A0 = sparse(increment!([0,4,1,1,2,2,0,1,2,3,4,4]),
-                increment!([0,4,0,2,1,2,1,4,3,2,1,2]),
-                [2.,1.,3.,4.,-1.,-3.,3.,6.,2.,1.,4.,2.], 5, 5)
-
-    @testset "Core functionality for $Tv elements" for Tv in (Float64, ComplexF64)
-        # We might be able to support two index sizes one day
-        for Ti in Base.uniontypes(SuiteSparse.UMFPACK.UMFITypes)
-            A = convert(SparseMatrixCSC{Tv,Ti}, A0)
-            lua = lu(A)
-            @test nnz(lua) == 18
-            @test_throws ErrorException lua.Z
-            L,U,p,q,Rs = lua.:(:)
-            @test (Diagonal(Rs) * A)[p,q] ≈ L * U
-
-            det(lua) ≈ det(Array(A))
-
-            b = [8., 45., -3., 3., 19.]
-            x = lua\b
-            @test x ≈ float([1:5;])
-
-            @test A*x ≈ b
-            z = complex.(b)
-            x = ldiv!(lua, z)
-            @test x ≈ float([1:5;])
-            @test z === x
-            y = similar(z)
-            ldiv!(y, lua, complex.(b))
-            @test y ≈ x
-
-            @test A*x ≈ b
-
-            b = [8., 20., 13., 6., 17.]
-            x = lua'\b
-            @test x ≈ float([1:5;])
-
-            @test A'*x ≈ b
-            z = complex.(b)
-            x = ldiv!(adjoint(lua), z)
-            @test x ≈ float([1:5;])
-            @test x === z
-            y = similar(x)
-            ldiv!(y, adjoint(lua), complex.(b))
-            @test y ≈ x
-
-            @test A'*x ≈ b
-            x = transpose(lua) \ b
-            @test x ≈ float([1:5;])
-
-            @test transpose(A) * x ≈ b
-            x = ldiv!(transpose(lua), complex.(b))
-            @test x ≈ float([1:5;])
-            y = similar(x)
-            ldiv!(y, transpose(lua), complex.(b))
-            @test y ≈ x
-
-            @test transpose(A) * x ≈ b
-
-            # Element promotion and type inference
-            @inferred lua\fill(1, size(A, 2))
-        end
-    end
-
-    @testset "More tests for complex cases" begin
-        Ac0 = complex.(A0,A0)
-        for Ti in Base.uniontypes(SuiteSparse.UMFPACK.UMFITypes)
-            Ac = convert(SparseMatrixCSC{ComplexF64,Ti}, Ac0)
-            x  = fill(1.0 + im, size(Ac,1))
-            lua = lu(Ac)
-            L,U,p,q,Rs = lua.:(:)
-            @test (Diagonal(Rs) * Ac)[p,q] ≈ L * U
-            b  = Ac*x
-            @test Ac\b ≈ x
-            b  = Ac'*x
-            @test Ac'\b ≈ x
-            b  = transpose(Ac)*x
-            @test transpose(Ac)\b ≈ x
-        end
-    end
-
-    @testset "Rectangular cases. elty=$elty, m=$m, n=$n" for
-        elty in (Float64, ComplexF64),
-            (m, n) in ((10,5), (5, 10))
-
-        Random.seed!(30072018)
-        A = sparse([1:min(m,n); rand(1:m, 10)], [1:min(m,n); rand(1:n, 10)], elty == Float64 ? randn(min(m, n) + 10) : complex.(randn(min(m, n) + 10), randn(min(m, n) + 10)))
-        F = lu(A)
-        L, U, p, q, Rs = F.:(:)
-        @test (Diagonal(Rs) * A)[p,q] ≈ L * U
-    end
-
-    @testset "Issue #4523 - complex sparse \\" begin
-        A, b = sparse((1.0 + im)I, 2, 2), fill(1., 2)
-        @test A * (lu(A)\b) ≈ b
-
-        @test det(sparse([1,3,3,1], [1,1,3,3], [1,1,1,1])) == 0
-    end
-
-    @testset "UMFPACK_ERROR_n_nonpositive" begin
-        @test_throws ArgumentError lu(sparse(Int[], Int[], Float64[], 5, 0))
-    end
-
-    @testset "Issue #15099" for (Tin, Tout) in (
-            (ComplexF16, ComplexF64),
-            (ComplexF32, ComplexF64),
-            (ComplexF64, ComplexF64),
-            (Float16, Float64),
-            (Float32, Float64),
-            (Float64, Float64),
-            (Int, Float64),
-        )
-
-        F = lu(sparse(fill(Tin(1), 1, 1)))
-        L = sparse(fill(Tout(1), 1, 1))
-        @test F.p == F.q == [1]
-        @test F.Rs == [1.0]
-        @test F.L == F.U == L
-        @test F.:(:) == (L, L, [1], [1], [1.0])
-    end
-
-    @testset "BigFloat not supported" for T in (BigFloat, Complex{BigFloat})
-        @test_throws ArgumentError lu(sparse(fill(T(1), 1, 1)))
-    end
-
-    @testset "size(::UmfpackLU)" begin
-        m = n = 1
-        F = lu(sparse(fill(1., m, n)))
-        @test size(F) == (m, n)
-        @test size(F, 1) == m
-        @test size(F, 2) == n
-        @test size(F, 3) == 1
-        @test_throws ArgumentError size(F,-1)
-    end
-
-    @testset "Test aliasing" begin
-        a = rand(5)
-        @test_throws ArgumentError SuiteSparse.UMFPACK.solve!(a, lu(sparse(1.0I, 5, 5)), a, SuiteSparse.UMFPACK.UMFPACK_A)
-        aa = complex(a)
-        @test_throws ArgumentError SuiteSparse.UMFPACK.solve!(aa, lu(sparse((1.0im)I, 5, 5)), aa, SuiteSparse.UMFPACK.UMFPACK_A)
-    end
-
-    @testset "Issues #18246,18244 - lu sparse pivot" begin
-        A = sparse(1.0I, 4, 4)
-        A[1:2,1:2] = [-.01 -200; 200 .001]
-        F = lu(A)
-        @test F.p == [3 ; 4 ; 2 ; 1]
-    end
-
-    @testset "Test that A[c|t]_ldiv_B!{T<:Complex}(X::StridedMatrix{T}, lu::UmfpackLU{Float64}, B::StridedMatrix{T}) works as expected." begin
-        N = 10
-        p = 0.5
-        A = N*I + sprand(N, N, p)
-        X = zeros(ComplexF64, N, N)
-        B = complex.(rand(N, N), rand(N, N))
-        luA, lufA = lu(A), lu(Array(A))
-        @test ldiv!(copy(X), luA, B) ≈ ldiv!(copy(X), lufA, B)
-        @test ldiv!(copy(X), adjoint(luA), B) ≈ ldiv!(copy(X), adjoint(lufA), B)
-        @test ldiv!(copy(X), transpose(luA), B) ≈ ldiv!(copy(X), transpose(lufA), B)
-    end
-
-    @testset "singular matrix" begin
-        for A in sparse.((Float64[1 2; 0 0], ComplexF64[1 2; 0 0]))
-            @test_throws SingularException lu(A)
-            @test !issuccess(lu(A; check = false))
-        end
-    end
-
-    @testset "deserialization" begin
-        A  = 10*I + sprandn(10, 10, 0.4)
-        F1 = lu(A)
-        b  = IOBuffer()
-        serialize(b, F1)
-        seekstart(b)
-        F2 = deserialize(b)
-        for nm in (:colptr, :m, :n, :nzval, :rowval, :status)
-            @test getfield(F1, nm) == getfield(F2, nm)
-        end
-    end
-
-    @testset "Reuse symbolic LU factorization" begin
-        A1 = sparse(increment!([0,4,1,1,2,2,0,1,2,3,4,4]),
-                    increment!([0,4,0,2,1,2,1,4,3,2,1,2]),
-                    [2.,1.,3.,4.,-1.,-3.,3.,9.,2.,1.,4.,2.], 5, 5)
-        for Tv in (Float64, ComplexF64, Float16, Float32, ComplexF16, ComplexF32)
-            for Ti in Base.uniontypes(SuiteSparse.UMFPACK.UMFITypes)
-                A = convert(SparseMatrixCSC{Tv,Ti}, A0)
-                B = convert(SparseMatrixCSC{Tv,Ti}, A1)
-                b = Tv[8., 45., -3., 3., 19.]
-                F = lu(A)
-                lu!(F, B)
-                @test F\b ≈ B\b ≈ Matrix(B)\b
-
-                # singular matrix
-                C = copy(B)
-                C[4, 3] = Tv(0)
-                F = lu(A)
-                @test_throws SingularException lu!(F, C)
-
-                # change of nonzero pattern
-                D = copy(B)
-                D[5, 1] = Tv(1.0)
-                F = lu(A)
-                @test_throws ArgumentError lu!(F, D)
-            end
-        end
-    end
-
-end
-
-@testset "REPL printing of UmfpackLU" begin
-    # regular matrix
-    A = sparse([1, 2], [1, 2], Float64[1.0, 1.0])
-    F = lu(A)
-    facstring = sprint((t, s) -> show(t, "text/plain", s), F)
-    lstring = sprint((t, s) -> show(t, "text/plain", s), F.L)
-    ustring = sprint((t, s) -> show(t, "text/plain", s), F.U)
-    @test facstring == "$(summary(F))\nL factor:\n$lstring\nU factor:\n$ustring"
-
-    # singular matrix
-    B = sparse(zeros(Float64, 2, 2))
-    F = lu(B; check=false)
-    facstring = sprint((t, s) -> show(t, "text/plain", s), F)
-    @test facstring == "Failed factorization of type $(summary(F))"
-end
diff --git a/stdlib/SuiteSparse_jll/Project.toml b/stdlib/SuiteSparse_jll/Project.toml
index 6b704c4bf1112b..f36ce756c834c5 100644
--- a/stdlib/SuiteSparse_jll/Project.toml
+++ b/stdlib/SuiteSparse_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "SuiteSparse_jll"
 uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c"
-version = "5.8.1+0"
+version = "5.10.1+0"
 
 [deps]
 libblastrampoline_jll = "8e850b90-86db-534c-a0d3-1478176c7d93"
@@ -10,3 +10,9 @@ Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 
 [compat]
 julia = "1.7"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl b/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl
index 19cf398feec252..2940970ceff9fe 100644
--- a/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl
+++ b/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl
@@ -8,7 +8,7 @@ Base.Experimental.@compiler_options compile=min optimize=0 infer=false
 const PATH_list = String[]
 const LIBPATH_list = String[]
 
-export libamd, libbtf, libcamd, libccolamd, libcholmod, libcolamd, libklu, libldl, librbio, libspqr, libsuitesparse_wrapper, libsuitesparseconfig, libumfpack
+export libamd, libbtf, libcamd, libccolamd, libcholmod, libcolamd, libklu, libldl, librbio, libspqr, libsuitesparseconfig, libumfpack
 
 # These get calculated in __init__()
 # Man I can't wait until these are automatically handled by an in-Base JLLWrappers clone.
@@ -35,8 +35,6 @@ librbio_handle = C_NULL
 librbio_path = ""
 libspqr_handle = C_NULL
 libspqr_path = ""
-libsuitesparse_wrapper_handle = C_NULL
-libsuitesparse_wrapper_path = ""
 libsuitesparseconfig_handle = C_NULL
 libsuitesparseconfig_path = ""
 libumfpack_handle = C_NULL
@@ -53,7 +51,6 @@ if Sys.iswindows()
     const libldl = "libldl.dll"
     const librbio = "librbio.dll"
     const libspqr = "libspqr.dll"
-    const libsuitesparse_wrapper = "libsuitesparse_wrapper.dll"
     const libsuitesparseconfig = "libsuitesparseconfig.dll"
     const libumfpack = "libumfpack.dll"
 elseif Sys.isapple()
@@ -67,7 +64,6 @@ elseif Sys.isapple()
     const libldl = "@rpath/libldl.2.dylib"
     const librbio = "@rpath/librbio.2.dylib"
     const libspqr = "@rpath/libspqr.2.dylib"
-    const libsuitesparse_wrapper = "@rpath/libsuitesparse_wrapper.dylib"
     const libsuitesparseconfig = "@rpath/libsuitesparseconfig.5.dylib"
     const libumfpack = "@rpath/libumfpack.5.dylib"
 else
@@ -81,7 +77,6 @@ else
     const libldl = "libldl.so.2"
     const librbio = "librbio.so.2"
     const libspqr = "libspqr.so.2"
-    const libsuitesparse_wrapper = "libsuitesparse_wrapper.so"
     const libsuitesparseconfig = "libsuitesparseconfig.so.5"
     const libumfpack = "libumfpack.so.5"
 end
@@ -107,15 +102,11 @@ function __init__()
     global librbio_path = dlpath(librbio_handle)
     global libspqr_handle = dlopen(libspqr)
     global libspqr_path = dlpath(libspqr_handle)
-    global libsuitesparse_wrapper_handle = dlopen(libsuitesparse_wrapper)
-    global libsuitesparse_wrapper_path = dlpath(libsuitesparse_wrapper_handle)
     global libsuitesparseconfig_handle = dlopen(libsuitesparseconfig)
     global libsuitesparseconfig_path = dlpath(libsuitesparseconfig_handle)
     global libumfpack_handle = dlopen(libumfpack)
     global libumfpack_path = dlpath(libumfpack_handle)
     global artifact_dir = dirname(Sys.BINDIR)
-    LIBPATH[] = dirname(libsuitesparse_wrapper_path)
-    push!(LIBPATH_list, LIBPATH[])
 end
 
 # JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
@@ -135,7 +126,6 @@ get_libklu_path() = libklu_path
 get_libldl_path() = libldl_path
 get_librbio_path() = librbio_path
 get_libspqr_path() = libspqr_path
-get_libsuitesparse_wrapper_path() = libsuitesparse_wrapper_path
 get_libsuitesparseconfig_path() = libsuitesparseconfig_path
 get_libumfpack_path() = libumfpack_path
 
diff --git a/stdlib/SuiteSparse_jll/test/runtests.jl b/stdlib/SuiteSparse_jll/test/runtests.jl
index 60e5cf74d6534f..ca356951f99e22 100644
--- a/stdlib/SuiteSparse_jll/test/runtests.jl
+++ b/stdlib/SuiteSparse_jll/test/runtests.jl
@@ -3,5 +3,5 @@
 using Test, SuiteSparse_jll
 
 @testset "SuiteSparse_jll" begin
-    @test ccall((:SuiteSparse_version, libsuitesparseconfig), Cint, (Ptr{Cint},), C_NULL) == 5008
+    @test ccall((:SuiteSparse_version, libsuitesparseconfig), Cint, (Ptr{Cint},), C_NULL) == 5010
 end
diff --git a/stdlib/TOML/src/TOML.jl b/stdlib/TOML/src/TOML.jl
index a8ad706d0b8b46..4765a05c05f527 100644
--- a/stdlib/TOML/src/TOML.jl
+++ b/stdlib/TOML/src/TOML.jl
@@ -38,7 +38,7 @@ const Parser = Internals.Parser
 Parse file `f` and return the resulting table (dictionary). Throw a
 [`ParserError`](@ref) upon failure.
 
-See also: [`TOML.tryparsefile`](@ref)
+See also [`TOML.tryparsefile`](@ref).
 """
 parsefile(f::AbstractString) =
     Internals.parse(Parser(readstring(f); filepath=abspath(f)))
@@ -52,7 +52,7 @@ parsefile(p::Parser, f::AbstractString) =
 Parse file `f` and return the resulting table (dictionary). Return a
 [`ParserError`](@ref) upon failure.
 
-See also: [`TOML.parsefile`](@ref)
+See also [`TOML.parsefile`](@ref).
 """
 tryparsefile(f::AbstractString) =
     Internals.tryparse(Parser(readstring(f); filepath=abspath(f)))
@@ -66,7 +66,7 @@ tryparsefile(p::Parser, f::AbstractString) =
 Parse the string  or stream `x`, and return the resulting table (dictionary).
 Throw a [`ParserError`](@ref) upon failure.
 
-See also: [`TOML.tryparse`](@ref)
+See also [`TOML.tryparse`](@ref).
 """
 parse(str::AbstractString) =
     Internals.parse(Parser(String(str)))
@@ -82,7 +82,7 @@ parse(p::Parser, io::IO) = parse(p, read(io, String))
 Parse the string or stream `x`, and return the resulting table (dictionary).
 Return a [`ParserError`](@ref) upon failure.
 
-See also: [`TOML.parse`](@ref)
+See also [`TOML.parse`](@ref).
 """
 tryparse(str::AbstractString) =
     Internals.tryparse(Parser(String(str)))
diff --git a/stdlib/TOML/src/print.jl b/stdlib/TOML/src/print.jl
index 71896b7a1d7176..059414152f7271 100644
--- a/stdlib/TOML/src/print.jl
+++ b/stdlib/TOML/src/print.jl
@@ -2,8 +2,37 @@
 
 import Dates
 
+import Base: @invokelatest
 import ..isvalid_barekey_char
 
+function print_toml_escaped(io::IO, s::AbstractString)
+    for c::AbstractChar in s
+        if !isvalid(c)
+            error("TOML print: invalid character $(repr(c)) encountered when printing string")
+        end
+        if c == '\b'
+            Base.print(io, '\\', 'b')
+        elseif c == '\t'
+            Base.print(io, '\\', 't')
+        elseif c == '\n'
+            Base.print(io, '\\', 'n')
+        elseif c == '\f'
+            Base.print(io, '\\', 'f')
+        elseif c == '\r'
+            Base.print(io, '\\', 'r')
+        elseif c == '"'
+            Base.print(io, '\\', '"')
+        elseif c == '\\'
+            Base.print(io, "\\", '\\')
+        elseif Base.iscntrl(c)
+            Base.print(io, "\\u")
+            Base.print(io, string(UInt32(c), base=16, pad=4))
+        else
+            Base.print(io, c)
+        end
+    end
+end
+
 function printkey(io::IO, keys::Vector{String})
     for (i, k) in enumerate(keys)
         i != 1 && Base.print(io, ".")
@@ -12,7 +41,9 @@ function printkey(io::IO, keys::Vector{String})
             Base.print(io, "\"\"")
         elseif any(!isvalid_barekey_char, k)
             # quoted key
-            Base.print(io, "\"", escape_string(k) ,"\"")
+            Base.print(io, "\"")
+            print_toml_escaped(io, k)
+            Base.print(io, "\"")
         else
             Base.print(io, k)
         end
@@ -20,46 +51,40 @@ function printkey(io::IO, keys::Vector{String})
 end
 
 const MbyFunc = Union{Function, Nothing}
-const TOMLValue = Union{AbstractVector, AbstractDict, Dates.DateTime, Dates.Time, Dates.Date, Bool, Integer, AbstractFloat, String}
-function printvalue(f::MbyFunc, io::IO, value::AbstractVector; sorted=false)
+const TOMLValue = Union{AbstractVector, AbstractDict, Dates.DateTime, Dates.Time, Dates.Date, Bool, Integer, AbstractFloat, AbstractString}
+function printvalue(f::MbyFunc, io::IO, value::AbstractVector; sorted=false, by=identity)
     Base.print(io, "[")
     for (i, x) in enumerate(value)
         i != 1 && Base.print(io, ", ")
         if isa(x, AbstractDict)
-            _print(f, io, x; sorted)
+            _print(f, io, x; sorted, by)
         else
-            printvalue(f, io, x; sorted)
+            printvalue(f, io, x; sorted, by)
         end
     end
     Base.print(io, "]")
 end
-function printvalue(f::MbyFunc, io::IO, value; sorted)
-    if f === nothing
-        error("type `$(typeof(value))` is not a valid TOML type, pass a conversion function to `TOML.print`")
-    end
-    toml_value = f(value)
-    if !(toml_value isa TOMLValue)
-        error("TOML syntax function for type `$(typeof(value))` did not return a valid TOML type but a `$(typeof(toml_value))`")
-    end
-    Base.invokelatest(printvalue, f, io, toml_value; sorted)
-end
-printvalue(f::MbyFunc, io::IO, value::AbstractDict; sorted) =
-    _print(f, io, value; sorted)
-printvalue(f::MbyFunc, io::IO, value::Dates.DateTime; sorted) =
+printvalue(f::MbyFunc, io::IO, value::AbstractDict; sorted=false, by=identity) =
+    _print(f, io, value; sorted, by)
+printvalue(f::MbyFunc, io::IO, value::Dates.DateTime; _...) =
     Base.print(io, Dates.format(value, Dates.dateformat"YYYY-mm-dd\THH:MM:SS.sss\Z"))
-printvalue(f::MbyFunc, io::IO, value::Dates.Time; sorted) =
+printvalue(f::MbyFunc, io::IO, value::Dates.Time; _...) =
     Base.print(io, Dates.format(value, Dates.dateformat"HH:MM:SS.sss"))
-printvalue(f::MbyFunc, io::IO, value::Dates.Date; sorted) =
+printvalue(f::MbyFunc, io::IO, value::Dates.Date; _...) =
     Base.print(io, Dates.format(value, Dates.dateformat"YYYY-mm-dd"))
-printvalue(f::MbyFunc, io::IO, value::Bool; sorted) =
+printvalue(f::MbyFunc, io::IO, value::Bool; _...) =
     Base.print(io, value ? "true" : "false")
-printvalue(f::MbyFunc, io::IO, value::Integer; sorted) =
+printvalue(f::MbyFunc, io::IO, value::Integer; _...) =
     Base.print(io, Int64(value))  # TOML specifies 64-bit signed long range for integer
-printvalue(f::MbyFunc, io::IO, value::AbstractFloat; sorted) =
+printvalue(f::MbyFunc, io::IO, value::AbstractFloat; _...) =
     Base.print(io, isnan(value) ? "nan" :
                    isinf(value) ? string(value > 0 ? "+" : "-", "inf") :
                    Float64(value))  # TOML specifies IEEE 754 binary64 for float
-printvalue(f::MbyFunc, io::IO, value::AbstractString; sorted) = Base.print(io, "\"", escape_string(value), "\"")
+function printvalue(f::MbyFunc, io::IO, value::AbstractString; _...)
+    Base.print(io, "\"")
+    print_toml_escaped(io, value)
+    Base.print(io, "\"")
+end
 
 is_table(value)           = isa(value, AbstractDict)
 is_array_of_tables(value) = isa(value, AbstractArray) &&
@@ -70,8 +95,8 @@ function _print(f::MbyFunc, io::IO, a::AbstractDict,
     ks::Vector{String} = String[];
     indent::Int = 0,
     first_block::Bool = true,
-    sorted::Bool,
-    by::Function,
+    sorted::Bool = false,
+    by::Function = identity,
 )
     akeys = keys(a)
     if sorted
@@ -82,11 +107,25 @@ function _print(f::MbyFunc, io::IO, a::AbstractDict,
     for key in akeys
         value = a[key]
         is_tabular(value) && continue
-        Base.print(io, ' '^4max(0,indent-1))
-        printkey(io, [String(key)])
-        Base.print(io, " = ") # print separator
-        printvalue(f, io, value; sorted)
-        Base.print(io, "\n")  # new line?
+        if !isa(value, TOMLValue)
+            if f === nothing
+                error("type `$(typeof(value))` is not a valid TOML type, pass a conversion function to `TOML.print`")
+            end
+            toml_value = f(value)
+            if !(toml_value isa TOMLValue)
+                error("TOML syntax function for type `$(typeof(value))` did not return a valid TOML type but a `$(typeof(toml_value))`")
+            end
+            value = toml_value
+        end
+        if is_tabular(value)
+            _print(f, io, Dict(key => value); indent, first_block, sorted, by)
+        else
+            Base.print(io, ' '^4max(0,indent-1))
+            printkey(io, [String(key)])
+            Base.print(io, " = ") # print separator
+            printvalue(f, io, value; sorted, by)
+            Base.print(io, "\n")  # new line?
+        end
         first_block = false
     end
 
@@ -105,7 +144,7 @@ function _print(f::MbyFunc, io::IO, a::AbstractDict,
                 Base.print(io,"]\n")
             end
             # Use runtime dispatch here since the type of value seems not to be enforced other than as AbstractDict
-            Base.invokelatest(_print, f, io, value, ks; indent = indent + header, first_block = header, sorted, by)
+            @invokelatest _print(f, io, value, ks; indent = indent + header, first_block = header, sorted, by)
             pop!(ks)
         elseif is_array_of_tables(value)
             # print array of tables
@@ -119,7 +158,7 @@ function _print(f::MbyFunc, io::IO, a::AbstractDict,
                 Base.print(io,"]]\n")
                 # TODO, nicer error here
                 !isa(v, AbstractDict) && error("array should contain only tables")
-                Base.invokelatest(_print, f, io, v, ks; indent = indent + 1, sorted, by)
+                @invokelatest _print(f, io, v, ks; indent = indent + 1, sorted, by)
             end
             pop!(ks)
         end
diff --git a/stdlib/TOML/test/print.jl b/stdlib/TOML/test/print.jl
index d6bcdf10d4f17e..4ab5e2d8d066d4 100644
--- a/stdlib/TOML/test/print.jl
+++ b/stdlib/TOML/test/print.jl
@@ -21,12 +21,34 @@ struct MyStruct
     a::Int
 end
 @test_throws ErrorException toml_str(Dict("foo" => MyStruct(1)))
+# simple value
 @test toml_str(Dict("foo" => MyStruct(1))) do x
         x isa MyStruct && return x.a
     end == """
         foo = 1
         """
 
+# tabular values
+@test toml_str(Dict("foo" => MyStruct(1)); sorted=true) do x
+         x isa MyStruct && return [x.a]
+     end == """
+         foo = [1]
+         """
+@test toml_str(Dict("foo" => MyStruct(1)); sorted=true) do x
+        x isa MyStruct && return Dict(:bar => x.a)
+    end == """
+        [foo]
+        bar = 1
+        """
+
+# validation against the usual case
+@test toml_str(Dict("foo" => MyStruct(1)); sorted=true) do x
+         x isa MyStruct && return [x.a]
+     end == toml_str(Dict("foo" => [1]); sorted=true)
+@test toml_str(Dict("foo" => MyStruct(1)); sorted=true) do x
+        x isa MyStruct && return Dict(:bar => x.a)
+    end == toml_str(Dict("foo" => Dict(:bar => 1)); sorted=true)
+
 @test toml_str(Dict("b" => SubString("foo"))) == "b = \"foo\"\n"
 
 @testset "empty dict print" begin
@@ -38,3 +60,14 @@ end
     d = TOML.parse(s)
     @test toml_str(d) == "user = \"me\"\n\n[julia]\n\n[option]\n"
 end
+
+@testset "special characters" begin
+    s = """
+    "\U1f355 \0 \x0 \x1 \t \b" = "\U1f355 \0 \x0 \x1 \t \b"
+    "\x7f" = "\x7f"
+    """
+    @test roundtrip(s)
+
+    d = Dict("str" => string(Char(0xd800)))
+    @test_throws ErrorException TOML.print(devnull, d)
+end
diff --git a/stdlib/Tar.version b/stdlib/Tar.version
index 0ab47a82fd468d..b129d22665e128 100644
--- a/stdlib/Tar.version
+++ b/stdlib/Tar.version
@@ -1,2 +1,2 @@
 TAR_BRANCH = master
-TAR_SHA1 = ac4d442266a676ce2d1a43acb55fc07d1edc6566
+TAR_SHA1 = ffb3dd5e697eb6690fce9cceb67edb82134f8337
diff --git a/stdlib/Test/docs/src/index.md b/stdlib/Test/docs/src/index.md
index 8e3ba439ab155a..98fdf45706bf7f 100644
--- a/stdlib/Test/docs/src/index.md
+++ b/stdlib/Test/docs/src/index.md
@@ -42,9 +42,13 @@ If the condition is true, a `Pass` is returned:
 ```jldoctest testfoo
 julia> @test foo("bar") == 9
 Test Passed
+  Expression: foo("bar") == 9
+   Evaluated: 9 == 9
 
 julia> @test foo("fizz") >= 10
 Test Passed
+  Expression: foo("fizz") >= 10
+   Evaluated: 16 >= 10
 ```
 
 If the condition is false, then a `Fail` is returned and an exception is thrown:
@@ -83,6 +87,7 @@ to check that this occurs:
 ```jldoctest testfoo
 julia> @test_throws MethodError foo(:cat)
 Test Passed
+  Expression: foo(:cat)
       Thrown: MethodError
 ```
 
@@ -102,6 +107,7 @@ or could not be evaluated due to an error, the test set will then throw a `TestS
 
 ```@docs
 Test.@testset
+Test.TestSetException
 ```
 
 We can put our tests for the `foo(x)` function in a test set:
@@ -193,6 +199,8 @@ checks using either `@test a ≈ b` (where `≈`, typed via tab completion of `\
 ```jldoctest
 julia> @test 1 ≈ 0.999999999
 Test Passed
+  Expression: 1 ≈ 0.999999999
+   Evaluated: 1 ≈ 0.999999999
 
 julia> @test 1 ≈ 0.999999
 Test Failed at none:1
@@ -200,6 +208,15 @@ Test Failed at none:1
    Evaluated: 1 ≈ 0.999999
 ERROR: There was an error during testing
 ```
+You can specify relative and absolute tolerances by setting the `rtol` and `atol` keyword arguments of `isapprox`, respectively,
+after the `≈` comparison:
+```jldoctest
+julia> @test 1 ≈ 0.999999  rtol=1e-5
+Test Passed
+  Expression: ≈(1, 0.999999, rtol = 1.0e-5)
+   Evaluated: ≈(1, 0.999999; rtol = 1.0e-5)
+```
+Note that this is not a feature specific to `≈` but rather a general feature of the `@test` macro: `@test a <op> b key=val` is transformed by the macro into `@test op(a, b, key=val)`. It is, however, particularly useful for `≈` tests.
 
 ```@docs
 Test.@inferred
@@ -288,6 +305,18 @@ And using that testset looks like:
 end
 ```
 
+## Test utilities
+
+```@docs
+Test.GenericArray
+Test.GenericDict
+Test.GenericOrder
+Test.GenericSet
+Test.GenericString
+Test.detect_ambiguities
+Test.detect_unbound_args
+```
+
 ```@meta
 DocTestSetup = nothing
 ```
diff --git a/stdlib/Test/src/Test.jl b/stdlib/Test/src/Test.jl
index 0a260b68bc2dd9..f2175db01a53a2 100644
--- a/stdlib/Test/src/Test.jl
+++ b/stdlib/Test/src/Test.jl
@@ -85,8 +85,10 @@ struct Pass <: Result
     orig_expr
     data
     value
-    function Pass(test_type::Symbol, orig_expr, data, thrown)
-        return new(test_type, orig_expr, data, thrown isa String ? "String" : thrown)
+    source::Union{Nothing,LineNumberNode}
+    message_only::Bool
+    function Pass(test_type::Symbol, orig_expr, data, thrown, source=nothing, message_only=false)
+        return new(test_type, orig_expr, data, thrown, source, message_only)
     end
 end
 
@@ -97,7 +99,11 @@ function Base.show(io::IO, t::Pass)
     end
     if t.test_type === :test_throws
         # The correct type of exception was thrown
-        print(io, "\n      Thrown: ", t.value isa String ? t.value : typeof(t.value))
+        if t.message_only
+            print(io, "\n     Message: ", t.value)
+        else
+            print(io, "\n      Thrown: ", typeof(t.value))
+        end
     elseif t.test_type === :test && t.data !== nothing
         # The test was an expression, so display the term-by-term
         # evaluated version as well
@@ -117,12 +123,14 @@ struct Fail <: Result
     data::Union{Nothing, String}
     value::String
     source::LineNumberNode
-    function Fail(test_type::Symbol, orig_expr, data, value, source::LineNumberNode)
+    message_only::Bool
+    function Fail(test_type::Symbol, orig_expr, data, value, source::LineNumberNode, message_only::Bool=false)
         return new(test_type,
             string(orig_expr),
             data === nothing ? nothing : string(data),
             string(isa(data, Type) ? typeof(value) : value),
-            source)
+            source,
+            message_only)
     end
 end
 
@@ -131,18 +139,24 @@ function Base.show(io::IO, t::Fail)
     print(io, " at ")
     printstyled(io, something(t.source.file, :none), ":", t.source.line, "\n"; bold=true, color=:default)
     print(io, "  Expression: ", t.orig_expr)
+    value, data = t.value, t.data
     if t.test_type === :test_throws_wrong
         # An exception was thrown, but it was of the wrong type
-        print(io, "\n    Expected: ", t.data)
-        print(io, "\n      Thrown: ", t.value)
+        if t.message_only
+            print(io, "\n    Expected: ", data)
+            print(io, "\n     Message: ", value)
+        else
+            print(io, "\n    Expected: ", data)
+            print(io, "\n      Thrown: ", value)
+        end
     elseif t.test_type === :test_throws_nothing
         # An exception was expected, but no exception was thrown
-        print(io, "\n    Expected: ", t.data)
+        print(io, "\n    Expected: ", data)
         print(io, "\n  No exception thrown")
-    elseif t.test_type === :test && t.data !== nothing
+    elseif t.test_type === :test && data !== nothing
         # The test was an expression, so display the term-by-term
         # evaluated version as well
-        print(io, "\n   Evaluated: ", t.data)
+        print(io, "\n   Evaluated: ", data)
     end
 end
 
@@ -236,6 +250,8 @@ function Serialization.serialize(s::Serialization.AbstractSerializer, t::Pass)
     Serialization.serialize(s, t.orig_expr === nothing ? nothing : string(t.orig_expr))
     Serialization.serialize(s, t.data === nothing ? nothing : string(t.data))
     Serialization.serialize(s, string(t.value))
+    Serialization.serialize(s, t.source === nothing ? nothing : t.source)
+    Serialization.serialize(s, t.message_only)
     nothing
 end
 
@@ -342,18 +358,24 @@ end
 """
     @test ex
     @test f(args...) key=val ...
+    @test ex broken=true
+    @test ex skip=true
 
-Tests that the expression `ex` evaluates to `true`.
-Returns a `Pass` `Result` if it does, a `Fail` `Result` if it is
+Test that the expression `ex` evaluates to `true`.
+If executed inside a `@testset`, return a `Pass` `Result` if it does, a `Fail` `Result` if it is
 `false`, and an `Error` `Result` if it could not be evaluated.
+If executed outside a `@testset`, throw an exception instead of returning `Fail` or `Error`.
 
 # Examples
 ```jldoctest
 julia> @test true
 Test Passed
+  Expression: true
 
 julia> @test [1, 2] + [2, 1] == [3, 3]
 Test Passed
+  Expression: [1, 2] + [2, 1] == [3, 3]
+   Evaluated: [3, 3] == [3, 3]
 ```
 
 The `@test f(args...) key=val...` form is equivalent to writing
@@ -363,17 +385,82 @@ is a call using infix syntax such as approximate comparisons:
 ```jldoctest
 julia> @test π ≈ 3.14 atol=0.01
 Test Passed
+  Expression: ≈(π, 3.14, atol = 0.01)
+   Evaluated: ≈(π, 3.14; atol = 0.01)
 ```
 
 This is equivalent to the uglier test `@test ≈(π, 3.14, atol=0.01)`.
 It is an error to supply more than one expression unless the first
 is a call expression and the rest are assignments (`k=v`).
+
+You can use any key for the `key=val` arguments, except for `broken` and `skip`,
+which have special meanings in the context of `@test`:
+
+* `broken=cond` indicates a test that should pass but currently consistently
+  fails when `cond==true`.  Tests that the expression `ex` evaluates to `false`
+  or causes an exception.  Returns a `Broken` `Result` if it does, or an `Error`
+  `Result` if the expression evaluates to `true`.  Regular `@test ex` is
+  evaluated when `cond==false`.
+* `skip=cond` marks a test that should not be executed but should be included in
+  test summary reporting as `Broken`, when `cond==true`.  This can be useful for
+  tests that intermittently fail, or tests of not-yet-implemented functionality.
+  Regular `@test ex` is evaluated when `cond==false`.
+
+# Examples
+
+```jldoctest
+julia> @test 2 + 2 ≈ 6 atol=1 broken=true
+Test Broken
+  Expression: ≈(2 + 2, 6, atol = 1)
+
+julia> @test 2 + 2 ≈ 5 atol=1 broken=false
+Test Passed
+  Expression: ≈(2 + 2, 5, atol = 1)
+   Evaluated: ≈(4, 5; atol = 1)
+
+julia> @test 2 + 2 == 5 skip=true
+Test Broken
+  Skipped: 2 + 2 == 5
+
+julia> @test 2 + 2 == 4 skip=false
+Test Passed
+  Expression: 2 + 2 == 4
+   Evaluated: 4 == 4
+```
+
+!!! compat "Julia 1.7"
+     The `broken` and `skip` keyword arguments require at least Julia 1.7.
 """
 macro test(ex, kws...)
+    # Collect the broken/skip keywords and remove them from the rest of keywords
+    broken = [kw.args[2] for kw in kws if kw.args[1] === :broken]
+    skip = [kw.args[2] for kw in kws if kw.args[1] === :skip]
+    kws = filter(kw -> kw.args[1] ∉ (:skip, :broken), kws)
+    # Validation of broken/skip keywords
+    for (kw, name) in ((broken, :broken), (skip, :skip))
+        if length(kw) > 1
+            error("invalid test macro call: cannot set $(name) keyword multiple times")
+        end
+    end
+    if length(skip) > 0 && length(broken) > 0
+        error("invalid test macro call: cannot set both skip and broken keywords")
+    end
+
+    # Build the test expression
     test_expr!("@test", ex, kws...)
     orig_ex = Expr(:inert, ex)
+
     result = get_test_result(ex, __source__)
-    :(do_test($result, $orig_ex))
+
+    return quote
+        if $(length(skip) > 0 && esc(skip[1]))
+            record(get_testset(), Broken(:skipped, $orig_ex))
+        else
+            let _do = $(length(broken) > 0 && esc(broken[1])) ? do_broken_test : do_test
+                _do($result, $orig_ex)
+            end
+        end
+    end
 end
 
 """
@@ -383,7 +470,8 @@ end
 Indicates a test that should pass but currently consistently fails.
 Tests that the expression `ex` evaluates to `false` or causes an
 exception. Returns a `Broken` `Result` if it does, or an `Error` `Result`
-if the expression evaluates to `true`.
+if the expression evaluates to `true`.  This is equivalent to
+[`@test ex broken=true`](@ref @test).
 
 The `@test_broken f(args...) key=val...` form works as for the `@test` macro.
 
@@ -412,7 +500,8 @@ end
 
 Marks a test that should not be executed but should be included in test
 summary reporting as `Broken`. This can be useful for tests that intermittently
-fail, or tests of not-yet-implemented functionality.
+fail, or tests of not-yet-implemented functionality.  This is equivalent to
+[`@test ex skip=true`](@ref @test).
 
 The `@test_skip f(args...) key=val...` form works as for the `@test` macro.
 
@@ -453,6 +542,12 @@ function get_test_result(ex, source)
         first(string(ex.args[1])) != '.' && !is_splat(ex.args[2]) && !is_splat(ex.args[3]) &&
         (ex.args[1] === :(==) || Base.operator_precedence(ex.args[1]) == comparison_prec)
         ex = Expr(:comparison, ex.args[2], ex.args[1], ex.args[3])
+
+    # Mark <: and >: as :comparison expressions
+    elseif isa(ex, Expr) && length(ex.args) == 2 &&
+        !is_splat(ex.args[1]) && !is_splat(ex.args[2]) &&
+        Base.operator_precedence(ex.head) == comparison_prec
+        ex = Expr(:comparison, ex.args[1], ex.head, ex.args[2])
     end
     if isa(ex, Expr) && ex.head === :comparison
         # pass all terms of the comparison to `eval_comparison`, as an Expr
@@ -520,7 +615,7 @@ function get_test_result(ex, source)
             $testret
         catch _e
             _e isa InterruptException && rethrow()
-            Threw(_e, Base.catch_stack(), $(QuoteNode(source)))
+            Threw(_e, Base.current_exceptions(), $(QuoteNode(source)))
         end
     end
     Base.remove_linenums!(result)
@@ -541,7 +636,7 @@ function do_test(result::ExecutionResult, orig_expr)
         value = result.value
         testres = if isa(value, Bool)
             # a true value Passes
-            value ? Pass(:test, nothing, nothing, value) :
+            value ? Pass(:test, orig_expr, result.data, value, result.source) :
                     Fail(:test, orig_expr, result.data, value, result.source)
         else
             # If the result is non-Boolean, this counts as an Error
@@ -576,6 +671,8 @@ end
 
 Tests that the expression `expr` throws `exception`.
 The exception may specify either a type,
+a string, a regular expression, or a list of strings occurring in the displayed error message,
+a matching function,
 or a value (which will be tested for equality by comparing fields).
 Note that `@test_throws` does not support a trailing keyword form.
 
@@ -583,12 +680,25 @@ Note that `@test_throws` does not support a trailing keyword form.
 ```jldoctest
 julia> @test_throws BoundsError [1, 2, 3][4]
 Test Passed
+  Expression: ([1, 2, 3])[4]
       Thrown: BoundsError
 
 julia> @test_throws DimensionMismatch [1, 2, 3] + [1, 2]
 Test Passed
+  Expression: [1, 2, 3] + [1, 2]
       Thrown: DimensionMismatch
+
+julia> @test_throws "Try sqrt(Complex" sqrt(-1)
+Test Passed
+  Expression: sqrt(-1)
+     Message: "DomainError with -1.0:\\nsqrt will only return a complex result if called with a complex argument. Try sqrt(Complex(x))."
 ```
+
+In the final example, instead of matching a single string, the same test could alternatively have been written with any of:
+
+- `["Try", "Complex"]` (a list of strings)
+- `r"Try sqrt\\([Cc]omplex"` (a regular expression)
+- `str -> occursin("complex", str)` (a matching function)
 """
 macro test_throws(extype, ex)
     orig_ex = Expr(:inert, ex)
@@ -606,16 +716,34 @@ macro test_throws(extype, ex)
     :(do_test_throws($result, $orig_ex, $(esc(extype))))
 end
 
+const MACROEXPAND_LIKE = Symbol.(("@macroexpand", "@macroexpand1", "macroexpand"))
+
 # An internal function, called by the code generated by @test_throws
 # to evaluate and catch the thrown exception - if it exists
 function do_test_throws(result::ExecutionResult, orig_expr, extype)
     if isa(result, Threw)
         # Check that the right type of exception was thrown
         success = false
+        message_only = false
         exc = result.exception
+        # NB: Throwing LoadError from macroexpands is deprecated, but in order to limit
+        # the breakage in package tests we add extra logic here.
+        from_macroexpand =
+            orig_expr isa Expr &&
+            orig_expr.head in (:call, :macrocall) &&
+            orig_expr.args[1] in MACROEXPAND_LIKE
         if isa(extype, Type)
-            success = isa(exc, extype)
-        else
+            success =
+                if from_macroexpand && extype == LoadError && exc isa Exception
+                    Base.depwarn("macroexpand no longer throws a LoadError so `@test_throws LoadError ...` is deprecated and passed without checking the error type!", :do_test_throws)
+                    true
+                else
+                    isa(exc, extype)
+                end
+        elseif isa(extype, Exception) || !isa(exc, Exception)
+            if extype isa LoadError && !(exc isa LoadError) && typeof(extype.error) == typeof(exc)
+                extype = extype.error # deprecated
+            end
             if isa(exc, typeof(extype))
                 success = true
                 for fld in 1:nfields(extype)
@@ -625,11 +753,21 @@ function do_test_throws(result::ExecutionResult, orig_expr, extype)
                     end
                 end
             end
+        else
+            message_only = true
+            exc = sprint(showerror, exc)
+            success = contains_warn(exc, extype)
+            exc = repr(exc)
+            if isa(extype, AbstractString)
+                extype = repr(extype)
+            elseif isa(extype, Function)
+                extype = "< match function >"
+            end
         end
         if success
-            testres = Pass(:test_throws, nothing, nothing, exc)
+            testres = Pass(:test_throws, orig_expr, extype, exc, result.source, message_only)
         else
-            testres = Fail(:test_throws_wrong, orig_expr, extype, exc, result.source)
+            testres = Fail(:test_throws_wrong, orig_expr, extype, exc, result.source, message_only)
         end
     else
         testres = Fail(:test_throws_nothing, orig_expr, extype, nothing, result.source)
@@ -690,7 +828,26 @@ with this macro. Use [`@test_logs`](@ref) instead.
 """
 macro test_nowarn(expr)
     quote
-        @test_warn r"^(?!.)"s $(esc(expr))
+        # Duplicate some code from `@test_warn` to allow printing the content of
+        # `stderr` again to `stderr` here while suppressing it for `@test_warn`.
+        # If echoing the captured `stderr` is not wanted, it would be possible to just use
+        #     @test_warn isempty $(esc(expr))
+        # here.
+        let fname = tempname()
+            try
+                ret = open(fname, "w") do f
+                    redirect_stderr(f) do
+                        $(esc(expr))
+                    end
+                end
+                stderr_content = read(fname, String)
+                print(stderr, stderr_content) # this is helpful for debugging
+                @test isempty(stderr_content)
+                ret
+            finally
+                rm(fname, force=true)
+            end
+        end
     end
 end
 
@@ -717,9 +874,20 @@ function record end
     finish(ts::AbstractTestSet)
 
 Do any final processing necessary for the given testset. This is called by the
-`@testset` infrastructure after a test block executes. One common use for this
-function is to record the testset to the parent's results list, using
-`get_testset`.
+`@testset` infrastructure after a test block executes.
+
+Custom `AbstractTestSet` subtypes should call `record` on their parent (if there
+is one) to add themselves to the tree of test results. This might be implemented
+as:
+
+```julia
+if get_testset_depth() != 0
+    # Attach this test set to the parent test set
+    parent_ts = get_testset()
+    record(parent_ts, self)
+    return self
+end
+```
 """
 function finish end
 
@@ -1148,7 +1316,7 @@ function testset_beginend(args, tests, source)
         local oldrng = copy(RNG)
         try
             # RNG is re-seeded with its own seed to ease reproduce a failed test
-            Random.seed!(RNG.seed)
+            Random.seed!(Random.GLOBAL_SEED)
             let
                 $(esc(tests))
             end
@@ -1156,7 +1324,7 @@ function testset_beginend(args, tests, source)
             err isa InterruptException && rethrow()
             # something in the test block threw an error. Count that as an
             # error in this test set
-            record(ts, Error(:nontest_error, Expr(:tuple), err, Base.catch_stack(), $(QuoteNode(source))))
+            record(ts, Error(:nontest_error, Expr(:tuple), err, Base.current_exceptions(), $(QuoteNode(source))))
         finally
             copy!(RNG, oldrng)
             pop_testset()
@@ -1230,7 +1398,7 @@ function testset_forloop(args, testloop, source)
             err isa InterruptException && rethrow()
             # Something in the test block threw an error. Count that as an
             # error in this test set
-            record(ts, Error(:nontest_error, Expr(:tuple), err, Base.catch_stack(), $(QuoteNode(source))))
+            record(ts, Error(:nontest_error, Expr(:tuple), err, Base.current_exceptions(), $(QuoteNode(source))))
         end
     end
     quote
@@ -1239,7 +1407,7 @@ function testset_forloop(args, testloop, source)
         local ts
         local RNG = default_rng()
         local oldrng = copy(RNG)
-        Random.seed!(RNG.seed)
+        Random.seed!(Random.GLOBAL_SEED)
         local tmprng = copy(RNG)
         try
             let
@@ -1412,30 +1580,32 @@ function _inferred(ex, mod, allow = :(Union{}))
         ex = Expr(:call, GlobalRef(Test, :_materialize_broadcasted),
             farg, ex.args[2:end]...)
     end
-    Base.remove_linenums!(quote
-        let
-            allow = $(esc(allow))
-            allow isa Type || throw(ArgumentError("@inferred requires a type as second argument"))
-            $(if any(a->(Meta.isexpr(a, :kw) || Meta.isexpr(a, :parameters)), ex.args)
-                # Has keywords
-                args = gensym()
-                kwargs = gensym()
-                quote
-                    $(esc(args)), $(esc(kwargs)), result = $(esc(Expr(:call, _args_and_call, ex.args[2:end]..., ex.args[1])))
-                    inftypes = $(gen_call_with_extracted_types(mod, Base.return_types, :($(ex.args[1])($(args)...; $(kwargs)...))))
-                end
-            else
-                # No keywords
-                quote
-                    args = ($([esc(ex.args[i]) for i = 2:length(ex.args)]...),)
-                    result = $(esc(ex.args[1]))(args...)
-                    inftypes = Base.return_types($(esc(ex.args[1])), Base.typesof(args...))
-                end
-            end)
-            @assert length(inftypes) == 1
-            rettype = result isa Type ? Type{result} : typeof(result)
-            rettype <: allow || rettype == typesplit(inftypes[1], allow) || error("return type $rettype does not match inferred return type $(inftypes[1])")
-            result
+    Base.remove_linenums!(let ex = ex;
+        quote
+            let
+                allow = $(esc(allow))
+                allow isa Type || throw(ArgumentError("@inferred requires a type as second argument"))
+                $(if any(a->(Meta.isexpr(a, :kw) || Meta.isexpr(a, :parameters)), ex.args)
+                    # Has keywords
+                    args = gensym()
+                    kwargs = gensym()
+                    quote
+                        $(esc(args)), $(esc(kwargs)), result = $(esc(Expr(:call, _args_and_call, ex.args[2:end]..., ex.args[1])))
+                        inftypes = $(gen_call_with_extracted_types(mod, Base.return_types, :($(ex.args[1])($(args)...; $(kwargs)...))))
+                    end
+                else
+                    # No keywords
+                    quote
+                        args = ($([esc(ex.args[i]) for i = 2:length(ex.args)]...),)
+                        result = $(esc(ex.args[1]))(args...)
+                        inftypes = Base.return_types($(esc(ex.args[1])), Base.typesof(args...))
+                    end
+                end)
+                @assert length(inftypes) == 1
+                rettype = result isa Type ? Type{result} : typeof(result)
+                rettype <: allow || rettype == typesplit(inftypes[1], allow) || error("return type $rettype does not match inferred return type $(inftypes[1])")
+                result
+            end
         end
     end)
 end
@@ -1478,7 +1648,7 @@ function detect_ambiguities(mods::Module...;
         for m in Base.MethodList(mt)
             is_in_mods(m.module, recursive, mods) || continue
             ambig = Int32[0]
-            ms = Base._methods_by_ftype(m.sig, -1, typemax(UInt), true, UInt[typemin(UInt)], UInt[typemax(UInt)], ambig)
+            ms = Base._methods_by_ftype(m.sig, nothing, -1, typemax(UInt), true, UInt[typemin(UInt)], UInt[typemax(UInt)], ambig)
             ambig[1] == 0 && continue
             isa(ms, Bool) && continue
             for match2 in ms
@@ -1710,7 +1880,7 @@ end
 
 "`guardseed(f, seed)` is equivalent to running `Random.seed!(seed); f()` and
 then restoring the state of the global RNG as it was before."
-guardseed(f::Function, seed::Union{Vector{UInt32},Integer}) = guardseed() do
+guardseed(f::Function, seed::Union{Vector{UInt64},Vector{UInt32},Integer,NTuple{4,UInt64}}) = guardseed() do
     Random.seed!(seed)
     f()
 end
diff --git a/stdlib/Test/src/logging.jl b/stdlib/Test/src/logging.jl
index 7e8659a4c2f2ab..37d6854e118281 100644
--- a/stdlib/Test/src/logging.jl
+++ b/stdlib/Test/src/logging.jl
@@ -1,8 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-using Logging
-import Logging: Info,
-    shouldlog, handle_message, min_enabled_level, catch_exceptions
+using Logging: Logging, AbstractLogger, LogLevel, Info, with_logger
 import Base: occursin
 
 #-------------------------------------------------------------------------------
@@ -30,22 +28,23 @@ mutable struct TestLogger <: AbstractLogger
     shouldlog_args
 end
 
-TestLogger(; min_level=Info, catch_exceptions=false) = TestLogger(LogRecord[], min_level, catch_exceptions, nothing)
-min_enabled_level(logger::TestLogger) = logger.min_level
+TestLogger(; min_level=Info, catch_exceptions=false) =
+    TestLogger(LogRecord[], min_level, catch_exceptions, nothing)
+Logging.min_enabled_level(logger::TestLogger) = logger.min_level
 
-function shouldlog(logger::TestLogger, level, _module, group, id)
+function Logging.shouldlog(logger::TestLogger, level, _module, group, id)
     logger.shouldlog_args = (level, _module, group, id)
     true
 end
 
-function handle_message(logger::TestLogger, level, msg, _module,
-                        group, id, file, line; kwargs...)
+function Logging.handle_message(logger::TestLogger, level, msg, _module,
+                                group, id, file, line; kwargs...)
     @nospecialize
     push!(logger.logs, LogRecord(level, msg, _module, group, id, file, line, kwargs))
 end
 
 # Catch exceptions for the test logger only if specified
-catch_exceptions(logger::TestLogger) = logger.catch_exceptions
+Logging.catch_exceptions(logger::TestLogger) = logger.catch_exceptions
 
 function collect_test_logs(f; kwargs...)
     logger = TestLogger(; kwargs...)
@@ -135,21 +134,28 @@ We can test the info message using
 If we also wanted to test the debug messages, these need to be enabled with the
 `min_level` keyword:
 
-    @test_logs (:info,"Doing foo with n=2") (:debug,"Iteration 1") (:debug,"Iteration 2") min_level=Debug foo(2)
+    using Logging
+    @test_logs (:info,"Doing foo with n=2") (:debug,"Iteration 1") (:debug,"Iteration 2") min_level=Logging.Debug foo(2)
 
 If you want to test that some particular messages are generated while ignoring the rest,
 you can set the keyword `match_mode=:any`:
 
-    @test_logs (:info,) (:debug,"Iteration 42") min_level=Debug match_mode=:any foo(100)
+    using Logging
+    @test_logs (:info,) (:debug,"Iteration 42") min_level=Logging.Debug match_mode=:any foo(100)
 
 The macro may be chained with `@test` to also test the returned value:
 
     @test (@test_logs (:info,"Doing foo with n=2") foo(2)) == 42
 
-If you want to test an absence of logger messages, you can pass no log_patterns:
+If you want to test for the absence of warnings, you can omit specifying log
+patterns and set the `min_level` accordingly:
 
-    @test_logs min_level=Logging.Warn f()  # test `f` logs no messages when the logger level is warn.
+    # test that the expression logs no messages when the logger level is warn:
+    @test_logs min_level=Logging.Warn @info("Some information") # passes
+    @test_logs min_level=Logging.Warn @warn("Some information") # fails
 
+If you want to test the absence of warnings (or error messages) in
+[`stderr`](@ref) which are not generated by `@warn`, see [`@test_nowarn`](@ref).
 """
 macro test_logs(exs...)
     length(exs) >= 1 || throw(ArgumentError("""`@test_logs` needs at least one arguments.
@@ -173,13 +179,13 @@ macro test_logs(exs...)
                     $(esc(expression))
                 end
                 if didmatch
-                    testres = Pass(:test, nothing, nothing, value)
+                    testres = Pass(:test, $orig_expr, nothing, value, $sourceloc)
                 else
                     testres = LogTestFailure($orig_expr, $sourceloc,
                                              $(QuoteNode(exs[1:end-1])), logs)
                 end
             catch e
-                testres = Error(:test_error, $orig_expr, e, Base.catch_stack(), $sourceloc)
+                testres = Error(:test_error, $orig_expr, e, Base.current_exceptions(), $sourceloc)
             end
             Test.record(Test.get_testset(), testres)
             value
@@ -265,4 +271,3 @@ macro test_deprecated(exs...)
     res.args[4].args[3].args[2].args[2].args[2] = __source__
     res
 end
-
diff --git a/stdlib/Test/test/runtests.jl b/stdlib/Test/test/runtests.jl
index 541e8dcc2bf7d9..d260c5840d6262 100644
--- a/stdlib/Test/test/runtests.jl
+++ b/stdlib/Test/test/runtests.jl
@@ -25,6 +25,18 @@ import Logging: Debug, Info, Warn
     @test isapprox(1, 2; atol)
     @test isapprox(1, 3; a.atol)
 end
+@testset "@test with skip/broken kwargs" begin
+    # Make sure the local variables can be used in conditions
+    a = 1
+    @test 2 + 2 == 4 broken=false
+    @test error() broken=true
+    @test !Sys.iswindows() broken=Sys.iswindows()
+    @test 1 ≈ 2 atol=1 broken=a==2
+    @test false skip=true
+    @test true skip=false
+    @test Grogu skip=isone(a)
+    @test 41 ≈ 42 rtol=1 skip=false
+end
 @testset "@test keyword precedence" begin
     atol = 2
     # post-semicolon keyword, suffix keyword, pre-semicolon keyword
@@ -84,6 +96,16 @@ end
                    "Thrown: ErrorException")
     @test endswith(sprint(show, @test_throws ErrorException("test") error("test")),
                    "Thrown: ErrorException")
+    @test endswith(sprint(show, @test_throws "a test" error("a test")),
+                   "Message: \"a test\"")
+    @test occursin("Message: \"DomainError",
+                   sprint(show, @test_throws r"sqrt\([Cc]omplex" sqrt(-1)))
+    @test endswith(sprint(show, @test_throws str->occursin("a t", str) error("a test")),
+                   "Message: \"a test\"")
+    @test endswith(sprint(show, @test_throws ["BoundsError", "access", "1-element", "at index [2]"] [1][2]),
+                   "Message: \"BoundsError: attempt to access 1-element Vector{$Int} at index [2]\"")
+    @test_throws "\"" throw("\"")
+    @test_throws Returns(false) throw(Returns(false))
 end
 # Test printing of Fail results
 include("nothrow_testset.jl")
@@ -134,6 +156,13 @@ let fails = @testset NoThrowTestSet begin
         @test endswith(str1, str2)
         # 21 - Fail - contains
         @test contains(str1, str2)
+        # 22 - Fail - Type Comparison
+        @test typeof(1) <: typeof("julia")
+        # 23 - 26 - Fail - wrong message
+        @test_throws "A test" error("a test")
+        @test_throws r"sqrt\([Cc]omplx" sqrt(-1)
+        @test_throws str->occursin("a T", str) error("a test")
+        @test_throws ["BoundsError", "acess", "1-element", "at index [2]"] [1][2]
     end
     for fail in fails
         @test fail isa Test.Fail
@@ -243,6 +272,32 @@ let fails = @testset NoThrowTestSet begin
         @test occursin("Expression: contains(str1, str2)", str)
         @test occursin("Evaluated: contains(\"Hello\", \"World\")", str)
     end
+
+    let str = sprint(show, fails[22])
+        @test occursin("Expression: typeof(1) <: typeof(\"julia\")", str)
+        @test occursin("Evaluated: $(typeof(1)) <: $(typeof("julia"))", str)
+    end
+
+    let str = sprint(show, fails[23])
+        @test occursin("Expected: \"A test\"", str)
+        @test occursin("Message: \"a test\"", str)
+    end
+
+    let str = sprint(show, fails[24])
+        @test occursin("Expected: r\"sqrt\\([Cc]omplx\"", str)
+        @test occursin(r"Message: .*Try sqrt\(Complex", str)
+    end
+
+    let str = sprint(show, fails[25])
+        @test occursin("Expected: < match function >", str)
+        @test occursin("Message: \"a test\"", str)
+    end
+
+    let str = sprint(show, fails[26])
+        @test occursin("Expected: [\"BoundsError\", \"acess\", \"1-element\", \"at index [2]\"]", str)
+        @test occursin(r"Message: \"BoundsError.* 1-element.*at index \[2\]", str)
+    end
+
 end
 
 let errors = @testset NoThrowTestSet begin
@@ -269,7 +324,7 @@ end
 
 let retval_tests = @testset NoThrowTestSet begin
         ts = Test.DefaultTestSet("Mock for testing retval of record(::DefaultTestSet, ::T <: Result) methods")
-        pass_mock = Test.Pass(:test, 1, 2, LineNumberNode(0, "A Pass Mock"))
+        pass_mock = Test.Pass(:test, 1, 2, 3, LineNumberNode(0, "A Pass Mock"))
         @test Test.record(ts, pass_mock) isa Test.Pass
         error_mock = Test.Error(:test, 1, 2, 3, LineNumberNode(0, "An Error Mock"))
         @test Test.record(ts, error_mock) isa Test.Error
@@ -491,7 +546,7 @@ import Test: record, finish
 using Test: get_testset_depth, get_testset
 using Test: AbstractTestSet, Result, Pass, Fail, Error
 struct CustomTestSet <: Test.AbstractTestSet
-    description::AbstractString
+    description::String
     foo::Int
     results::Vector
     # constructor takes a description string and options keyword arguments
@@ -834,7 +889,7 @@ let code = quote
 end
 
 @testset "@testset preserves GLOBAL_RNG's state, and re-seeds it" begin
-    # i.e. it behaves as if it was wrapped in a `guardseed(GLOBAL_RNG.seed)` block
+    # i.e. it behaves as if it was wrapped in a `guardseed(GLOBAL_SEED)` block
     seed = rand(UInt128)
     Random.seed!(seed)
     a = rand()
@@ -969,7 +1024,7 @@ end
 
 let ex = :(something_complex + [1, 2, 3])
     b = PipeBuffer()
-    let t = Test.Pass(:test, (ex, 1), (ex, 2), (ex, 3))
+    let t = Test.Pass(:test, (ex, 1), (ex, 2), (ex, 3), LineNumberNode(@__LINE__, @__FILE__))
         serialize(b, t)
         @test string(t) == string(deserialize(b))
         @test eof(b)
@@ -1138,3 +1193,49 @@ let errors = @testset NoThrowTestSet begin
         @test occursin("Expression: !(1 < 2 < missing < 4)", str)
     end
 end
+
+macro test_macro_throw_1()
+    throw(ErrorException("Real error"))
+end
+macro test_macro_throw_2()
+    throw(LoadError("file", 111, ErrorException("Real error")))
+end
+
+@testset "Soft deprecation of @test_throws LoadError [@]macroexpand[1]" begin
+    # If a macroexpand was detected, undecorated LoadErrors can stand in for any error.
+    # This will throw a deprecation warning.
+    @test_deprecated (@test_throws LoadError macroexpand(@__MODULE__, :(@test_macro_throw_1)))
+    @test_deprecated (@test_throws LoadError @macroexpand @test_macro_throw_1)
+    # Decorated LoadErrors are unwrapped if the actual exception matches the inner, but not the outer, exception, regardless of whether or not a macroexpand is detected.
+    # This will not throw a deprecation warning.
+    @test_throws LoadError("file", 111, ErrorException("Real error")) macroexpand(@__MODULE__, :(@test_macro_throw_1))
+    @test_throws LoadError("file", 111, ErrorException("Real error")) @macroexpand @test_macro_throw_1
+    # Decorated LoadErrors are not unwrapped if a LoadError was thrown.
+    @test_throws LoadError("file", 111, ErrorException("Real error")) @macroexpand @test_macro_throw_2
+end
+
+# Issue 25483
+mutable struct PassInformationTestSet <: Test.AbstractTestSet
+    results::Vector
+    PassInformationTestSet(desc) = new([])
+end
+Test.record(ts::PassInformationTestSet, t::Test.Result) = (push!(ts.results, t); t)
+Test.finish(ts::PassInformationTestSet) = ts
+@testset "Information in Pass result (Issue 25483)" begin
+    ts = @testset PassInformationTestSet begin
+        @test 1 == 1
+        @test_throws ErrorException throw(ErrorException("Msg"))
+    end
+    test_line_number = (@__LINE__) - 3
+    test_throws_line_number =  (@__LINE__) - 3
+    @test ts.results[1].test_type == :test
+    @test ts.results[1].orig_expr == :(1 == 1)
+    @test ts.results[1].data == Expr(:comparison, 1, :(==), 1)
+    @test ts.results[1].value == true
+    @test ts.results[1].source == LineNumberNode(test_line_number, @__FILE__)
+    @test ts.results[2].test_type == :test_throws
+    @test ts.results[2].orig_expr == :(throw(ErrorException("Msg")))
+    @test ts.results[2].data == ErrorException
+    @test ts.results[2].value == ErrorException("Msg")
+    @test ts.results[2].source == LineNumberNode(test_throws_line_number, @__FILE__)
+end
diff --git a/stdlib/Zlib_jll/Project.toml b/stdlib/Zlib_jll/Project.toml
index e5ee91ef281dc0..cafaf9c1b577ca 100644
--- a/stdlib/Zlib_jll/Project.toml
+++ b/stdlib/Zlib_jll/Project.toml
@@ -7,3 +7,9 @@ Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 
 [compat]
 julia = "1.6"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/dSFMT_jll/Project.toml b/stdlib/dSFMT_jll/Project.toml
index bd938bbed171cd..4e3e80f918f0b8 100644
--- a/stdlib/dSFMT_jll/Project.toml
+++ b/stdlib/dSFMT_jll/Project.toml
@@ -8,3 +8,9 @@ Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 
 [compat]
 julia = "1.6"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/libLLVM_jll/Project.toml b/stdlib/libLLVM_jll/Project.toml
index 6954f96dff80c6..9985671b217c14 100644
--- a/stdlib/libLLVM_jll/Project.toml
+++ b/stdlib/libLLVM_jll/Project.toml
@@ -1,10 +1,16 @@
 name = "libLLVM_jll"
 uuid = "8f36deef-c2a5-5394-99ed-8e07531fb29a"
-version = "11.0.1+3"
+version = "12.0.1+0"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 
 [compat]
-julia = "1.6"
+julia = "1.7"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/libLLVM_jll/src/libLLVM_jll.jl b/stdlib/libLLVM_jll/src/libLLVM_jll.jl
index 702ed5c0ef8672..5186cb13e0be99 100644
--- a/stdlib/libLLVM_jll/src/libLLVM_jll.jl
+++ b/stdlib/libLLVM_jll/src/libLLVM_jll.jl
@@ -19,11 +19,11 @@ libLLVM_handle = C_NULL
 libLLVM_path = ""
 
 if Sys.iswindows()
-    const libLLVM = "LLVM.dll"
+    const libLLVM = "libLLVM.dll"
 elseif Sys.isapple()
     const libLLVM = "@rpath/libLLVM.dylib"
 else
-    const libLLVM = "libLLVM-11jl.so"
+    const libLLVM = "libLLVM-12jl.so"
 end
 
 function __init__()
diff --git a/stdlib/libblastrampoline_jll/Project.toml b/stdlib/libblastrampoline_jll/Project.toml
index 6b107b13480901..b8d84b80b527f0 100644
--- a/stdlib/libblastrampoline_jll/Project.toml
+++ b/stdlib/libblastrampoline_jll/Project.toml
@@ -1,12 +1,17 @@
 name = "libblastrampoline_jll"
 uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
-version = "3.0.2+0"
+version = "3.1.0+0"
 
 [deps]
-Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 OpenBLAS_jll = "4536629a-c528-5b80-bd46-f80d51c5b363"
 
 [compat]
 julia = "1.7"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/nghttp2_jll/Project.toml b/stdlib/nghttp2_jll/Project.toml
index e9a2145c26180f..3051afe57d23a9 100644
--- a/stdlib/nghttp2_jll/Project.toml
+++ b/stdlib/nghttp2_jll/Project.toml
@@ -8,3 +8,9 @@ Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 
 [compat]
 julia = "1.6"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/p7zip_jll/Project.toml b/stdlib/p7zip_jll/Project.toml
index 8f7751eefa6a04..75e04b6362fdf9 100644
--- a/stdlib/p7zip_jll/Project.toml
+++ b/stdlib/p7zip_jll/Project.toml
@@ -8,3 +8,9 @@ Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 
 [compat]
 julia = "1.6"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/sysimage.mk b/sysimage.mk
index ebc991bdd40f0b..de5c3e22f253a3 100644
--- a/sysimage.mk
+++ b/sysimage.mk
@@ -65,7 +65,8 @@ $(build_private_libdir)/corecompiler.ji: $(COMPILER_SRCS)
 
 $(build_private_libdir)/sys.ji: $(build_private_libdir)/corecompiler.ji $(JULIAHOME)/VERSION $(BASE_SRCS) $(STDLIB_SRCS)
 	@$(call PRINT_JULIA, cd $(JULIAHOME)/base && \
-	if ! JULIA_BINDIR=$(call cygpath_w,$(build_bindir)) $(call spawn, $(JULIA_EXECUTABLE)) -g1 -O0 -C "$(JULIA_CPU_TARGET)" --output-ji $(call cygpath_w,$@).tmp $(JULIA_SYSIMG_BUILD_FLAGS) \
+	if ! JULIA_BINDIR=$(call cygpath_w,$(build_bindir)) WINEPATH="$(call cygpath_w,$(build_bindir));$$WINEPATH" \
+			$(call spawn, $(JULIA_EXECUTABLE)) -g1 -O0 -C "$(JULIA_CPU_TARGET)" --output-ji $(call cygpath_w,$@).tmp $(JULIA_SYSIMG_BUILD_FLAGS) \
 			--startup-file=no --warn-overwrite=yes --sysimage $(call cygpath_w,$<) sysimg.jl $(RELBUILDROOT); then \
 		echo '*** This error might be fixed by running `make clean`. If the error persists$(COMMA) try `make cleanall`. ***'; \
 		false; \
@@ -75,8 +76,9 @@ $(build_private_libdir)/sys.ji: $(build_private_libdir)/corecompiler.ji $(JULIAH
 define sysimg_builder
 $$(build_private_libdir)/sys$1-o.a $$(build_private_libdir)/sys$1-bc.a : $$(build_private_libdir)/sys$1-%.a : $$(build_private_libdir)/sys.ji
 	@$$(call PRINT_JULIA, cd $$(JULIAHOME)/base && \
-	if ! JULIA_BINDIR=$$(call cygpath_w,$(build_bindir)) $$(call spawn, $3) $2 -C "$$(JULIA_CPU_TARGET)" --output-$$* $$(call cygpath_w,$$@).tmp $$(JULIA_SYSIMG_BUILD_FLAGS) \
-		--startup-file=no --warn-overwrite=yes --sysimage $$(call cygpath_w,$$<) $$(call cygpath_w,$$(JULIAHOME)/contrib/generate_precompile.jl) $(JULIA_PRECOMPILE); then \
+	if ! JULIA_BINDIR=$$(call cygpath_w,$(build_bindir)) WINEPATH="$$(call cygpath_w,$$(build_bindir));$$$$WINEPATH" \
+			$$(call spawn, $3) $2 -C "$$(JULIA_CPU_TARGET)" --output-$$* $$(call cygpath_w,$$@).tmp $$(JULIA_SYSIMG_BUILD_FLAGS) \
+			--startup-file=no --warn-overwrite=yes --sysimage $$(call cygpath_w,$$<) $$(call cygpath_w,$$(JULIAHOME)/contrib/generate_precompile.jl) $(JULIA_PRECOMPILE); then \
 		echo '*** This error is usually fixed by running `make clean`. If the error persists$$(COMMA) try `make cleanall`. ***'; \
 		false; \
 	fi )
diff --git a/test/Makefile b/test/Makefile
index 9ca65ffa9d7c77..3d16f88a741b8a 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -35,8 +35,11 @@ embedding:
 gcext:
 	@$(MAKE) -C $(SRCDIR)/$@ check $(GCEXT_ARGS)
 
+clangsa:
+	@$(MAKE) -C $(SRCDIR)/$@
+
 clean:
 	@$(MAKE) -C embedding $@ $(EMBEDDING_ARGS)
 	@$(MAKE) -C gcext $@ $(GCEXT_ARGS)
 
-.PHONY: $(TESTS) $(addprefix revise-, $(TESTS)) embedding gcext clean
+.PHONY: $(TESTS) $(addprefix revise-, $(TESTS)) embedding gcext clangsa clean
diff --git a/test/abstractarray.jl b/test/abstractarray.jl
index dd24dc28364c7d..32c367a7a50a84 100644
--- a/test/abstractarray.jl
+++ b/test/abstractarray.jl
@@ -299,10 +299,10 @@ function test_scalar_indexing(::Type{T}, shape, ::Type{TestAbstractArray}) where
     B = T(A)
     @test A == B
     # Test indexing up to 5 dimensions
-    trailing5 = CartesianIndex(ntuple(x->1, max(ndims(B)-5, 0)))
-    trailing4 = CartesianIndex(ntuple(x->1, max(ndims(B)-4, 0)))
-    trailing3 = CartesianIndex(ntuple(x->1, max(ndims(B)-3, 0)))
-    trailing2 = CartesianIndex(ntuple(x->1, max(ndims(B)-2, 0)))
+    trailing5 = CartesianIndex(ntuple(Returns(1), max(ndims(B)-5, 0)))
+    trailing4 = CartesianIndex(ntuple(Returns(1), max(ndims(B)-4, 0)))
+    trailing3 = CartesianIndex(ntuple(Returns(1), max(ndims(B)-3, 0)))
+    trailing2 = CartesianIndex(ntuple(Returns(1), max(ndims(B)-2, 0)))
     i=0
     for i5 = 1:size(B, 5)
         for i4 = 1:size(B, 4)
@@ -419,10 +419,10 @@ function test_vector_indexing(::Type{T}, shape, ::Type{TestAbstractArray}) where
         N = prod(shape)
         A = reshape(Vector(1:N), shape)
         B = T(A)
-        trailing5 = CartesianIndex(ntuple(x->1, max(ndims(B)-5, 0)))
-        trailing4 = CartesianIndex(ntuple(x->1, max(ndims(B)-4, 0)))
-        trailing3 = CartesianIndex(ntuple(x->1, max(ndims(B)-3, 0)))
-        trailing2 = CartesianIndex(ntuple(x->1, max(ndims(B)-2, 0)))
+        trailing5 = CartesianIndex(ntuple(Returns(1), max(ndims(B)-5, 0)))
+        trailing4 = CartesianIndex(ntuple(Returns(1), max(ndims(B)-4, 0)))
+        trailing3 = CartesianIndex(ntuple(Returns(1), max(ndims(B)-3, 0)))
+        trailing2 = CartesianIndex(ntuple(Returns(1), max(ndims(B)-2, 0)))
         idxs = rand(1:N, 3, 3, 3)
         @test B[idxs] == A[idxs] == idxs
         @test B[vec(idxs)] == A[vec(idxs)] == vec(idxs)
@@ -598,6 +598,31 @@ function test_get(::Type{TestAbstractArray})
     @test get(TSlow([]), (), 0) == 0
     @test get(TSlow([1]), (), 0) == 1
     @test get(TSlow(fill(1)), (), 0) == 1
+
+    global c = 0
+    f() = (global c = c+1; 0)
+    @test get(f, A, ()) == 0
+    @test c == 1
+    @test get(f, B, ()) == 0
+    @test c == 2
+    @test get(f, A, (1,)) == get(f, A, 1) == A[1] == 1
+    @test c == 2
+    @test get(f, B, (1,)) == get(f, B, 1) == B[1] == 1
+    @test c == 2
+    @test get(f, A, (25,)) == get(f, A, 25) == 0
+    @test c == 4
+    @test get(f, B, (25,)) == get(f, B, 25) == 0
+    @test c == 6
+    @test get(f, A, (1,1,1)) == A[1,1,1] == 1
+    @test get(f, B, (1,1,1)) == B[1,1,1] == 1
+    @test get(f, A, (1,1,3)) == 0
+    @test c == 7
+    @test get(f, B, (1,1,3)) == 0
+    @test c == 8
+    @test get(f, TSlow([]), ()) == 0
+    @test c == 9
+    @test get(f, TSlow([1]), ()) == 1
+    @test get(f, TSlow(fill(1)), ()) == 1
 end
 
 function test_cat(::Type{TestAbstractArray})
@@ -835,6 +860,11 @@ end
 @testset "ndims and friends" begin
     @test ndims(Diagonal(rand(1:5,5))) == 2
     @test ndims(Diagonal{Float64}) == 2
+    @test ndims(Diagonal) == 2
+    @test ndims(Vector) == 1
+    @test ndims(Matrix) == 2
+    @test ndims(Array{<:Any, 0}) == 0
+    @test_throws MethodError ndims(Array)
 end
 
 @testset "Issue #17811" begin
@@ -857,6 +887,18 @@ end
 @testset "to_shape" begin
     @test Base.to_shape(()) === ()
     @test Base.to_shape(1) === 1
+    @test Base.to_shape(big(1)) === Base.to_shape(1)
+    @test Base.to_shape(Int8(1)) === Base.to_shape(1)
+end
+
+@testset "issue #39923: similar" begin
+    for ax in [(big(2), big(3)), (big(2), 3), (UInt64(2), 3), (2, UInt32(3)),
+        (big(2), Base.OneTo(3)), (Base.OneTo(2), Base.OneTo(big(3)))]
+
+        A = similar(ones(), Int, ax)
+        @test axes(A) === (Base.OneTo(2), Base.OneTo(3))
+        @test eltype(A) === Int
+    end
 end
 
 @testset "issue #19267" begin
@@ -1280,3 +1322,252 @@ end
     @test Int[0 t...; t... 0] == [0 1 2; 1 2 0]
     @test_throws ArgumentError Int[t...; 3 4 5]
 end
+
+@testset "issue #39896, modified getindex " begin
+    for arr = ([1:10;], reshape([1.0:16.0;],4,4), reshape(['a':'h';],2,2,2))
+        for inds = (2:5, Base.OneTo(5), BigInt(3):BigInt(5), UInt(4):UInt(3),
+            Base.IdentityUnitRange(Base.OneTo(4)))
+            @test arr[inds] == arr[collect(inds)]
+            @test arr[inds] isa AbstractVector{eltype(arr)}
+        end
+    end
+    # Test that ranges and arrays behave identically for indices with 1-based axes
+    for r in (1:10, 1:1:10, Base.OneTo(10),
+        Base.IdentityUnitRange(Base.OneTo(10)), Base.IdentityUnitRange(1:10))
+        for inds = (2:5, Base.OneTo(5), BigInt(3):BigInt(5), UInt(4):UInt(3),
+            Base.IdentityUnitRange(Base.OneTo(4)))
+            @test r[inds] == r[collect(inds)] == collect(r)[inds] == collect(r)[collect(inds)]
+        end
+    end
+    for arr = ([1], reshape([1.0],1,1), reshape(['a'],1,1,1))
+        @test arr[true:true] == [arr[1]]
+        @test arr[true:true] isa AbstractVector{eltype(arr)}
+        @test arr[false:false] == []
+        @test arr[false:false] isa AbstractVector{eltype(arr)}
+    end
+    for arr = ([1:10;], reshape([1.0:16.0;],4,4), reshape(['a':'h';],2,2,2))
+        @test_throws BoundsError arr[true:true]
+        @test_throws BoundsError arr[false:false]
+    end
+end
+
+using Base: typed_hvncat
+@testset "hvncat" begin
+    a = fill(1, (2,3,2,4,5))
+    b = fill(2, (1,1,2,4,5))
+    c = fill(3, (1,2,2,4,5))
+    d = fill(4, (1,1,1,4,5))
+    e = fill(5, (1,1,1,4,5))
+    f = fill(6, (1,1,1,4,5))
+    g = fill(7, (2,3,1,4,5))
+    h = fill(8, (3,3,3,1,2))
+    i = fill(9, (3,2,3,3,2))
+    j = fill(10, (3,1,3,3,2))
+
+    result = [a; b c ;;; d e f ; g ;;;;; h ;;;; i j]
+    @test size(result) == (3,3,3,4,7)
+    @test result == [a; [b ;; c] ;;; [d e f] ; g ;;;;; h ;;;; i ;; j]
+    @test result == cat(cat([a ; b c], [d e f ; g], dims = 3), cat(h, [i j], dims = 4), dims = 5)
+
+    # terminating semicolons extend dimensions
+    @test [1;] == [1]
+    @test [1;;] == fill(1, (1,1))
+
+    for v in (1, fill(1), fill(1,1,1), fill(1, 1, 1, 1))
+        @test_throws ArgumentError [v; v;; v]
+        @test_throws ArgumentError [v; v;; v; v; v]
+        @test_throws ArgumentError [v; v; v;; v; v]
+        @test_throws ArgumentError [v; v;; v; v;;; v; v;; v; v;; v; v]
+        @test_throws ArgumentError [v; v;; v; v;;; v; v]
+        @test_throws ArgumentError [v; v;; v; v;;; v; v; v;; v; v]
+        @test_throws ArgumentError [v; v;; v; v;;; v; v;; v; v; v]
+        # ensure a wrong shape with the right number of elements doesn't pass through
+        @test_throws ArgumentError [v; v;; v; v;;; v; v; v; v]
+
+        @test [v; v;; v; v] == fill(1, ndims(v) == 3 ? (2, 2, 1) : (2,2))
+        @test [v; v;; v; v;;;] == fill(1, 2, 2, 1)
+        @test [v; v;; v; v] == fill(1, ndims(v) == 3 ? (2, 2, 1) : (2,2))
+        @test [v v; v v;;;] == fill(1, 2, 2, 1)
+        @test [v; v;; v; v;;; v; v;; v; v;;] == fill(1, 2, 2, 2)
+        @test [v; v; v;; v; v; v;;; v; v; v;; v; v; v;;] == fill(1, 3, 2, 2)
+        @test [v v; v v;;; v v; v v] == fill(1, 2, 2, 2)
+        @test [v v v; v v v;;; v v v; v v v] == fill(1, 2, 3, 2)
+    end
+
+    # mixed scalars and arrays work, for numbers and strings
+    for v = (1, "test")
+        @test [v v;;; fill(v, 1, 2)] == fill(v, 1, 2, 2)
+    end
+
+    # output dimensions are maximum of input dimensions and concatenation dimension
+    begin
+        v1 = fill(1, 1, 1)
+        v2 = fill(1, 1, 1, 1, 1)
+        v3 = fill(1, 1, 2, 1, 1)
+        @test [v1 ;;; v2] == [1 ;;; 1 ;;;;]
+        @test [v2 ;;; v1] == [1 ;;; 1 ;;;;]
+        @test [v3 ;;; v1 v1] == [1 1 ;;; 1 1 ;;;;]
+        @test [v1 v1 ;;; v3] == [1 1 ;;; 1 1 ;;;;]
+        @test [v2 v1 ;;; v1 v1] == [1 1 ;;; 1 1 ;;;;]
+        @test [v1 v1 ;;; v1 v2] == [1 1 ;;; 1 1 ;;;;]
+        @test [v2 ;;; 1] == [1 ;;; 1 ;;;;]
+        @test [1 ;;; v2] == [1 ;;; 1 ;;;;]
+        @test [v3 ;;; 1 v1] == [1 1 ;;; 1 1 ;;;;]
+        @test [v1 1 ;;; v3] == [1 1 ;;; 1 1 ;;;;]
+        @test [v2 1 ;;; v1 v1] == [1 1 ;;; 1 1 ;;;;]
+        @test [v1 1 ;;; v1 v2] == [1 1 ;;; 1 1 ;;;;]
+    end
+
+    # dims form
+    for v ∈ ((), (1,), ([1],), (1, [1]), ([1], 1), ([1], [1]))
+        # reject dimension < 0
+        @test_throws ArgumentError hvncat(-1, v...)
+
+        # reject shape tuple with no elements
+        @test_throws ArgumentError hvncat(((),), true, v...)
+    end
+
+    # reject dims or shape with negative or zero values
+    for v1 ∈ (-1, 0, 1)
+        for v2 ∈ (-1, 0, 1)
+            v1 == v2 == 1 && continue
+            for v3 ∈ ((), (1,), ([1],), (1, [1]), ([1], 1), ([1], [1]))
+                @test_throws ArgumentError hvncat((v1, v2), true, v3...)
+                @test_throws str->(occursin("`shape` argument must consist of positive integers", str) ||
+                                   occursin("reducing over an empty collection is not allowed", str)) hvncat(((v1,), (v2,)), true, v3...)
+            end
+        end
+    end
+
+    for v ∈ ((1, [1]), ([1], 1), ([1], [1]))
+        # reject shape with more than one end value
+        @test_throws ArgumentError hvncat(((1, 1),), true, v...)
+    end
+
+    for v ∈ ((1, 2, 3), (1, 2, [3]), ([1], [2], [3]))
+        # reject shape with more values in later level
+        @test_throws ArgumentError hvncat(((2, 1), (1, 1, 1)), true, v...)
+    end
+
+    # reject shapes that don't nest evenly between levels (e.g. 1 + 2 does not fit into 2)
+    @test_throws ArgumentError hvncat(((1, 2, 1), (2, 2), (4,)), true, [1 2], [3], [4], [1 2; 3 4])
+
+    # zero-length arrays are handled appropriately (empty blocks contribute no elements to the result)
+    @test [zeros(Int, 1, 2, 0) ;;; 1 3] == [1 3;;;]
+    @test [[] ;;; [] ;;; []] == Array{Any}(undef, 0, 1, 3)
+    @test [[] ; 1 ;;; 2 ; []] == [1 ;;; 2]
+    @test [[] ; [] ;;; [] ; []] == Array{Any}(undef, 0, 1, 2)
+    @test [[] ; 1 ;;; 2] == [1 ;;; 2]
+    @test [[] ; [] ;;; [] ;;; []] == Array{Any}(undef, 0, 1, 3)
+    z = zeros(Int, 0, 0, 0)  # empty along every dimension
+    @test [z z ; z ;;; z ;;; z] == Array{Int}(undef, 0, 0, 0)  # was evaluated but never asserted
+
+    for v1 ∈ (zeros(Int, 0, 0), zeros(Int, 0, 0, 0, 0), zeros(Int, 0, 0, 0, 0, 0, 0, 0))
+        for v2 ∈ (1, [1])
+            for v3 ∈ (2, [2])
+                @test_throws ArgumentError [v1 ;;; v2]
+                @test_throws ArgumentError [v1 ;;; v2 v3]
+                @test_throws ArgumentError [v1 v1 ;;; v2 v3]
+            end
+        end
+    end
+    v1 = zeros(Int, 0, 0, 0)
+    for v2 ∈ (1, [1])
+        for v3 ∈ (2, [2])
+            # current behavior, not potentially dangerous.
+            # should throw error like above loop
+            @test [v1 ;;; v2 v3] == [v2 v3;;;]
+            @test_throws ArgumentError [v1 ;;; v2]
+            @test_throws ArgumentError [v1 v1 ;;; v2 v3]
+        end
+    end
+
+    # 0-dimension behaviors
+    # exactly one argument, placed in an array
+    # if already an array, copy, with type conversion as necessary
+    @test_throws ArgumentError hvncat(0)
+    @test hvncat(0, 1) == fill(1)
+    @test hvncat(0, [1]) == [1]
+    @test_throws ArgumentError hvncat(0, 1, 1)
+    @test_throws ArgumentError typed_hvncat(Float64, 0)
+    @test typed_hvncat(Float64, 0, 1) == fill(1.0)
+    @test typed_hvncat(Float64, 0, [1]) == Float64[1.0]
+    @test_throws ArgumentError typed_hvncat(Float64, 0, 1, 1)
+    @test_throws ArgumentError hvncat((), true)  # no value to compare: the call itself must throw
+    @test hvncat((), true, 1) == fill(1)
+    @test hvncat((), true, [1]) == [1]
+    @test_throws ArgumentError hvncat((), true, 1, 1)
+    @test_throws ArgumentError typed_hvncat(Float64, (), true)  # no value to compare: the call itself must throw
+    @test typed_hvncat(Float64, (), true, 1) == fill(1.0)
+    @test typed_hvncat(Float64, (), true, [1]) == [1.0]
+    @test_throws ArgumentError typed_hvncat(Float64, (), true, 1, 1)
+
+    # 1-dimension behaviors
+    # int form
+    @test hvncat(1) == []
+    @test hvncat(1, 1) == [1]
+    @test hvncat(1, [1]) == [1]
+    @test hvncat(1, [1 2; 3 4]) == [1 2; 3 4]
+    @test hvncat(1, 1, 1) == [1 ; 1]
+    @test typed_hvncat(Float64, 1) == Float64[]
+    @test typed_hvncat(Float64, 1, 1) == Float64[1.0]
+    @test typed_hvncat(Float64, 1, [1]) == Float64[1.0]
+    @test typed_hvncat(Float64, 1, 1, 1) == Float64[1.0 ; 1.0]
+    # dims form
+    @test_throws ArgumentError hvncat((1,), true)
+    @test hvncat((2,), true, 1, 1) == [1; 1]
+    @test hvncat((2,), true, [1], [1]) == [1; 1]
+    @test_throws ArgumentError hvncat((2,), true, 1)
+    @test typed_hvncat(Float64, (2,), true, 1, 1) == Float64[1.0; 1.0]
+    @test typed_hvncat(Float64, (2,), true, [1], [1]) == Float64[1.0; 1.0]
+    @test_throws ArgumentError typed_hvncat(Float64, (2,), true, 1)
+    # row_first has no effect with just one dimension of the dims form
+    @test hvncat((2,), false, 1, 1) == [1; 1]
+    @test typed_hvncat(Float64, (2,), false, 1, 1) == Float64[1.0; 1.0]
+    # shape form
+    @test hvncat(((2,),), true, 1, 1) == [1 1]
+    @test hvncat(((2,),), true, [1], [1]) == [1 1]
+    @test_throws ArgumentError hvncat(((2,),), true, 1)
+    @test hvncat(((2,),), false, 1, 1) == [1; 1]
+    @test hvncat(((2,),), false, [1], [1]) == [1; 1]
+    @test typed_hvncat(Float64, ((2,),), true, 1, 1) == Float64[1.0 1.0]
+    @test typed_hvncat(Float64, ((2,),), true, [1], [1]) == Float64[1.0 1.0]
+    @test_throws ArgumentError typed_hvncat(Float64, ((2,),), true, 1)
+    @test typed_hvncat(Float64, ((2,),), false, 1, 1) == Float64[1.0; 1.0]
+    @test typed_hvncat(Float64, ((2,),), false, [1], [1]) == Float64[1.0; 1.0]
+
+    # zero-value behaviors for int form above dimension zero
+    # e.g. [;;], [;;;], though that isn't valid syntax
+    @test [] == hvncat(1) isa Array{Any, 1}
+    @test Array{Any, 2}(undef, 0, 0) == hvncat(2) isa Array{Any, 2}
+    @test Array{Any, 3}(undef, 0, 0, 0) == hvncat(3) isa Array{Any, 3}
+    @test Int[] == typed_hvncat(Int, 1) isa Array{Int, 1}
+    @test Array{Int, 2}(undef, 0, 0) == typed_hvncat(Int, 2) isa Array{Int, 2}
+    @test Array{Int, 3}(undef, 0, 0, 0) == typed_hvncat(Int, 3) isa Array{Int, 3}
+end
+
+@testset "keepat!" begin
+    a = [1:6;]
+    @test a === keepat!(a, 1:5)
+    @test a == 1:5
+    @test keepat!(a, [2, 4]) == [2, 4]
+    @test isempty(keepat!(a, []))
+
+    a = [1:6;]
+    @test_throws BoundsError keepat!(a, 1:10) # make sure this is not a no-op
+    @test_throws BoundsError keepat!(a, 2:10)
+    @test_throws ArgumentError keepat!(a, [2, 4, 3])
+
+    b = BitVector([1, 1, 1, 0, 0])  # same checks for the BitVector method
+    @test b === keepat!(b, 1:5)
+    @test b == [1, 1, 1, 0, 0]
+    @test keepat!(b, 2:4) == [1, 1, 0]
+    @test_throws BoundsError keepat!(b, -1:10)  # out-of-range index on the BitVector
+    @test_throws ArgumentError keepat!(b, [2, 1])  # indices must be sorted and unique
+    @test isempty(keepat!(b, []))
+end
+
+@testset "reshape methods for AbstractVectors" begin
+    r = Base.IdentityUnitRange(3:4)
+    @test reshape(r, :) === reshape(r, (:,)) === r
+end
diff --git a/test/ambiguous.jl b/test/ambiguous.jl
index bad5f19f389c9f..0516d9a74e436a 100644
--- a/test/ambiguous.jl
+++ b/test/ambiguous.jl
@@ -66,7 +66,7 @@ end
 ## Other ways of accessing functions
 # Test that non-ambiguous cases work
 let io = IOBuffer()
-    @test @test_logs precompile(ambig, (Int, Int))
+    @test precompile(ambig, (Int, Int))
     cf = @eval @cfunction(ambig, Int, (Int, Int))
     @test ccall(cf, Int, (Int, Int), 1, 2) == 4
     @test length(code_lowered(ambig, (Int, Int))) == 1
@@ -75,7 +75,7 @@ end
 
 # Test that ambiguous cases fail appropriately
 let io = IOBuffer()
-    @test @test_logs (:warn,) precompile(ambig, (UInt8, Int))
+    @test !precompile(ambig, (UInt8, Int))
     cf = @eval @cfunction(ambig, Int, (UInt8, Int))  # test for a crash (doesn't throw an error)
     @test_throws(MethodError(ambig, (UInt8(1), Int(2)), get_world_counter()),
                  ccall(cf, Int, (UInt8, Int), 1, 2))
@@ -348,7 +348,7 @@ f35983(::Type, ::Type) = 2
 @test length(Base.methods(f35983, (Any, Any))) == 2
 @test first(Base.methods(f35983, (Any, Any))).sig == Tuple{typeof(f35983), Type, Type}
 let ambig = Int32[0]
-    ms = Base._methods_by_ftype(Tuple{typeof(f35983), Type, Type}, -1, typemax(UInt), true, UInt[typemin(UInt)], UInt[typemax(UInt)], ambig)
+    ms = Base._methods_by_ftype(Tuple{typeof(f35983), Type, Type}, nothing, -1, typemax(UInt), true, UInt[typemin(UInt)], UInt[typemax(UInt)], ambig)
     @test length(ms) == 1
     @test ambig[1] == 0
 end
@@ -356,17 +356,34 @@ f35983(::Type{Int16}, ::Any) = 3
 @test length(Base.methods_including_ambiguous(f35983, (Type, Type))) == 2
 @test length(Base.methods(f35983, (Type, Type))) == 2
 let ambig = Int32[0]
-    ms = Base._methods_by_ftype(Tuple{typeof(f35983), Type, Type}, -1, typemax(UInt), true, UInt[typemin(UInt)], UInt[typemax(UInt)], ambig)
+    ms = Base._methods_by_ftype(Tuple{typeof(f35983), Type, Type}, nothing, -1, typemax(UInt), true, UInt[typemin(UInt)], UInt[typemax(UInt)], ambig)
     @test length(ms) == 2
     @test ambig[1] == 1
 end
 
 struct B38280 <: Real; val; end
 let ambig = Int32[0]
-    ms = Base._methods_by_ftype(Tuple{Type{B38280}, Any}, 1, typemax(UInt), false, UInt[typemin(UInt)], UInt[typemax(UInt)], ambig)
+    ms = Base._methods_by_ftype(Tuple{Type{B38280}, Any}, nothing, 1, typemax(UInt), false, UInt[typemin(UInt)], UInt[typemax(UInt)], ambig)
     @test ms isa Vector
     @test length(ms) == 1
     @test ambig[1] == 1
 end
 
+# issue #11407: two partially-overlapping methods are ambiguous until an exact method is added
+f11407(::Dict{K,V}, ::Dict{Any,V}) where {K,V} = 1
+f11407(::Dict{K,V}, ::Dict{K,Any}) where {K,V} = 2
+@test_throws MethodError f11407(Dict{Any,Any}(), Dict{Any,Any}()) # ambiguous
+@test f11407(Dict{Any,Int}(), Dict{Any,Int}()) == 1
+f11407(::Dict{Any,Any}, ::Dict{Any,Any}) = 3 # fully concrete signature, no type variables needed
+@test f11407(Dict{Any,Any}(), Dict{Any,Any}()) == 3
+
+# issue #12814
+abstract type A12814{N, T} end
+struct B12814{N, T} <: A12814{N, T}
+    x::NTuple{N, T}
+end
+(::Type{T})(x::X) where {T <: A12814, X <: Array} = 1
+@test_throws MethodError B12814{3, Float64}([1, 2, 3]) # ambiguous
+@test B12814{3,Float64}((1, 2, 3)).x === (1.0, 2.0, 3.0)
+
 nothing
diff --git a/test/arrayops.jl b/test/arrayops.jl
index 7ea2cf15d812d0..f2905aa0b582a2 100644
--- a/test/arrayops.jl
+++ b/test/arrayops.jl
@@ -701,6 +701,10 @@ end
         perm = randperm(4)
         @test isequal(A,permutedims(permutedims(A,perm),invperm(perm)))
         @test isequal(A,permutedims(permutedims(A,invperm(perm)),perm))
+
+        @test sum(permutedims(A,perm)) ≈ sum(PermutedDimsArray(A,perm))
+        @test sum(permutedims(A,perm), dims=2) ≈ sum(PermutedDimsArray(A,perm), dims=2)
+        @test sum(permutedims(A,perm), dims=(2,4)) ≈ sum(PermutedDimsArray(A,perm), dims=(2,4))
     end
 
     m = [1 2; 3 4]
@@ -1102,6 +1106,11 @@ end
     @test isequal(intersect([1,2,3], Float64[]), Float64[])
     @test isequal(intersect(Int64[], [1,2,3]), Int64[])
     @test isequal(intersect(Int64[]), Int64[])
+    @test isequal(intersect([1, 3], 1:typemax(Int)), [1, 3])
+    @test isequal(intersect(1:typemax(Int), [1, 3]), [1, 3])
+    @test isequal(intersect([1, 2, 3], 2:0.1:5), [2., 3.])
+    @test isequal(intersect([1.0, 2.0, 3.0], 2:5), [2., 3.])
+
     @test isequal(setdiff([1,2,3,4], [2,5,4]), [1,3])
     @test isequal(setdiff([1,2,3,4], [7,8,9]), [1,2,3,4])
     @test isequal(setdiff([1,2,3,4], Int64[]), Int64[1,2,3,4])
@@ -1153,17 +1162,17 @@ end
     # issue #5177
 
     c = fill(1,2,3,4)
-    m1 = mapslices(x-> fill(1,2,3), c, dims=[1,2])
-    m2 = mapslices(x-> fill(1,2,4), c, dims=[1,3])
-    m3 = mapslices(x-> fill(1,3,4), c, dims=[2,3])
+    m1 = mapslices(_ -> fill(1,2,3), c, dims=[1,2])
+    m2 = mapslices(_ -> fill(1,2,4), c, dims=[1,3])
+    m3 = mapslices(_ -> fill(1,3,4), c, dims=[2,3])
     @test size(m1) == size(m2) == size(m3) == size(c)
 
-    n1 = mapslices(x-> fill(1,6), c, dims=[1,2])
-    n2 = mapslices(x-> fill(1,6), c, dims=[1,3])
-    n3 = mapslices(x-> fill(1,6), c, dims=[2,3])
-    n1a = mapslices(x-> fill(1,1,6), c, dims=[1,2])
-    n2a = mapslices(x-> fill(1,1,6), c, dims=[1,3])
-    n3a = mapslices(x-> fill(1,1,6), c, dims=[2,3])
+    n1 =  mapslices(_ -> fill(1,6)  , c, dims=[1,2])
+    n2 =  mapslices(_ -> fill(1,6)  , c, dims=[1,3])
+    n3 =  mapslices(_ -> fill(1,6)  , c, dims=[2,3])
+    n1a = mapslices(_ -> fill(1,1,6), c, dims=[1,2])
+    n2a = mapslices(_ -> fill(1,1,6), c, dims=[1,3])
+    n3a = mapslices(_ -> fill(1,1,6), c, dims=[2,3])
     @test size(n1a) == (1,6,4) && size(n2a) == (1,3,6)  && size(n3a) == (2,1,6)
     @test size(n1) == (6,1,4) && size(n2) == (6,3,1)  && size(n3) == (2,6,1)
 
@@ -1659,7 +1668,7 @@ end
 Nmax = 3 # TODO: go up to CARTESIAN_DIMS+2 (currently this exposes problems)
 for N = 1:Nmax
     #indexing with (UnitRange, UnitRange, UnitRange)
-    args = ntuple(d->UnitRange{Int}, N)
+    args = ntuple(Returns(UnitRange{Int}), N)
     @test Base.return_types(getindex, Tuple{Array{Float32, N}, args...}) == [Array{Float32, N}]
     @test Base.return_types(getindex, Tuple{BitArray{N}, args...}) == Any[BitArray{N}]
     @test Base.return_types(setindex!, Tuple{Array{Float32, N}, Array{Int, 1}, args...}) == [Array{Float32, N}]
@@ -1786,7 +1795,7 @@ end
         @test mdsum(A) == 15
         @test mdsum2(A) == 15
         AA = reshape(aa, tuple(2, shp...))
-        B = view(AA, 1:1, ntuple(i->Colon(), i)...)
+        B = view(AA, 1:1, ntuple(Returns(:), i)...)
         @test isa(Base.IndexStyle(B), Base.IteratorsMD.IndexCartesian)
         @test mdsum(B) == 15
         @test mdsum2(B) == 15
@@ -1799,7 +1808,7 @@ end
         A = reshape(a, tuple(shp...))
         @test mdsum(A) == 55
         @test mdsum2(A) == 55
-        B = view(A, ntuple(i->Colon(), i)...)
+        B = view(A, ntuple(Returns(:), i)...)
         @test mdsum(B) == 55
         @test mdsum2(B) == 55
         insert!(shp, 2, 1)
@@ -2656,7 +2665,7 @@ let TT = Union{UInt8, Int8}
     resize!(b, 1)
     @assert pointer(a) == pa
     @assert pointer(b) == pb
-    unsafe_store!(pa, 0x1, 2) # reset a[2] to 1
+    unsafe_store!(Ptr{UInt8}(pa), 0x1, 2) # reset a[2] to 1
     @test length(a) == length(b) == 1
     @test a[1] == b[1] == 0x0
     @test a == b
@@ -2904,3 +2913,13 @@ end
     @test [fill(1); fill(2, (2,1,1))] == reshape([1; 2; 2], (3, 1, 1))
     @test_throws DimensionMismatch [fill(1); rand(2, 2, 2)]
 end
+
+@testset "eltype of zero for arrays (issue #41348)" begin
+    for a in Any[[DateTime(2020), DateTime(2021)], [Date(2000), Date(2001)], [Time(1), Time(2)]]
+        @test a + zero(a) == a
+        b = reshape(a, :, 1)
+        @test b + zero(b) == b
+        c = view(b, 1:1, 1:1)
+        @test c + zero(c) == c
+    end
+end
diff --git a/test/atomics.jl b/test/atomics.jl
new file mode 100644
index 00000000000000..c53471ed0da26b
--- /dev/null
+++ b/test/atomics.jl
@@ -0,0 +1,372 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test, Base.Threads
+using Core: ConcurrencyViolationError
+import Base: copy
+
+const ReplaceType = ccall(:jl_apply_cmpswap_type, Any, (Any,), T) where T # ReplaceType{T}((old, success)) is the value the replacefield! checks below expect back
+
+mutable struct ARefxy{T} # test fixture: one atomic field and one ordinary field of the same type
+    @atomic x::T # the checks below require an atomic ordering (or the default) to access this field
+    y::T # the checks below require :not_atomic (or the default) to access this field
+    ARefxy(x::T, y::T) where {T} = new{T}(x, y)
+    ARefxy{T}(x, y) where {T} = new{T}(x, y)
+    ARefxy{T}() where {T} = new{T}() # leaves both fields uninitialized; used by test_field_undef
+end
+
+mutable struct Refxy{T} # same shape as ARefxy but without an atomic field; used for the size/layout comparisons
+    x::T
+    y::T
+    Refxy(x::T, y::T) where {T} = new{T}(x, y)
+    Refxy{T}(x, y) where {T} = new{T}(x, y)
+    Refxy{T}() where {T} = new() # unused, but sets ninitialized to 0
+end
+
+@test_throws ErrorException("invalid redefinition of constant ARefxy") @eval mutable struct ARefxy{T} # both fields atomic: differs from the definition above
+    @atomic x::T
+    @atomic y::T
+end
+@test_throws ErrorException("invalid redefinition of constant ARefxy") @eval mutable struct ARefxy{T} # no atomic field at all
+    x::T
+    y::T
+end
+@test_throws ErrorException("invalid redefinition of constant ARefxy") @eval mutable struct ARefxy{T} # atomic marker moved from x to y
+    x::T
+    @atomic y::T
+end
+@test_throws ErrorException("invalid redefinition of constant Refxy") @eval mutable struct Refxy{T} # y made atomic in the previously non-atomic Refxy
+    x::T
+    @atomic y::T
+end
+
+copy(r::Union{Refxy,ARefxy}) = typeof(r)(r.x, r.y) # new object of the same type carrying the same two field values
+function add(x::T, y)::T where {T}; x + y; end # modifyfield! operator; the ::T return annotation converts the sum back to typeof(x)
+swap(x, y) = y # modifyfield! operator that ignores the old value and stores y
+
+let T1 = Refxy{NTuple{3,UInt8}}, # two 3-byte fields, none atomic
+    T2 = ARefxy{NTuple{3,UInt8}} # same field types, but x is atomic
+    @test sizeof(T1) == 6 # the 3-byte fields are packed back to back
+    @test sizeof(T2) == 8 # with the atomic field each field occupies 4 bytes (see the offsets below)
+    @test fieldoffset(T1, 1) == 0
+    @test fieldoffset(T2, 1) == 0
+    @test fieldoffset(T1, 2) == 3
+    @test fieldoffset(T2, 2) == 4
+    @test !Base.datatype_haspadding(T1)
+    @test Base.datatype_haspadding(T2)
+    @test Base.datatype_alignment(T1) == 1
+    @test Base.datatype_alignment(T2) == 4
+end
+
+# check that very large types are getting locks
+let (x, y) = (Complex{Int128}(10, 30), Complex{Int128}(20, 40))
+    ar = ARefxy(x, y)
+    r = Refxy(x, y)
+    @test 64 == sizeof(r) < sizeof(ar) # two 32-byte fields; the variant with the atomic field is strictly larger
+    @test sizeof(r) == sizeof(ar) - Int(fieldoffset(typeof(ar), 1)) # all of the extra bytes sit in front of field 1 (presumably the lock mentioned above)
+end
+
+struct PadIntA <: Number # internal padding
+    a::Int8
+    b::Int16
+    PadIntA(x) = new(82, x) # a is a fixed filler; only b carries the value
+end
+struct PadIntB <: Number # external padding
+    a::UInt8
+    b::UInt8
+    c::UInt8
+    PadIntB(x) = new(x & 0xff, (x >> 8) & 0xff, (x >> 16) & 0xff) # low 24 bits of x, least significant byte first
+end
+primitive type Int24 <: Signed 24 end # integral padding
+Int24(x::Int) = Core.Intrinsics.trunc_int(Int24, x) # keep only the low 24 bits
+Base.Int(x::PadIntB) = x.a + (Int(x.b) << 8) + (Int(x.c) << 16) # reassemble the three bytes into an Int
+Base.:(+)(x::PadIntA, b::Int) = PadIntA(x.b + b) # the + methods let `add` (used with modifyfield!) work on these types
+Base.:(+)(x::PadIntB, b::Int) = PadIntB(Int(x) + b)
+Base.:(+)(x::Int24, b::Int) = Core.Intrinsics.add_int(x, Int24(b))
+Base.show(io::IO, x::PadIntA) = print(io, "PadIntA(", x.b, ")")
+Base.show(io::IO, x::PadIntB) = print(io, "PadIntB(", Int(x), ")")
+Base.show(io::IO, x::Int24) = print(io, "Int24(", Core.Intrinsics.zext_int(Int, x), ")")
+
+@noinline function _test_field_operators(r) # r: Ref wrapping an ARefxy whose atomic field x starts out as 123_10
+    r = r[]
+    TT = fieldtype(typeof(r), :x) # declared type of the field
+    T = typeof(getfield(r, :x)) # concrete type of the value currently stored
+    @test getfield(r, :x, :sequentially_consistent) === T(123_10)
+    @test setfield!(r, :x, T(123_1), :sequentially_consistent) === T(123_1)
+    @test getfield(r, :x, :sequentially_consistent) === T(123_1)
+    @test replacefield!(r, :x, 123_1 % UInt, T(123_30), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(123_1), false)) # expected value is a UInt rather than a T: no match, nothing stored
+    @test replacefield!(r, :x, T(123_1), T(123_30), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(123_1), true)) # match: 123_30 is stored
+    @test getfield(r, :x, :sequentially_consistent) === T(123_30)
+    @test replacefield!(r, :x, T(123_1), T(123_1), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(123_30), false)) # field holds 123_30, so expecting 123_1 fails
+    @test getfield(r, :x, :sequentially_consistent) === T(123_30)
+    @test modifyfield!(r, :x, add, 1, :sequentially_consistent) === Pair{TT,TT}(T(123_30), T(123_31)) # result is old => new
+    @test modifyfield!(r, :x, add, 1, :sequentially_consistent) === Pair{TT,TT}(T(123_31), T(123_32))
+    @test getfield(r, :x, :sequentially_consistent) === T(123_32)
+    @test swapfield!(r, :x, T(123_1), :sequentially_consistent) === T(123_32) # result is the previous value
+    @test getfield(r, :x, :sequentially_consistent) === T(123_1)
+    nothing
+end
+@noinline function test_field_operators(r) # runs the checks on copies, so the object passed in is never mutated
+    _test_field_operators(Ref(copy(r))) # Ref with a concrete element type
+    _test_field_operators(Ref{Any}(copy(r))) # Ref{Any}: the callee only sees an Any-typed element
+    nothing
+end
+test_field_operators(ARefxy{Int}(123_10, 123_20))
+test_field_operators(ARefxy{Any}(123_10, 123_20))
+test_field_operators(ARefxy{Union{Nothing,Int}}(123_10, nothing))
+test_field_operators(ARefxy{Complex{Int32}}(123_10, 123_20))
+test_field_operators(ARefxy{Complex{Int128}}(123_10, 123_20))
+test_field_operators(ARefxy{PadIntA}(123_10, 123_20))
+test_field_operators(ARefxy{PadIntB}(123_10, 123_20))
+#FIXME: test_field_operators(ARefxy{Int24}(123_10, 123_20))
+test_field_operators(ARefxy{Float64}(123_10, 123_20))
+
+@noinline function _test_field_orderings(r, x, y) # r: Ref wrapping an ARefxy whose atomic field x holds `x` and whose plain field y holds `y`
+    @nospecialize x y
+    r = r[]
+    TT = fieldtype(typeof(r), :x)
+
+    @test getfield(r, :x) === x # loads of the atomic field x
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") getfield(r, :x, :u)
+    @test_throws ConcurrencyViolationError("getfield: atomic field cannot be accessed non-atomically") getfield(r, :x, :not_atomic)
+    @test getfield(r, :x, :unordered) === x
+    @test getfield(r, :x, :monotonic) === x
+    @test getfield(r, :x, :acquire) === x
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") getfield(r, :x, :release) # release orderings are rejected for a load
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") getfield(r, :x, :acquire_release)
+    @test getfield(r, :x, :sequentially_consistent) === x
+    @test isdefined(r, :x) # isdefined accepts and rejects the same orderings as getfield
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") isdefined(r, :x, :u)
+    @test_throws ConcurrencyViolationError("isdefined: atomic field cannot be accessed non-atomically") isdefined(r, :x, :not_atomic)
+    @test isdefined(r, :x, :unordered)
+    @test isdefined(r, :x, :monotonic)
+    @test isdefined(r, :x, :acquire)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") isdefined(r, :x, :release)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") isdefined(r, :x, :acquire_release)
+    @test isdefined(r, :x, :sequentially_consistent)
+
+    @test getfield(r, :y) === y # loads of the plain field y: only the default and :not_atomic succeed
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") getfield(r, :y, :u)
+    @test getfield(r, :y, :not_atomic) === y
+    @test_throws ConcurrencyViolationError("getfield: non-atomic field cannot be accessed atomically") getfield(r, :y, :unordered)
+    @test_throws ConcurrencyViolationError("getfield: non-atomic field cannot be accessed atomically") getfield(r, :y, :monotonic)
+    @test_throws ConcurrencyViolationError("getfield: non-atomic field cannot be accessed atomically") getfield(r, :y, :acquire)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") getfield(r, :y, :release)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") getfield(r, :y, :acquire_release)
+    @test_throws ConcurrencyViolationError("getfield: non-atomic field cannot be accessed atomically") getfield(r, :y, :sequentially_consistent)
+    @test isdefined(r, :y)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") isdefined(r, :y, :u)
+    @test isdefined(r, :y, :not_atomic)
+    @test_throws ConcurrencyViolationError("isdefined: non-atomic field cannot be accessed atomically") isdefined(r, :y, :unordered)
+    @test_throws ConcurrencyViolationError("isdefined: non-atomic field cannot be accessed atomically") isdefined(r, :y, :monotonic)
+    @test_throws ConcurrencyViolationError("isdefined: non-atomic field cannot be accessed atomically") isdefined(r, :y, :acquire)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") isdefined(r, :y, :release)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") isdefined(r, :y, :acquire_release)
+    @test_throws ConcurrencyViolationError("isdefined: non-atomic field cannot be accessed atomically") isdefined(r, :y, :sequentially_consistent)
+
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") setfield!(r, :x, y, :u) # stores to the atomic field x
+    @test_throws ConcurrencyViolationError("setfield!: atomic field cannot be written non-atomically") setfield!(r, :x, y)
+    @test_throws ConcurrencyViolationError("setfield!: atomic field cannot be written non-atomically") setfield!(r, :x, y, :not_atomic)
+    @test getfield(r, :x) === x # none of the rejected stores changed the field
+    @test setfield!(r, :x, y, :unordered) === y
+    @test setfield!(r, :x, y, :monotonic) === y
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") setfield!(r, :x, y, :acquire) # acquire orderings are rejected for a store
+    @test setfield!(r, :x, y, :release) === y
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") setfield!(r, :x, y, :acquire_release)
+    @test setfield!(r, :x, y, :sequentially_consistent) === y
+    @test getfield(r, :x) === y # x now holds y
+
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") setfield!(r, :y, x, :u) # stores to the plain field y
+    @test_throws ConcurrencyViolationError("setfield!: non-atomic field cannot be written atomically") setfield!(r, :y, x, :unordered)
+    @test_throws ConcurrencyViolationError("setfield!: non-atomic field cannot be written atomically") setfield!(r, :y, x, :monotonic)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") setfield!(r, :y, x, :acquire)
+    @test_throws ConcurrencyViolationError("setfield!: non-atomic field cannot be written atomically") setfield!(r, :y, x, :release)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") setfield!(r, :y, x, :acquire_release)
+    @test_throws ConcurrencyViolationError("setfield!: non-atomic field cannot be written atomically") setfield!(r, :y, x, :sequentially_consistent)
+    @test getfield(r, :y) === y
+    @test setfield!(r, :y, x) === x
+    @test setfield!(r, :y, x, :not_atomic) === x
+    @test getfield(r, :y) === x # y now holds x
+
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") swapfield!(r, :y, y, :u) # swaps on the plain field y
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") swapfield!(r, :y, y, :unordered) # :unordered is rejected outright for swap/modify/replace
+    @test_throws ConcurrencyViolationError("swapfield!: non-atomic field cannot be written atomically") swapfield!(r, :y, y, :monotonic)
+    @test_throws ConcurrencyViolationError("swapfield!: non-atomic field cannot be written atomically") swapfield!(r, :y, y, :acquire)
+    @test_throws ConcurrencyViolationError("swapfield!: non-atomic field cannot be written atomically") swapfield!(r, :y, y, :release)
+    @test_throws ConcurrencyViolationError("swapfield!: non-atomic field cannot be written atomically") swapfield!(r, :y, y, :acquire_release)
+    @test_throws ConcurrencyViolationError("swapfield!: non-atomic field cannot be written atomically") swapfield!(r, :y, y, :sequentially_consistent)
+    @test swapfield!(r, :y, y, :not_atomic) === x # returns the old value x; y holds y again
+
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") modifyfield!(r, :y, swap, y, :u)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") modifyfield!(r, :y, swap, y, :unordered)
+    @test_throws ConcurrencyViolationError("modifyfield!: non-atomic field cannot be written atomically") modifyfield!(r, :y, swap, y, :monotonic)
+    @test_throws ConcurrencyViolationError("modifyfield!: non-atomic field cannot be written atomically") modifyfield!(r, :y, swap, y, :acquire)
+    @test_throws ConcurrencyViolationError("modifyfield!: non-atomic field cannot be written atomically") modifyfield!(r, :y, swap, y, :release)
+    @test_throws ConcurrencyViolationError("modifyfield!: non-atomic field cannot be written atomically") modifyfield!(r, :y, swap, y, :acquire_release)
+    @test_throws ConcurrencyViolationError("modifyfield!: non-atomic field cannot be written atomically") modifyfield!(r, :y, swap, y, :sequentially_consistent)
+    @test modifyfield!(r, :y, swap, x, :not_atomic) === Pair{TT,TT}(y, x) # result is old => new; y holds x again
+
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :y, y, y, :u, :not_atomic)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :y, y, y, :unordered, :not_atomic)
+    @test_throws ConcurrencyViolationError("replacefield!: non-atomic field cannot be written atomically") replacefield!(r, :y, y, y, :monotonic, :not_atomic)
+    @test_throws ConcurrencyViolationError("replacefield!: non-atomic field cannot be written atomically") replacefield!(r, :y, y, y, :acquire, :not_atomic)
+    @test_throws ConcurrencyViolationError("replacefield!: non-atomic field cannot be written atomically") replacefield!(r, :y, y, y, :release, :not_atomic)
+    @test_throws ConcurrencyViolationError("replacefield!: non-atomic field cannot be written atomically") replacefield!(r, :y, y, y, :acquire_release, :not_atomic)
+    @test_throws ConcurrencyViolationError("replacefield!: non-atomic field cannot be written atomically") replacefield!(r, :y, y, y, :sequentially_consistent, :not_atomic)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :y, y, y, :not_atomic, :u)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :y, y, y, :not_atomic, :unordered)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :y, y, y, :not_atomic, :monotonic)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :y, y, y, :not_atomic, :acquire)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :y, y, y, :not_atomic, :release)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :y, y, y, :not_atomic, :acquire_release)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :y, y, y, :not_atomic, :sequentially_consistent)
+    @test replacefield!(r, :y, x, y, :not_atomic, :not_atomic) === ReplaceType{TT}((x, true)) # y held x, so the replacement succeeds
+    @test replacefield!(r, :y, x, y, :not_atomic, :not_atomic) === ReplaceType{TT}((y, x === y)) # y now holds y: succeeds only when x === y
+    @test replacefield!(r, :y, y, y, :not_atomic) === ReplaceType{TT}((y, true))
+    @test replacefield!(r, :y, y, y) === ReplaceType{TT}((y, true))
+
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") swapfield!(r, :x, x, :u) # swaps on the atomic field x (still holding y)
+    @test_throws ConcurrencyViolationError("swapfield!: atomic field cannot be written non-atomically") swapfield!(r, :x, x, :not_atomic)
+    @test_throws ConcurrencyViolationError("swapfield!: atomic field cannot be written non-atomically") swapfield!(r, :x, x)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") swapfield!(r, :x, x, :unordered)
+    @test swapfield!(r, :x, x, :monotonic) === y # first successful swap returns the old value y; x holds x from here on
+    @test swapfield!(r, :x, x, :acquire) === x
+    @test swapfield!(r, :x, x, :release) === x
+    @test swapfield!(r, :x, x, :acquire_release) === x
+    @test swapfield!(r, :x, x, :sequentially_consistent) === x
+
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") modifyfield!(r, :x, swap, x, :u)
+    @test_throws ConcurrencyViolationError("modifyfield!: atomic field cannot be written non-atomically") modifyfield!(r, :x, swap, x, :not_atomic)
+    @test_throws ConcurrencyViolationError("modifyfield!: atomic field cannot be written non-atomically") modifyfield!(r, :x, swap, x)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") modifyfield!(r, :x, swap, x, :unordered)
+    @test modifyfield!(r, :x, swap, x, :monotonic) === Pair{TT,TT}(x, x)
+    @test modifyfield!(r, :x, swap, x, :acquire) === Pair{TT,TT}(x, x)
+    @test modifyfield!(r, :x, swap, x, :release) === Pair{TT,TT}(x, x)
+    @test modifyfield!(r, :x, swap, x, :acquire_release) === Pair{TT,TT}(x, x)
+    @test modifyfield!(r, :x, swap, x, :sequentially_consistent) === Pair{TT,TT}(x, x)
+
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :x, x, x, :u, :not_atomic)
+    @test_throws ConcurrencyViolationError("replacefield!: atomic field cannot be written non-atomically") replacefield!(r, :x, x, x)
+    @test_throws ConcurrencyViolationError("replacefield!: atomic field cannot be written non-atomically") replacefield!(r, :x, y, x, :not_atomic, :not_atomic)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :x, x, x, :unordered, :not_atomic)
+    @test_throws ConcurrencyViolationError("replacefield!: atomic field cannot be accessed non-atomically") replacefield!(r, :x, x, x, :monotonic, :not_atomic)
+    @test_throws ConcurrencyViolationError("replacefield!: atomic field cannot be accessed non-atomically") replacefield!(r, :x, x, x, :acquire, :not_atomic)
+    @test_throws ConcurrencyViolationError("replacefield!: atomic field cannot be accessed non-atomically") replacefield!(r, :x, x, x, :release, :not_atomic)
+    @test_throws ConcurrencyViolationError("replacefield!: atomic field cannot be accessed non-atomically") replacefield!(r, :x, x, x, :acquire_release, :not_atomic)
+    @test_throws ConcurrencyViolationError("replacefield!: atomic field cannot be accessed non-atomically") replacefield!(r, :x, x, x, :sequentially_consistent, :not_atomic)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :x, x, x, :not_atomic, :u)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :x, x, x, :not_atomic, :unordered)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :x, x, x, :not_atomic, :monotonic)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :x, x, x, :not_atomic, :acquire)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :x, x, x, :not_atomic, :release)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :x, x, x, :not_atomic, :acquire_release)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :x, x, x, :not_atomic, :sequentially_consistent)
+    @test replacefield!(r, :x, x, y, :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((x, true)) # x held x, so y is stored
+    @test replacefield!(r, :x, x, y, :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((y, x === y)) # x now holds y: succeeds only when x === y
+    @test replacefield!(r, :x, y, x, :sequentially_consistent) === ReplaceType{TT}((y, true))
+    nothing
+end
+@noinline function test_field_orderings(r, x, y) # runs the checks through a concretely typed Ref and a Ref{Any}, each on its own copy of r
+    _test_field_orderings(Ref(copy(r)), x, y)
+    _test_field_orderings(Ref{Any}(copy(r)), x, y)
+    nothing
+end
+@noinline test_field_orderings(x, y) = (@nospecialize; test_field_orderings(ARefxy(x, y), x, y)) # convenience method: builds the ARefxy from the two values
+test_field_orderings(10, 20)
+test_field_orderings(true, false)
+test_field_orderings("hi", "bye")
+test_field_orderings(:hi, :bye)
+test_field_orderings(nothing, nothing) # the x === y case that the `x === y` expectations in the replacefield! checks allow for
+test_field_orderings(ARefxy{Any}(123_10, 123_20), 123_10, 123_20)
+test_field_orderings(ARefxy{Any}(true, false), true, false)
+test_field_orderings(ARefxy{Union{Nothing,Missing}}(nothing, missing), nothing, missing)
+test_field_orderings(ARefxy{Union{Nothing,Int}}(nothing, 123_1), nothing, 123_1)
+test_field_orderings(Complex{Int128}(10, 30), Complex{Int128}(20, 40))
+test_field_orderings(10.0, 20.0)
+test_field_orderings(NaN, Inf)
+
+struct UndefComplex{T} # immutable pair whose only constructor leaves both fields uninitialized
+    re::T
+    im::T
+    UndefComplex{T}() where {T} = new{T}()
+end
+Base.convert(T::Type{<:UndefComplex}, S) = T() # any value "converts" to a fresh uninitialized instance, ignoring S
+@noinline function _test_field_undef(r) # r: Ref wrapping an ARefxy built with the zero-argument constructor
+    r = r[]
+    TT = fieldtype(typeof(r), :x)
+    x = convert(TT, 12345_10) # a value of the field type to store later
+    @test_throws UndefRefError getfield(r, :x)
+    @test_throws UndefRefError getfield(r, :x, :sequentially_consistent)
+    @test_throws UndefRefError modifyfield!(r, :x, add, 1, :sequentially_consistent)
+    @test_throws (TT === Any ? UndefRefError : TypeError) replacefield!(r, :x, 1, 1.0, :sequentially_consistent) # for the field types tested here 1.0 is only an acceptable new value when TT is Any
+    @test_throws UndefRefError replacefield!(r, :x, 1, x, :sequentially_consistent)
+    @test_throws UndefRefError getfield(r, :x, :sequentially_consistent) # the failed operations above left the field undefined
+    @test_throws UndefRefError swapfield!(r, :x, x, :sequentially_consistent) # throws for the undefined old value, yet the next line shows x was stored
+    @test getfield(r, :x, :sequentially_consistent) === x === getfield(r, :x)
+    nothing
+end
+@noinline function test_field_undef(TT) # TT: an ARefxy type; a fresh uninitialized instance is built for each run
+    _test_field_undef(Ref(TT()))
+    _test_field_undef(Ref{Any}(TT()))
+    nothing
+end
+test_field_undef(ARefxy{BigInt})
+test_field_undef(ARefxy{Any})
+test_field_undef(ARefxy{Union{Nothing,Integer}})
+test_field_undef(ARefxy{UndefComplex{Any}})
+test_field_undef(ARefxy{UndefComplex{UndefComplex{Any}}})
+
+@test_throws ErrorException @macroexpand @atomic foo() # malformed uses must be rejected while the macro is expanded
+@test_throws ErrorException @macroexpand @atomic foo += bar # update forms need a field access on the left-hand side
+@test_throws ErrorException @macroexpand @atomic foo -= bar
+@test_throws ErrorException @macroexpand @atomic foo = bar
+@test_throws ErrorException @macroexpand @atomic :monotonic foo() # same rejection when an explicit ordering is given
+@test_throws ErrorException @macroexpand @atomic foo(bar)
+@test_throws ErrorException @macroexpand @atomic foo(bar, baz)
+@test_throws ErrorException @macroexpand @atomic foo(bar, baz, bax)
+@test_throws ErrorException @macroexpand @atomicreplace foo bar
+
+# test macroexpansions
+let a = ARefxy(1, -1)
+    @test 1 === @atomic a.x # plain read
+    @test 2 === @atomic :sequentially_consistent a.x = 2 # assignment evaluates to the stored value
+    @test 3 === @atomic :monotonic a.x = 3
+    local four = 4
+    @test 4 === @atomic :monotonic a.x = four # the right-hand side may be a variable ...
+    @test 3 === @atomic :monotonic a.x = four - 1 # ... or an arbitrary expression
+    @test_throws ConcurrencyViolationError @atomic :not_atomic a.x = 2
+    @test_throws ConcurrencyViolationError @atomic :not_atomic a.x
+    @test_throws ConcurrencyViolationError @atomic :not_atomic a.x += 1
+
+    @test 3 === @atomic :monotonic a.x
+    @test 5 === @atomic a.x += 2 # update-assignment forms evaluate to the new value
+    @test 4 === @atomic :monotonic a.x -= 1
+    @test 12 === @atomic :monotonic a.x *= 3
+
+    @test 12 === @atomic a.x
+    @test (12 => 13) === @atomic a.x + 1 # operator forms without `=` evaluate to old => new
+    @test (13 => 15) === @atomic :monotonic a.x + 2
+    @test (15 => 19) === @atomic a.x max 19 # three-argument form: field, function, argument
+    @test (19 => 20) === @atomic :monotonic a.x max 20
+    @test_throws ConcurrencyViolationError @atomic :not_atomic a.x + 1
+    @test_throws ConcurrencyViolationError @atomic :not_atomic a.x max 30
+
+    @test 20 === @atomic a.x
+    @test 20 === @atomicswap a.x = 1 # evaluates to the previous value
+    @test 1 === @atomicswap :monotonic a.x = 2
+    @test_throws ConcurrencyViolationError @atomicswap :not_atomic a.x = 1
+
+    @test 2 === @atomic a.x
+    @test ReplaceType{Int}((2, true)) === @atomicreplace a.x 2 => 1 # expected => desired; result is (old, success)
+    @test ReplaceType{Int}((1, false)) === @atomicreplace :monotonic a.x 2 => 1
+    @test ReplaceType{Int}((1, false)) === @atomicreplace :monotonic :monotonic a.x 2 => 1 # two orderings: success ordering, then failure ordering
+    @test_throws ConcurrencyViolationError @atomicreplace :not_atomic a.x 1 => 2
+    @test_throws ConcurrencyViolationError @atomicreplace :monotonic :acquire a.x 1 => 2 # presumably rejected because the failure ordering is stronger than the success ordering
+
+    @test 1 === @atomic a.x
+    xchg = 1 => 2 # the expected => desired pair may also be supplied as a runtime value
+    @test ReplaceType{Int}((1, true)) === @atomicreplace a.x xchg
+    @test ReplaceType{Int}((2, false)) === @atomicreplace :monotonic a.x xchg
+    @test ReplaceType{Int}((2, false)) === @atomicreplace :acquire_release :monotonic a.x xchg
+    @test_throws ConcurrencyViolationError @atomicreplace :not_atomic a.x xchg
+    @test_throws ConcurrencyViolationError @atomicreplace :monotonic :acquire a.x xchg
+end
diff --git a/test/binaryplatforms.jl b/test/binaryplatforms.jl
index 6f336bc48ad50d..793a9b1f06a410 100644
--- a/test/binaryplatforms.jl
+++ b/test/binaryplatforms.jl
@@ -368,7 +368,7 @@ end
 
 
     # Next, an asymmetric comparison strategy.  We'll create a "less than or equal to" constraint
-    # that uses the `{a,b}_requested` paramters to determine which number represents the limit.
+    # that uses the `{a,b}_requested` parameters to determine which number represents the limit.
     function less_than_constraint(a::String, b::String, a_requested::Bool, b_requested::Bool)
         a = parse(Int, a)
         b = parse(Int, b)
diff --git a/test/bitarray.jl b/test/bitarray.jl
index f2469424520201..b565252664876e 100644
--- a/test/bitarray.jl
+++ b/test/bitarray.jl
@@ -832,6 +832,8 @@ timesofar("unary arithmetic")
         @check_bit_operation broadcast(&, b1, b2)  BitMatrix
         @check_bit_operation broadcast(|, b1, b2)  BitMatrix
         @check_bit_operation broadcast(xor, b1, b2)  BitMatrix
+        @check_bit_operation broadcast(nand, b1, b2)  BitMatrix
+        @check_bit_operation broadcast(nor, b1, b2)  BitMatrix
         @check_bit_operation (+)(b1, b2)  Matrix{Int}
         @check_bit_operation (-)(b1, b2)  Matrix{Int}
         @check_bit_operation broadcast(*, b1, b2) BitMatrix
@@ -861,6 +863,8 @@ timesofar("unary arithmetic")
         @check_bit_operation broadcast(&, b0, b0)  BitVector
         @check_bit_operation broadcast(|, b0, b0)  BitVector
         @check_bit_operation broadcast(xor, b0, b0)  BitVector
+        @check_bit_operation broadcast(nand, b0, b0)  BitVector
+        @check_bit_operation broadcast(nor, b0, b0)  BitVector
         @check_bit_operation broadcast(*, b0, b0) BitVector
         @check_bit_operation (*)(b0, b0') BitMatrix
     end
@@ -871,6 +875,8 @@ timesofar("unary arithmetic")
         @check_bit_operation broadcast(&, b1, i2)  Matrix{Int}
         @check_bit_operation broadcast(|, b1, i2)  Matrix{Int}
         @check_bit_operation broadcast(xor, b1, i2)  Matrix{Int}
+        @check_bit_operation broadcast(nand, b1, i2)  Matrix{Int}
+        @check_bit_operation broadcast(nor, b1, i2)  Matrix{Int}
         @check_bit_operation (+)(b1, i2)  Matrix{Int}
         @check_bit_operation (-)(b1, i2)  Matrix{Int}
         @check_bit_operation broadcast(*, b1, i2) Matrix{Int}
@@ -902,6 +908,8 @@ timesofar("unary arithmetic")
         @check_bit_operation broadcast(&, i1, b2)  Matrix{Int}
         @check_bit_operation broadcast(|, i1, b2)  Matrix{Int}
         @check_bit_operation broadcast(xor, i1, b2)  Matrix{Int}
+        @check_bit_operation broadcast(nand, i1, b2)  Matrix{Int}
+        @check_bit_operation broadcast(nor, i1, b2)  Matrix{Int}
         @check_bit_operation broadcast(+, i1, b2)  Matrix{Int}
         @check_bit_operation broadcast(-, i1, b2)  Matrix{Int}
         @check_bit_operation broadcast(*, i1, b2) Matrix{Int}
@@ -909,6 +917,8 @@ timesofar("unary arithmetic")
         @check_bit_operation broadcast(&, u1, b2)  Matrix{UInt8}
         @check_bit_operation broadcast(|, u1, b2)  Matrix{UInt8}
         @check_bit_operation broadcast(xor, u1, b2)  Matrix{UInt8}
+        @check_bit_operation broadcast(nand, u1, b2)  Matrix{UInt8}
+        @check_bit_operation broadcast(nor, u1, b2)  Matrix{UInt8}
         @check_bit_operation broadcast(+, u1, b2)  Matrix{UInt8}
         @check_bit_operation broadcast(-, u1, b2)  Matrix{UInt8}
         @check_bit_operation broadcast(*, u1, b2) Matrix{UInt8}
@@ -986,6 +996,14 @@ timesofar("unary arithmetic")
         @check_bit_operation broadcast(xor, b1, false)  BitMatrix
         @check_bit_operation broadcast(xor, true, b1)   BitMatrix
         @check_bit_operation broadcast(xor, false, b1)  BitMatrix
+        @check_bit_operation broadcast(nand, b1, true)   BitMatrix
+        @check_bit_operation broadcast(nand, b1, false)  BitMatrix
+        @check_bit_operation broadcast(nand, true, b1)   BitMatrix
+        @check_bit_operation broadcast(nand, false, b1)  BitMatrix
+        @check_bit_operation broadcast(nor, b1, true)   BitMatrix
+        @check_bit_operation broadcast(nor, b1, false)  BitMatrix
+        @check_bit_operation broadcast(nor, true, b1)   BitMatrix
+        @check_bit_operation broadcast(nor, false, b1)  BitMatrix
         @check_bit_operation broadcast(+, b1, true)   Matrix{Int}
         @check_bit_operation broadcast(+, b1, false)  Matrix{Int}
         @check_bit_operation broadcast(-, b1, true)   Matrix{Int}
@@ -1002,12 +1020,18 @@ timesofar("unary arithmetic")
         @check_bit_operation broadcast(&, b1, b2)  BitMatrix
         @check_bit_operation broadcast(|, b1, b2)  BitMatrix
         @check_bit_operation broadcast(xor, b1, b2)  BitMatrix
+        @check_bit_operation broadcast(nand, b1, b2)  BitMatrix
+        @check_bit_operation broadcast(nor, b1, b2)  BitMatrix
         @check_bit_operation broadcast(&, b2, b1)  BitMatrix
         @check_bit_operation broadcast(|, b2, b1)  BitMatrix
         @check_bit_operation broadcast(xor, b2, b1)  BitMatrix
+        @check_bit_operation broadcast(nand, b2, b1)  BitMatrix
+        @check_bit_operation broadcast(nor, b2, b1)  BitMatrix
         @check_bit_operation broadcast(&, b1, i2)  Matrix{Int}
         @check_bit_operation broadcast(|, b1, i2)  Matrix{Int}
         @check_bit_operation broadcast(xor, b1, i2)  Matrix{Int}
+        @check_bit_operation broadcast(nand, b1, i2)  Matrix{Int}
+        @check_bit_operation broadcast(nor, b1, i2)  Matrix{Int}
         @check_bit_operation broadcast(+, b1, i2)  Matrix{Int}
         @check_bit_operation broadcast(-, b1, i2)  Matrix{Int}
         @check_bit_operation broadcast(*, b1, i2) Matrix{Int}
@@ -1018,6 +1042,8 @@ timesofar("unary arithmetic")
         @check_bit_operation broadcast(&, b1, u2)  Matrix{UInt8}
         @check_bit_operation broadcast(|, b1, u2)  Matrix{UInt8}
         @check_bit_operation broadcast(xor, b1, u2)  Matrix{UInt8}
+        @check_bit_operation broadcast(nand, b1, u2)  Matrix{UInt8}
+        @check_bit_operation broadcast(nor, b1, u2)  Matrix{UInt8}
         @check_bit_operation broadcast(+, b1, u2)  Matrix{UInt8}
         @check_bit_operation broadcast(-, b1, u2)  Matrix{UInt8}
         @check_bit_operation broadcast(*, b1, u2) Matrix{UInt8}
@@ -1086,6 +1112,14 @@ timesofar("unary arithmetic")
         @check_bit_operation broadcast(xor, b1, transpose(b3))  BitMatrix
         @check_bit_operation broadcast(xor, b2, b1)             BitMatrix
         @check_bit_operation broadcast(xor, transpose(b3), b1)  BitMatrix
+        @check_bit_operation broadcast(nand, b1, b2)             BitMatrix
+        @check_bit_operation broadcast(nand, b1, transpose(b3))  BitMatrix
+        @check_bit_operation broadcast(nand, b2, b1)             BitMatrix
+        @check_bit_operation broadcast(nand, transpose(b3), b1)  BitMatrix
+        @check_bit_operation broadcast(nor, b1, b2)             BitMatrix
+        @check_bit_operation broadcast(nor, b1, transpose(b3))  BitMatrix
+        @check_bit_operation broadcast(nor, b2, b1)             BitMatrix
+        @check_bit_operation broadcast(nor, transpose(b3), b1)  BitMatrix
         @check_bit_operation broadcast(+, b1, b2)             Matrix{Int}
         @check_bit_operation broadcast(+, b1, transpose(b3))  Matrix{Int}
         @check_bit_operation broadcast(+, b2, b1)             Matrix{Int}
@@ -1180,8 +1214,8 @@ timesofar("datamove")
 
         @check_bit_operation findfirst(x->x, b1)     Union{Int,Nothing}
         @check_bit_operation findfirst(x->!x, b1)    Union{Int,Nothing}
-        @check_bit_operation findfirst(x->true, b1)  Union{Int,Nothing}
-        @check_bit_operation findfirst(x->false, b1) Union{Int,Nothing}
+        @check_bit_operation findfirst(Returns(true ), b1)  Union{Int,Nothing}
+        @check_bit_operation findfirst(Returns(false), b1) Union{Int,Nothing}
 
         @check_bit_operation findall(b1) Vector{Int}
     end
@@ -1275,49 +1309,51 @@ timesofar("find")
     @test_throws BoundsError findprevnot(b2, 1001)
     @test_throws BoundsError findprev(!, b2, 1001)
     @test_throws BoundsError findprev(identity, b1, 1001)
-    @test_throws BoundsError findprev(x->false, b1, 1001)
-    @test_throws BoundsError findprev(x->true, b1, 1001)
+    @test_throws BoundsError findprev(Returns(false), b1, 1001)
+    @test_throws BoundsError findprev(Returns(true ), b1, 1001)
     @test findprev(b1, 1000) == findprevnot(b2, 1000) == findprev(!, b2, 1000) == 777
     @test findprev(b1, 777)  == findprevnot(b2, 777)  == findprev(!, b2, 777)  == 777
     @test findprev(b1, 776)  == findprevnot(b2, 776)  == findprev(!, b2, 776)  == 77
     @test findprev(b1, 77)   == findprevnot(b2, 77)   == findprev(!, b2, 77)   == 77
     @test findprev(b1, 76)   == findprevnot(b2, 76)   == findprev(!, b2, 76)   == nothing
     @test findprev(b1, -1)   == findprevnot(b2, -1)   == findprev(!, b2, -1)   == nothing
-    @test findprev(identity, b1, -1) == findprev(x->false, b1, -1) == findprev(x->true, b1, -1) == nothing
+    @test findprev(identity, b1, -1) == nothing
+    @test findprev(Returns(false), b1, -1) == nothing
+    @test findprev(Returns(true), b1, -1) == nothing
     @test_throws BoundsError findnext(b1, -1)
     @test_throws BoundsError findnextnot(b2, -1)
     @test_throws BoundsError findnext(!, b2, -1)
     @test_throws BoundsError findnext(identity, b1, -1)
-    @test_throws BoundsError findnext(x->false, b1, -1)
-    @test_throws BoundsError findnext(x->true, b1, -1)
+    @test_throws BoundsError findnext(Returns(false), b1, -1)
+    @test_throws BoundsError findnext(Returns(true), b1, -1)
     @test findnext(b1, 1)    == findnextnot(b2, 1)    == findnext(!, b2, 1)    == 77
     @test findnext(b1, 77)   == findnextnot(b2, 77)   == findnext(!, b2, 77)   == 77
     @test findnext(b1, 78)   == findnextnot(b2, 78)   == findnext(!, b2, 78)   == 777
     @test findnext(b1, 777)  == findnextnot(b2, 777)  == findnext(!, b2, 777)  == 777
     @test findnext(b1, 778)  == findnextnot(b2, 778)  == findnext(!, b2, 778)  == nothing
     @test findnext(b1, 1001) == findnextnot(b2, 1001) == findnext(!, b2, 1001) == nothing
-    @test findnext(identity, b1, 1001) == findnext(x->false, b1, 1001) == findnext(x->true, b1, 1001) == nothing
+    @test findnext(identity, b1, 1001) == findnext(Returns(false), b1, 1001) == findnext(Returns(true), b1, 1001) == nothing
 
     @test findlast(b1) == Base.findlastnot(b2) == 777
     @test findfirst(b1) == Base.findfirstnot(b2) == 77
 
     b0 = BitVector()
-    @test findprev(x->true, b0, -1) == nothing
-    @test_throws BoundsError findprev(x->true, b0, 1)
-    @test_throws BoundsError findnext(x->true, b0, -1)
-    @test findnext(x->true, b0, 1) == nothing
+    @test findprev(Returns(true), b0, -1) == nothing
+    @test_throws BoundsError findprev(Returns(true), b0, 1)
+    @test_throws BoundsError findnext(Returns(true), b0, -1)
+    @test findnext(Returns(true), b0, 1) == nothing
 
     b1 = falses(10)
-    @test findprev(x->true, b1, 5) == 5
-    @test findnext(x->true, b1, 5) == 5
-    @test findprev(x->true, b1, -1) == nothing
-    @test findnext(x->true, b1, 11) == nothing
-    @test findprev(x->false, b1, 5) == nothing
-    @test findnext(x->false, b1, 5) == nothing
-    @test findprev(x->false, b1, -1) == nothing
-    @test findnext(x->false, b1, 11) == nothing
-    @test_throws BoundsError findprev(x->true, b1, 11)
-    @test_throws BoundsError findnext(x->true, b1, -1)
+    @test findprev(Returns(true), b1, 5) == 5
+    @test findnext(Returns(true), b1, 5) == 5
+    @test findprev(Returns(true), b1, -1) == nothing
+    @test findnext(Returns(true), b1, 11) == nothing
+    @test findprev(Returns(false), b1, 5) == nothing
+    @test findnext(Returns(false), b1, 5) == nothing
+    @test findprev(Returns(false), b1, -1) == nothing
+    @test findnext(Returns(false), b1, 11) == nothing
+    @test_throws BoundsError findprev(Returns(true), b1, 11)
+    @test_throws BoundsError findnext(Returns(true), b1, -1)
 
     @testset "issue 32568" for T = (UInt, BigInt)
         for x = (1, 2)
@@ -1382,12 +1418,14 @@ timesofar("reductions")
         b2 = bitrand(l)
         @test map(~, b1) == map(x->~x, b1) == broadcast(~, b1)
         @test map(identity, b1) == map(x->x, b1) == b1
-        @test map(zero, b1) == map(x->false, b1) == falses(l)
-        @test map(one, b1) == map(x->true, b1) == trues(l)
+        @test map(zero, b1) == map(Returns(false), b1) == falses(l)
+        @test map(one, b1) == map(Returns(true), b1) == trues(l)
 
         @test map(&, b1, b2) == map((x,y)->x&y, b1, b2) == broadcast(&, b1, b2)
         @test map(|, b1, b2) == map((x,y)->x|y, b1, b2) == broadcast(|, b1, b2)
         @test map(⊻, b1, b2) == map((x,y)->x⊻y, b1, b2) == broadcast(⊻, b1, b2) == broadcast(xor, b1, b2)
+        @test map(⊼, b1, b2) == map((x,y)->x⊼y, b1, b2) == broadcast(⊼, b1, b2) == broadcast(nand, b1, b2)
+        @test map(⊽, b1, b2) == map((x,y)->x⊽y, b1, b2) == broadcast(⊽, b1, b2) == broadcast(nor, b1, b2)
 
         @test map(^, b1, b2) == map((x,y)->x^y, b1, b2) == b1 .^ b2
         @test map(*, b1, b2) == map((x,y)->x*y, b1, b2) == b1 .* b2
@@ -1407,8 +1445,8 @@ timesofar("reductions")
             @test map!(~, b, b1) == map!(x->~x, b, b1) == broadcast(~, b1) == b
             @test map!(!, b, b1) == map!(x->!x, b, b1) == broadcast(~, b1) == b
             @test map!(identity, b, b1) == map!(x->x, b, b1) == b1 == b
-            @test map!(zero, b, b1) == map!(x->false, b, b1) == falses(l) == b
-            @test map!(one, b, b1) == map!(x->true, b, b1) == trues(l) == b
+            @test map!(zero, b, b1) == map!(Returns(false), b, b1) == falses(l) == b
+            @test map!(one, b, b1) == map!(Returns(true), b, b1) == trues(l) == b
 
             @test map!(&, b, b1, b2) == map!((x,y)->x&y, b, b1, b2) == broadcast(&, b1, b2) == b
             @test map!(|, b, b1, b2) == map!((x,y)->x|y, b, b1, b2) == broadcast(|, b1, b2) == b
@@ -1666,3 +1704,10 @@ end
     @check_bit_operation all!(falses(100), trues(100, 100))
     @check_bit_operation all!(falses(1000), trues(1000, 100))
 end
+
+@testset "multidimensional concatenation returns BitArrays" begin
+    a = BitVector(ones(5))
+    @test typeof([a ;;; a]) <: BitArray
+    @test typeof([a a ;;; a a]) <: BitArray
+    @test typeof([a a ;;; [a a]]) <: BitArray
+end
diff --git a/test/boundscheck_exec.jl b/test/boundscheck_exec.jl
index 62a20921bd44ea..e1a7029334a3da 100644
--- a/test/boundscheck_exec.jl
+++ b/test/boundscheck_exec.jl
@@ -251,5 +251,12 @@ if bc_opt == bc_default || bc_opt == bc_off
     @test occursin("vector.body", sprint(code_llvm, g27079, Tuple{Vector{Int}}))
 end
 
+# Boundschecking removal of indices with different type, see #40281
+getindex_40281(v, a, b, c) = @inbounds getindex(v, a, b, c)
+typed_40281 = sprint((io, args...) -> code_warntype(io, args...; optimize=true), getindex_40281, Tuple{Array{Float64, 3}, Int, UInt8, Int})
+if bc_opt == bc_default || bc_opt == bc_off
+    @test occursin("arrayref(false", typed_40281)
+    @test !occursin("arrayref(true", typed_40281)
+end
 
 end
diff --git a/test/broadcast.jl b/test/broadcast.jl
index 6e97d609a42b0d..329bcc602206b4 100644
--- a/test/broadcast.jl
+++ b/test/broadcast.jl
@@ -914,6 +914,12 @@ end
     # hit the `foldl` branch:
     @test IndexStyle(bcraw) == IndexCartesian()
     @test reduce(paren, bcraw) == foldl(paren, xs)
+
+    # issue #41055
+    bc = Broadcast.instantiate(Broadcast.broadcasted(Base.literal_pow, Ref(^), [1,2], Ref(Val(2))))
+    @test sum(bc, dims=1, init=0) == [5]
+    bc = Broadcast.instantiate(Broadcast.broadcasted(*, ['a','b'], 'c'))
+    @test prod(bc, dims=1, init="") == ["acbc"]
 end
 
 # treat Pair as scalar:
@@ -951,15 +957,40 @@ p0 = copy(p)
 @test repr(.!) == "Base.Broadcast.BroadcastFunction(!)"
 @test eval(:(.+)) == Base.BroadcastFunction(+)
 
+@testset "Issue #5187: Broadcasting of short-circuiting ops" begin
+    ex = Meta.parse("A .< 1 .|| A .> 2")
+    @test ex == :((A .< 1) .|| (A .> 2))
+    @test ex.head == :.||
+    ex = Meta.parse("A .< 1 .&& A .> 2")
+    @test ex == :((A .< 1) .&& (A .> 2))
+    @test ex.head == :.&&
+
+    A = -1:4
+    @test (A .< 1 .|| A .> 2) == [true, true, false, false, true, true]
+    @test (A .>= 1 .&& A .<= 2) == [false, false, true, true, false, false]
+
+    mutable struct F5187; x; end
+    (f::F5187)(x) = (f.x += x)
+    @test (iseven.(1:4) .&& (F5187(0)).(ones(4))) == [false, 1, false, 2]
+    @test (iseven.(1:4) .|| (F5187(0)).(ones(4))) == [1, true, 2, true]
+    r = 1:4; o = ones(4); f = F5187(0);
+    @test (@. iseven(r) && f(o)) == [false, 1, false, 2]
+    @test (@. iseven(r) || f(o)) == [3, true, 4, true]
+
+    @test (iseven.(1:8) .&& iseven.((F5187(0)).(ones(8))) .&& (F5187(0)).(ones(8))) == [false,false,false,1,false,false,false,2]
+    @test (iseven.(1:8) .|| iseven.((F5187(0)).(ones(8))) .|| (F5187(0)).(ones(8))) == [1,true,true,true,2,true,true,true]
+    r = 1:8; o = ones(8); f1 = F5187(0); f2 = F5187(0)
+    @test (@. iseven(r) && iseven(f1(o)) && f2(o)) == [false,false,false,1,false,false,false,2]
+    @test (@. iseven(r) || iseven(f1(o)) || f2(o)) == [3,true,true,true,4,true,true,true]
+    @test (iseven.(1:8) .&& iseven.((F5187(0)).(ones(8))) .&& (F5187(0)).(ones(8))) == [false,false,false,1,false,false,false,2]
+    @test (iseven.(1:8) .|| iseven.((F5187(0)).(ones(8))) .|| (F5187(0)).(ones(8))) == [1,true,true,true,2,true,true,true]
+end
+
 @testset "Issue #28382: inferrability of broadcast with Union eltype" begin
     @test isequal([1, 2] .+ [3.0, missing], [4.0, missing])
     @test Core.Compiler.return_type(broadcast, Tuple{typeof(+), Vector{Int},
                                                      Vector{Union{Float64, Missing}}}) ==
         Union{Vector{Missing}, Vector{Union{Missing, Float64}}, Vector{Float64}}
-    @test isequal([1, 2] + [3.0, missing], [4.0, missing])
-    @test Core.Compiler.return_type(+, Tuple{Vector{Int},
-                                             Vector{Union{Float64, Missing}}}) ==
-        Union{Vector{Missing}, Vector{Union{Missing, Float64}}, Vector{Float64}}
     @test Core.Compiler.return_type(+, Tuple{Vector{Int},
                                              Vector{Union{Float64, Missing}}}) ==
         Union{Vector{Missing}, Vector{Union{Missing, Float64}}, Vector{Float64}}
@@ -1015,3 +1046,9 @@ end
         @test a_ == dropdims(a .* c, dims=(findall(==(1), size(c))...,))
     end
 end
+
+@testset "Issue #40309: still gives a range after #40320" begin
+    @test Base.broadcasted_kwsyntax(+, [1], [2]) isa Broadcast.Broadcasted{<:Any, <:Any, typeof(+)}
+    @test Broadcast.BroadcastFunction(+)(2:3, 2:3) == 4:2:6
+    @test Broadcast.BroadcastFunction(+)(2:3, 2:3) isa AbstractRange
+end
diff --git a/test/cartesian.jl b/test/cartesian.jl
index 8d2651b6f425f6..b3cb8315decad7 100644
--- a/test/cartesian.jl
+++ b/test/cartesian.jl
@@ -147,6 +147,14 @@ module TestOffsetArray
 end
 
 @testset "CartesianIndices getindex" begin
+    @testset "0D array" begin
+        a = zeros()
+        c = CartesianIndices(a)
+        @test a[c] == a
+        @test c[c] === c
+        @test c[] == CartesianIndex()
+    end
+
     @testset "AbstractUnitRange" begin
         for oinds in [(2, ), (2, 3), (2, 3, 4)]
             A = rand(1:10, oinds)
@@ -159,6 +167,34 @@ end
             @test all(i->A[i]==A[R[i]], R)
             @test all(i->A[i]==A[R[i]], collect(R))
             @test all(i->i in R, collect(R))
+
+            # Indexing a CartesianIndices with another CartesianIndices having the same ndims
+            # forwards the indexing to the component ranges and retains the wrapper
+            @test R[R] === R
+
+            R_array = collect(R)
+
+            all_onetoone = ntuple(x -> 1:1, Val(ndims(R)))
+            R2 = R[all_onetoone...]
+            @test R2 isa CartesianIndices{ndims(R)}
+
+            all_one = ntuple(x -> 1, Val(ndims(R)))
+            @test R2[all_one...] == R_array[all_one...]
+
+            @test R2 == R_array[all_onetoone...]
+
+            R3 = R[ntuple(x -> Colon(), Val(ndims(R)))...]
+            @test R3 === R
+
+            # test a mix of Colons and ranges
+            # up to two leading axes are colons, while the rest are UnitRanges
+            indstrailing = (1:1 for _ in min(ndims(R), 2)+1:ndims(R))
+            R4 = R[(Colon() for _ in 1:min(ndims(R), 2))..., indstrailing...]
+            @test R4 isa CartesianIndices{ndims(R)}
+            indsleading = CartesianIndices(axes(A)[1:min(ndims(A), 2)])
+            for I in indsleading
+                @test R4[I, indstrailing...] == R_array[I, indstrailing...]
+            end
         end
     end
 
@@ -173,6 +209,75 @@ end
 
             # TODO: A[SR] == A[Linearindices(SR)] should hold for StepRange CartesianIndices
             @test_broken A[SR] == A[LinearIndices(SR)]
+
+            # Create a CartesianIndices with StepRange indices to test indexing into it
+            R = CartesianIndices(oinds)
+            R_array = collect(R)
+
+            all_onetoone = ntuple(x -> 1:1, Val(ndims(R)))
+            R2 = R[all_onetoone...]
+            @test R2 isa CartesianIndices{ndims(R)}
+
+            all_one = ntuple(x -> 1, Val(ndims(R)))
+            @test R2[all_one...] == R_array[all_one...]
+            @test R2 == R_array[all_onetoone...]
+
+            R3 = R[ntuple(x -> Colon(), Val(ndims(R)))...]
+            @test R3 === R
+
+            # test a mix of Colons and ranges
+            # up to two leading axes are colons, while the rest are UnitRanges
+            indstrailing = (1:1 for _ in min(ndims(R), 2)+1:ndims(R))
+            R4 = R[(Colon() for _ in 1:min(ndims(R), 2))..., indstrailing...]
+            @test R4 isa CartesianIndices{ndims(R)}
+            indsleading = CartesianIndices(axes(R)[1:min(ndims(R), 2)])
+            for I in indsleading
+                @test R4[I, indstrailing...] == R_array[I, indstrailing...]
+            end
+        end
+
+        # CartesianIndices whose indices have a unit step may be their own axes
+        for oinds in [(1:1:4, ), (1:1:4, 1:1:5), (1:1:4, 1:1:5, 1:1:3)]
+            R = CartesianIndices(oinds)
+            @test R[R] === R
+            # test a mix of UnitRanges and StepRanges
+            R = CartesianIndices((oinds..., 1:3))
+            @test R[R] === R
+            R = CartesianIndices((1:3, oinds...))
+            @test R[R] === R
+        end
+    end
+
+    @testset "logical indexing of CartesianIndices with ranges" begin
+        c = CartesianIndices((1:0, 1:2))
+        c2 = c[true:false, 1:2]
+        @test c2 == c
+
+        for (inds, r) in Any[(1:2, false:true), (1:2, false:true:true),
+            (1:2:3, false:true), (1:2:3, false:true:true)]
+
+            c = CartesianIndices((inds, 1:2))
+            c2 = c[r, 1:2]
+            @test c2 isa CartesianIndices{ndims(c)}
+            @test c2[1, :] == c[2, :]
+        end
+
+        for (inds, r) in Any[(1:1, true:true), (1:1, true:true:true),
+            (1:1:1, true:true), (1:1:1, true:true:true)]
+
+            c = CartesianIndices((inds, 1:2))
+            c2 = c[r, 1:2]
+            @test c2 isa CartesianIndices{ndims(c)}
+            @test c2[1, :] == c[1, :]
+        end
+
+        for (inds, r) in Any[(1:1, false:false), (1:1, false:true:false),
+            (1:1:1, false:false), (1:1:1, false:true:false)]
+
+            c = CartesianIndices((inds, 1:2))
+            c2 = c[r, 1:2]
+            @test c2 isa CartesianIndices{ndims(c)}
+            @test size(c2, 1) == 0
         end
     end
 end
diff --git a/test/ccall.jl b/test/ccall.jl
index 8cb376c8056112..01f0f4f651aa8a 100644
--- a/test/ccall.jl
+++ b/test/ccall.jl
@@ -842,7 +842,7 @@ function check_code_trampoline(f, t, n::Int)
     @nospecialize(f, t)
     @test Base.return_types(f, t) == Any[Any]
     llvm = sprint(code_llvm, f, t)
-    @test count(x -> true, eachmatch(r"@jl_get_cfunction_trampoline\(", llvm)) == n
+    @test count(Returns(true), eachmatch(r"@jl_get_cfunction_trampoline\(", llvm)) == n
 end
 check_code_trampoline(testclosure, (Any, Any, Bool, Type), 2)
 check_code_trampoline(testclosure, (Any, Int, Bool, Type{Int}), 2)
@@ -906,7 +906,7 @@ for (t, v) in ((Complex{Int32}, :ci32), (Complex{Int64}, :ci64),
         global function $fname(s::$t)
             verbose && println("B: ", s)
             @test s == $v
-            if($(t).mutable)
+            if ismutable(s)
                 @test !(s === $a)
             end
             global c = s
@@ -934,7 +934,7 @@ for (t, v) in ((Complex{Int32}, :ci32), (Complex{Int64}, :ci64),
         end
         verbose && println("C: ",b)
         @test b == $v
-        if ($(t).mutable)
+        if ismutable($v)
             @test !(b === c)
             @test !(b === a)
         end
@@ -943,7 +943,7 @@ for (t, v) in ((Complex{Int32}, :ci32), (Complex{Int64}, :ci64),
         end
         verbose && println("C: ",b)
         @test b == $v
-        if ($(t).mutable)
+        if ismutable($v)
             @test !(b === c)
             @test !(b === a)
         end
@@ -953,7 +953,7 @@ for (t, v) in ((Complex{Int32}, :ci32), (Complex{Int64}, :ci64),
         verbose && println("C: ",b)
         @test b == $v
         @test b === c
-        if ($(t).mutable)
+        if ismutable($v)
             @test !(b === a)
         end
         let cf = @cfunction($fname, Any, (Ref{$t},))
@@ -962,7 +962,7 @@ for (t, v) in ((Complex{Int32}, :ci32), (Complex{Int64}, :ci64),
         verbose && println("C: ",b)
         @test b == $v
         @test b === c
-        if ($(t).mutable)
+        if ismutable($v)
             @test !(b === a)
         end
         let cf = @cfunction($fname, Any, (Ref{Any},))
@@ -970,7 +970,7 @@ for (t, v) in ((Complex{Int32}, :ci32), (Complex{Int64}, :ci64),
         end
         @test b == $v
         @test b === c
-        if ($(t).mutable)
+        if ismutable($v)
             @test !(b === a)
         end
         let cf = @cfunction($fname, Ref{AbstractString}, (Ref{Any},))
@@ -982,6 +982,26 @@ for (t, v) in ((Complex{Int32}, :ci32), (Complex{Int64}, :ci64),
     end
 end
 
+
+#issue 40164
+@testset "llvm parameter attributes on cfunction closures" begin
+    struct Struct40164
+        x::Cdouble
+        y::Cdouble
+        z::Cdouble
+    end
+
+    function test_40164()
+        ret = Struct40164[]
+        f = x::Struct40164 -> (push!(ret, x); nothing)
+        f_c = @cfunction($f, Cvoid, (Struct40164,))
+        ccall(f_c.ptr, Ptr{Cvoid}, (Struct40164,), Struct40164(0, 1, 2))
+        ret
+    end
+
+    @test test_40164() == [Struct40164(0, 1, 2)]
+end
+
 else
 
 @test_broken "cfunction: no support for closures on this platform"
@@ -1443,15 +1463,28 @@ end
              eval(:(f20835(x) = ccall(:fn, Cvoid, (Ptr{typeof(x)},), x))))
 @test_throws(UndefVarError(:Something_not_defined_20835),
              eval(:(f20835(x) = ccall(:fn, Something_not_defined_20835, (Ptr{typeof(x)},), x))))
-
-@noinline f21104at(::Type{T}) where {T} = ccall(:fn, Cvoid, (Some{T},), Some(0))
-@noinline f21104rt(::Type{T}) where {T} = ccall(:fn, Some{T}, ())
-@test code_llvm(devnull, f21104at, (Type{Float64},)) === nothing
-@test code_llvm(devnull, f21104rt, (Type{Float64},)) === nothing
-@test_throws(ErrorException("ccall argument 1 doesn't correspond to a C type"),
-             f21104at(Float64))
-@test_throws(ErrorException("ccall return type doesn't correspond to a C type"),
-             f21104rt(Float64))
+@test isempty(methods(f20835))
+
+@test_throws(ErrorException("ccall method definition: argument 1 type doesn't correspond to a C type"),
+             @eval f21104(::Type{T}) where {T} = ccall(:fn, Cvoid, (Some{T},), Some(0)))
+@test_throws(ErrorException("ccall method definition: return type doesn't correspond to a C type"),
+             @eval f21104(::Type{T}) where {T} = ccall(:fn, Some{T}, ()))
+@test isempty(methods(f21104))
+@test_throws(ErrorException("ccall method definition: argument 1 type doesn't correspond to a C type"),
+             @eval if false; ccall(:fn, Cvoid, (Some.body,), Some(0)); end)
+@test_throws(ErrorException("ccall method definition: return type doesn't correspond to a C type"),
+             @eval if false; ccall(:fn, Some.body, ()); end)
+@test_throws(ErrorException("ccall method definition: return type doesn't correspond to a C type"),
+             @eval if false; ccall(:fn, Tuple, ()); end)
+## TODO: lowering is broken on this (throws "syntax: ssavalue with no def")
+#@test_throws(ErrorException("ccall method definition: return type doesn't correspond to a C type"),
+#             @eval if false; ccall(:fn, Tuple{Val{T}} where T, ()); end)
+@test_throws(ErrorException("ccall method definition: return type doesn't correspond to a C type"),
+             @eval if false; ccall(:fn, Tuple{Val}, ()); end)
+@test_throws(TypeError, @eval if false; ccall(:fn, Some.var, ()); end)
+@test_throws(TypeError, @eval if false; ccall(:fn, Cvoid, (Some.var,), Some(0)); end)
+@test_throws(ErrorException("ccall method definition: Vararg not allowed for argument list"),
+             @eval ccall(+, Int, (Vararg{Int},), 1))
 
 # test for malformed syntax errors
 @test Expr(:error, "more arguments than types for ccall") == Meta.lower(@__MODULE__, :(ccall(:fn, A, (), x)))
@@ -1482,21 +1515,20 @@ end
 
 evalf_callback_19805(ci::callinfos_19805{FUNC_FT}) where {FUNC_FT} = ci.f(0.5)::Float64
 
-evalf_callback_c_19805(ci::callinfos_19805{FUNC_FT}) where {FUNC_FT} = @cfunction(
-    evalf_callback_19805, Float64, (callinfos_19805{FUNC_FT},))
-
-@test_throws(ErrorException("cfunction argument 1 doesn't correspond to a C type"),
-             evalf_callback_c_19805( callinfos_19805(sin) ))
-@test_throws(ErrorException("cfunction argument 2 doesn't correspond to a C type"),
-             @cfunction(+, Int, (Int, Nothing)))
-@test_throws(ErrorException("cfunction: Vararg syntax not allowed for argument list"),
-             @cfunction(+, Int, (Vararg{Int},)))
+@test_throws(ErrorException("cfunction method definition: argument 1 type doesn't correspond to a C type"),
+             @eval evalf_callback_c_19805(ci::callinfos_19805{FUNC_FT}) where {FUNC_FT} =
+                 @cfunction(evalf_callback_19805, Float64, (callinfos_19805{FUNC_FT},)))
+@test isempty(methods(evalf_callback_c_19805))
+@test_throws(ErrorException("cfunction method definition: Vararg not allowed for argument list"),
+             @eval if false; @cfunction(+, Int, (Vararg{Int},)); end)
 @test_throws(ErrorException("could not evaluate cfunction argument type (it might depend on a local variable)"),
              @eval () -> @cfunction(+, Int, (Ref{T}, Ref{T})) where T)
 @test_throws(ErrorException("could not evaluate cfunction return type (it might depend on a local variable)"),
              @eval () -> @cfunction(+, Ref{T}, (Int, Int)) where T)
+@test_throws(ErrorException("cfunction argument 2 doesn't correspond to a C type"),
+             @eval @cfunction(+, Int, (Int, Nothing)))
 @test_throws(ErrorException("cfunction return type Ref{Any} is invalid. Use Any or Ptr{Any} instead."),
-             @cfunction(+, Ref{Any}, (Int, Int)))
+             @eval @cfunction(+, Ref{Any}, (Int, Int)))
 
 # test Ref{abstract_type} calling parameter passes a heap box
 abstract type Abstract22734 end
@@ -1770,3 +1802,47 @@ ccall_with_undefined_lib() = ccall((:time, xx_nOt_DeFiNeD_xx), Cint, (Ptr{Cvoid}
     b16 = transcode(UInt16, b8)
     @test b16 == b
 end
+
+# issue 33413
+@testset "cglobal lowering" begin
+    # crash in cglobal33413_ptrinline[_notype]() specifically requires the library pointer be
+    # retrieved inside the function; using a global pointer variable doesn't trigger the crash
+    function cglobal33413_ptrvar()
+        libh = Libdl.dlopen(libccalltest)
+        sym = Libdl.dlsym(libh, :global_var)
+        return cglobal(sym, Cint)
+    end
+    function cglobal33413_ptrvar_notype()
+        libh = Libdl.dlopen(libccalltest)
+        sym = Libdl.dlsym(libh, :global_var)
+        return cglobal(sym)
+    end
+    function cglobal33413_ptrinline()
+        libh = Libdl.dlopen(libccalltest)
+        return cglobal(Libdl.dlsym(libh, :global_var), Cint)
+    end
+    function cglobal33413_ptrinline_notype()
+        libh = Libdl.dlopen(libccalltest)
+        return cglobal(Libdl.dlsym(libh, :global_var))
+    end
+    function cglobal33413_tupleliteral()
+        return cglobal((:global_var, libccalltest), Cint)
+    end
+    function cglobal33413_tupleliteral_notype()
+        return cglobal((:global_var, libccalltest))
+    end
+    function cglobal33413_literal()
+        return cglobal(:sin, Cint)
+    end
+    function cglobal33413_literal_notype()
+        return cglobal(:sin)
+    end
+    @test unsafe_load(cglobal33413_ptrvar()) == 1
+    @test unsafe_load(cglobal33413_ptrinline()) == 1
+    @test unsafe_load(cglobal33413_tupleliteral()) == 1
+    @test unsafe_load(convert(Ptr{Cint}, cglobal33413_ptrvar_notype())) == 1
+    @test unsafe_load(convert(Ptr{Cint}, cglobal33413_ptrinline_notype())) == 1
+    @test unsafe_load(convert(Ptr{Cint}, cglobal33413_tupleliteral_notype())) == 1
+    @test cglobal33413_literal() != C_NULL
+    @test cglobal33413_literal_notype() != C_NULL
+end
diff --git a/test/channels.jl b/test/channels.jl
index c5b1b3f6db9a51..0611b387e6f884 100644
--- a/test/channels.jl
+++ b/test/channels.jl
@@ -333,7 +333,7 @@ end
     # interpreting the calling function.
     @noinline garbage_finalizer(f) = (finalizer(f, "gar" * "bage"); nothing)
     run = Ref(0)
-    garbage_finalizer(x -> nothing) # warmup
+    garbage_finalizer(Returns(nothing)) # warmup
     @test GC.enable(false)
     # test for finalizers trying to yield leading to failed attempts to context switch
     garbage_finalizer((x) -> (run[] += 1; sleep(1)))
@@ -533,7 +533,7 @@ end
 
 # make sure that we don't accidentally create a one-shot timer
 let
-    t = Timer(t->nothing, 10, interval=0.00001)
+    t = Timer(Returns(nothing), 10, interval=0.00001)
     @test ccall(:uv_timer_get_repeat, UInt64, (Ptr{Cvoid},), t) == 1
     close(t)
 end
diff --git a/test/char.jl b/test/char.jl
index abc8db33cb4a7b..279adb628ed178 100644
--- a/test/char.jl
+++ b/test/char.jl
@@ -99,6 +99,7 @@ end
     #getindex(c::Char) = c
     for x in testarrays
         @test getindex(x) == x
+        @test getindex(x, CartesianIndex()) == x
     end
 
     #first(c::Char) = c
@@ -290,6 +291,7 @@ end
 
 @testset "broadcasting of Char" begin
     @test identity.('a') == 'a'
+    @test 'a' .* ['b', 'c'] == ["ab", "ac"]
 end
 
 @testset "code point format of U+ syntax (PR 33291)" begin
diff --git a/test/choosetests.jl b/test/choosetests.jl
index 4a3b4c7ddd028e..21f313fdbbb34b 100644
--- a/test/choosetests.jl
+++ b/test/choosetests.jl
@@ -5,6 +5,32 @@ using Random, Sockets
 const STDLIB_DIR = Sys.STDLIB
 const STDLIBS = filter!(x -> isfile(joinpath(STDLIB_DIR, x, "src", "$(x).jl")), readdir(STDLIB_DIR))
 
+const TESTNAMES = [
+        "subarray", "core", "compiler", "worlds", "atomics",
+        "keywordargs", "numbers", "subtype",
+        "char", "strings", "triplequote", "unicode", "intrinsics",
+        "dict", "hashing", "iobuffer", "staged", "offsetarray",
+        "arrayops", "tuple", "reduce", "reducedim", "abstractarray",
+        "intfuncs", "simdloop", "vecelement", "rational",
+        "bitarray", "copy", "math", "fastmath", "functional", "iterators",
+        "operators", "ordering", "path", "ccall", "parse", "loading", "gmp",
+        "sorting", "spawn", "backtrace", "exceptions",
+        "file", "read", "version", "namedtuple",
+        "mpfr", "broadcast", "complex",
+        "floatapprox", "stdlib", "reflection", "regex", "float16",
+        "combinatorics", "sysinfo", "env", "rounding", "ranges", "mod2pi",
+        "euler", "show", "client",
+        "errorshow", "sets", "goto", "llvmcall", "llvmcall2", "ryu",
+        "some", "meta", "stacktraces", "docs",
+        "misc", "threads", "stress", "binaryplatforms", "atexit",
+        "enums", "cmdlineargs", "int", "interpreter",
+        "checked", "bitset", "floatfuncs", "precompile",
+        "boundscheck", "error", "ambiguous", "cartesian", "osutils",
+        "channels", "iostream", "secretbuffer", "specificity",
+        "reinterpretarray", "syntax", "corelogging", "missing", "asyncmap",
+        "smallarrayshrink", "opaque_closure", "filesystem", "download"
+]
+
 """
 
 `tests, net_on, exit_on_error, seed = choosetests(choices)` selects a set of tests to be
@@ -32,32 +58,6 @@ in the `choices` argument:
      This option can be used to reproduce failed tests.
 """
 function choosetests(choices = [])
-    testnames = [
-        "subarray", "core", "compiler", "worlds",
-        "keywordargs", "numbers", "subtype",
-        "char", "strings", "triplequote", "unicode", "intrinsics",
-        "dict", "hashing", "iobuffer", "staged", "offsetarray",
-        "arrayops", "tuple", "reduce", "reducedim", "abstractarray",
-        "intfuncs", "simdloop", "vecelement", "rational",
-        "bitarray", "copy", "math", "fastmath", "functional", "iterators",
-        "operators", "ordering", "path", "ccall", "parse", "loading", "gmp",
-        "sorting", "spawn", "backtrace", "exceptions",
-        "file", "read", "version", "namedtuple",
-        "mpfr", "broadcast", "complex",
-        "floatapprox", "stdlib", "reflection", "regex", "float16",
-        "combinatorics", "sysinfo", "env", "rounding", "ranges", "mod2pi",
-        "euler", "show", "client",
-        "errorshow", "sets", "goto", "llvmcall", "llvmcall2", "ryu",
-        "some", "meta", "stacktraces", "docs",
-        "misc", "threads", "stress", "binaryplatforms", "atexit",
-        "enums", "cmdlineargs", "int", "interpreter",
-        "checked", "bitset", "floatfuncs", "precompile",
-        "boundscheck", "error", "ambiguous", "cartesian", "osutils",
-        "channels", "iostream", "secretbuffer", "specificity",
-        "reinterpretarray", "syntax", "corelogging", "missing", "asyncmap",
-        "smallarrayshrink", "opaque_closure", "filesystem", "download"
-    ]
-
     tests = []
     skip_tests = []
     exit_on_error = false
@@ -80,7 +80,7 @@ function choosetests(choices = [])
     end
 
     if tests == ["all"] || isempty(tests)
-        tests = testnames
+        tests = TESTNAMES
     end
 
     function filtertests!(tests, name, files=[name])
diff --git a/test/clangsa/MissingRoots.c b/test/clangsa/MissingRoots.c
index 1c9f7c8e4ad705..78dcc195d59ced 100644
--- a/test/clangsa/MissingRoots.c
+++ b/test/clangsa/MissingRoots.c
@@ -409,14 +409,6 @@ void stack_rooted(jl_value_t *lb JL_MAYBE_UNROOTED, jl_value_t *ub JL_MAYBE_UNRO
     JL_GC_POP();
 }
 
-void JL_NORETURN throw_internal(jl_value_t *e JL_MAYBE_UNROOTED)
-{
-    jl_ptls_t ptls = jl_get_ptls_states();
-    ptls->sig_exception = e;
-    jl_gc_unsafe_enter(ptls);
-    look_at_value(e);
-}
-
 JL_DLLEXPORT jl_value_t *jl_totally_used_function(int i)
 {
     jl_value_t *v = jl_box_int32(i); // expected-note{{Started tracking value here}}
diff --git a/test/client.jl b/test/client.jl
index 497cc54b135342..f917e45fb412da 100644
--- a/test/client.jl
+++ b/test/client.jl
@@ -18,14 +18,22 @@ nested_error_pattern = r"""
 
 @testset "display_error" begin
     # Display of errors which cause more than one entry on the exception stack
-    err_str = try
+    excs = try
         eval(nested_error_expr)
     catch
-        excs = Base.catch_stack()
-        @test typeof.(first.(excs)) == [UndefVarError, DivideError]
-        sprint(Base.display_error, excs)
+        Base.current_exceptions()
     end
-    @test occursin(nested_error_pattern, err_str)
+    @test typeof.(first.(excs)) == [UndefVarError, DivideError]
+    @test occursin(nested_error_pattern, sprint(Base.display_error, excs))
+
+    @test occursin(r"""
+        2-element ExceptionStack:
+        DivideError: integer division error
+        Stacktrace:.*
+
+        caused by: UndefVarError: __not_a_binding__ not defined
+        Stacktrace:.*
+        """s, sprint(show, excs))
 end
 
 @testset "Fallback REPL" begin
diff --git a/test/cmdlineargs.jl b/test/cmdlineargs.jl
index 92d6ac9b1ee22e..fb206acf03477e 100644
--- a/test/cmdlineargs.jl
+++ b/test/cmdlineargs.jl
@@ -324,7 +324,11 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
         rm(memfile)
         @test popfirst!(got) == "        0 g(x) = x + 123456"
         @test popfirst!(got) == "        - function f(x)"
-        @test popfirst!(got) == "       80     []"
+        if Sys.WORD_SIZE == 64
+            @test popfirst!(got) == "       48     []"
+        else
+            @test popfirst!(got) == "       32     []"
+        end
         if Sys.WORD_SIZE == 64
             # P64 pools with 64 bit tags
             @test popfirst!(got) == "       16     Base.invokelatest(g, 0)"
@@ -337,7 +341,11 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
             @test popfirst!(got) == "        8     Base.invokelatest(g, 0)"
             @test popfirst!(got) == "       32     Base.invokelatest(g, x)"
         end
-        @test popfirst!(got) == "       80     []"
+        if Sys.WORD_SIZE == 64
+            @test popfirst!(got) == "       48     []"
+        else
+            @test popfirst!(got) == "       32     []"
+        end
         @test popfirst!(got) == "        - end"
         @test popfirst!(got) == "        - f(1.23)"
         @test isempty(got) || got
@@ -350,6 +358,9 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
     @test readchomp(`$exename -E "Base.JLOptions().opt_level" --optimize`) == "3"
     @test readchomp(`$exename -E "Base.JLOptions().opt_level" -O0`) == "0"
 
+    @test readchomp(`$exename -E "Base.JLOptions().opt_level_min"`) == "0"
+    @test readchomp(`$exename -E "Base.JLOptions().opt_level_min" --min-optlevel=2`) == "2"
+
     # -g
     @test readchomp(`$exename -E "Base.JLOptions().debug_level" -g`) == "2"
     let code = writereadpipeline("code_llvm(stdout, +, (Int64, Int64), raw=true, dump_module=true)", `$exename -g0`)
@@ -385,6 +396,8 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
         filter!(a -> !startswith(a, "--check-bounds="), exename_default_checkbounds.exec)
         @test parse(Int, readchomp(`$exename_default_checkbounds -E "Int(Base.JLOptions().check_bounds)"`)) ==
             JL_OPTIONS_CHECK_BOUNDS_DEFAULT
+        @test parse(Int, readchomp(`$exename -E "Int(Base.JLOptions().check_bounds)"
+            --check-bounds=auto`)) == JL_OPTIONS_CHECK_BOUNDS_DEFAULT
         @test parse(Int, readchomp(`$exename -E "Int(Base.JLOptions().check_bounds)"
             --check-bounds=yes`)) == JL_OPTIONS_CHECK_BOUNDS_ON
         @test parse(Int, readchomp(`$exename -E "Int(Base.JLOptions().check_bounds)"
@@ -586,7 +599,7 @@ end
 
 
 # test error handling code paths of running --sysimage
-let exename = Base.julia_cmd()
+let exename = `$(Base.julia_cmd().exec[1]) -t 1`
     sysname = unsafe_string(Base.JLOptions().image_file)
     for nonexist_image in (
             joinpath(@__DIR__, "nonexistent"),
diff --git a/test/compiler/codegen.jl b/test/compiler/codegen.jl
index 47f419d9937a21..fbba027cd9f91f 100644
--- a/test/compiler/codegen.jl
+++ b/test/compiler/codegen.jl
@@ -350,7 +350,7 @@ struct Const{T<:Array}
 end
 
 @eval Base.getindex(A::Const, i1::Int) = Core.const_arrayref($(Expr(:boundscheck)), A.a, i1)
-@eval Base.getindex(A::Const, i1::Int, i2::Int, I::Int...) =  (Base.@_inline_meta; Core.const_arrayref($(Expr(:boundscheck)), A.a, i1, i2, I...))
+@eval Base.getindex(A::Const, i1::Int, i2::Int, I::Int...) =  (@inline; Core.const_arrayref($(Expr(:boundscheck)), A.a, i1, i2, I...))
 
 function foo31018!(a, b)
     @aliasscope for i in eachindex(a, b)
@@ -553,3 +553,87 @@ end
     end
     @test occursin("llvm.julia.gc_preserve_begin", get_llvm(f4, Tuple{Bool}, true, false, false))
 end
+
+# issue #32843
+function f32843(vals0, v)
+    (length(vals0) > 1) && (vals = v[1])
+    (length(vals0) == 1 && vals0[1]==1) && (vals = 1:2)
+    vals
+end
+@test_throws UndefVarError f32843([6], Vector[[1]])
+
+# issue #40855, struct constants with union fields
+@enum T40855 X40855
+struct A40855
+    d::Union{Nothing, T40855}
+    b::Union{Nothing, Int}
+end
+g() = string(A40855(X40855, 1))
+@test g() == "$(@__MODULE__).A40855($(@__MODULE__).X40855, 1)"
+
+# issue #40612
+f40612(a, b) = a|b === a|b
+g40612(a, b) = a[]|a[] === b[]|b[]
+@test f40612(true, missing)
+@test !g40612(Union{Bool,Missing}[missing], Union{Bool,Missing}[true])
+@test !g40612(Union{Bool,Missing}[false], Union{Bool,Missing}[true])
+@test g40612(Union{Bool,Missing}[missing], Union{Bool,Missing}[missing])
+@test g40612(Union{Bool,Missing}[true], Union{Bool,Missing}[true])
+@test g40612(Union{Bool,Missing}[false], Union{Bool,Missing}[false])
+
+# issue #41438
+struct A41438{T}
+  x::Ptr{T}
+end
+struct B41438{T}
+  x::T
+end
+f41438(y) = y[].x
+@test A41438.body.layout != C_NULL
+@test B41438.body.layout === C_NULL
+@test f41438(Ref{A41438}(A41438(C_NULL))) === C_NULL
+@test f41438(Ref{B41438}(B41438(C_NULL))) === C_NULL
+
+const S41438 = Pair{Any, Ptr{T}} where T
+g41438() = Array{S41438,1}(undef,1)[1].first
+get_llvm(g41438, ()); # cause allocation of layout
+@test S41438.body.layout != C_NULL
+@test !Base.datatype_pointerfree(S41438.body)
+@test S41438{Int}.layout != C_NULL
+@test !Base.datatype_pointerfree(S41438{Int})
+
+# issue #41157
+f41157(a, b) = a[1] = b[1]
+@test_throws BoundsError f41157(Tuple{Int}[], Tuple{Union{}}[])
+
+# issue #41096
+struct Modulate41096{M<:Union{Function, Val{true}, Val{false}}, id}
+    modulate::M
+    Modulate41096(id::Symbol, modulate::Function) = new{typeof(modulate), id}(modulate)
+    Modulate41096(id::Symbol, modulate::Bool=true) = new{Val{modulate}, id}(modulate|>Val)
+end
+@inline ismodulatable41096(modulate::Modulate41096) = ismodulatable41096(typeof(modulate))
+@inline ismodulatable41096(::Type{<:Modulate41096{Val{B}}}) where B = B
+@inline ismodulatable41096(::Type{<:Modulate41096{<:Function}}) = true
+
+mutable struct Term41096{I, M<:Modulate41096}
+    modulate::M
+    Term41096{I}(modulate::Modulate41096) where I = new{I, typeof(modulate)}(modulate)
+end
+@inline ismodulatable41096(term::Term41096) = ismodulatable41096(typeof(term))
+@inline ismodulatable41096(::Type{<:Term41096{I, M} where I}) where M = ismodulatable41096(M)
+
+function newexpand41096(gen, name::Symbol)
+    flag = ismodulatable41096(getfield(gen, name))
+    if flag
+        return true
+    else
+        return false
+    end
+end
+
+t41096 = Term41096{:t}(Modulate41096(:t, false))
+μ41096 = Term41096{:μ}(Modulate41096(:μ, false))
+U41096 = Term41096{:U}(Modulate41096(:U, false))
+
+@test !newexpand41096((t=t41096, μ=μ41096, U=U41096), :U)
diff --git a/test/compiler/contextual.jl b/test/compiler/contextual.jl
index bce782d73df877..5d97f4f6542b74 100644
--- a/test/compiler/contextual.jl
+++ b/test/compiler/contextual.jl
@@ -116,7 +116,7 @@ f() = 2
 @test overdub(Ctx(), gcd, 10, 20) === gcd(10, 20)
 
 # Test that pure propagates for Cassette
-Base.@pure isbitstype(T) = T.isbitstype
+Base.@pure isbitstype(T) = Base.isbitstype(T)
 f31012(T) = Val(isbitstype(T))
 @test @inferred(overdub(Ctx(), f31012, Int64)) == Val(true)
 
@@ -135,3 +135,82 @@ let method = which(func2, ())
 end
 func3() = func2()
 @test_throws UndefVarError func3()
+
+
+
+## overlay method tables
+
+module OverlayModule
+
+using Base.Experimental: @MethodTable, @overlay
+
+@MethodTable(mt)
+
+@overlay mt function sin(x::Float64)
+    1
+end
+
+# short function def
+@overlay mt cos(x::Float64) = 2
+
+# parametric function def
+@overlay mt tan(x::T) where {T} = 3
+
+end
+
+methods = Base._methods_by_ftype(Tuple{typeof(sin), Float64}, nothing, 1, typemax(UInt))
+@test only(methods).method.module === Base.Math
+
+methods = Base._methods_by_ftype(Tuple{typeof(sin), Float64}, OverlayModule.mt, 1, typemax(UInt))
+@test only(methods).method.module === OverlayModule
+
+methods = Base._methods_by_ftype(Tuple{typeof(sin), Int}, OverlayModule.mt, 1, typemax(UInt))
+@test isempty(methods)
+
+# precompilation
+
+load_path = mktempdir()
+depot_path = mktempdir()
+try
+    pushfirst!(LOAD_PATH, load_path)
+    pushfirst!(DEPOT_PATH, depot_path)
+
+    write(joinpath(load_path, "Foo.jl"),
+          """
+          module Foo
+          Base.Experimental.@MethodTable(mt)
+          Base.Experimental.@overlay mt sin(x::Int) = 1
+          end
+          """)
+
+     # precompiling Foo serializes the overlay method through the `mt` binding in the module
+     Foo = Base.require(Main, :Foo)
+     @test length(Foo.mt) == 1
+
+    write(joinpath(load_path, "Bar.jl"),
+          """
+          module Bar
+          Base.Experimental.@MethodTable(mt)
+          end
+          """)
+
+    write(joinpath(load_path, "Baz.jl"),
+          """
+          module Baz
+          using Bar
+          Base.Experimental.@overlay Bar.mt sin(x::Int) = 1
+          end
+          """)
+
+     # when referring to a method table in another module,
+     # the overlay method needs to be discovered explicitly
+     Bar = Base.require(Main, :Bar)
+     @test length(Bar.mt) == 0
+     Baz = Base.require(Main, :Baz)
+     @test length(Bar.mt) == 1
+finally
+    rm(load_path, recursive=true, force=true)
+    rm(depot_path, recursive=true, force=true)
+    filter!((≠)(load_path), LOAD_PATH)
+    filter!((≠)(depot_path), DEPOT_PATH)
+end
diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl
index 754fd7a3f5ce8c..9ad091f08e5820 100644
--- a/test/compiler/inference.jl
+++ b/test/compiler/inference.jl
@@ -44,6 +44,18 @@ let t = Tuple{Ref{T},T,T} where T, c = Tuple{Ref, T, T} where T # #36407
     @test t <: Core.Compiler.limit_type_size(t, c, Union{}, 1, 100)
 end
 
+# obtain Vararg with 2 undefined fields
+let va = ccall(:jl_type_intersection_with_env, Any, (Any, Any), Tuple{Tuple}, Tuple{Tuple{Vararg{Any, N}}} where N)[2][1]
+    @test Core.Compiler.limit_type_size(Tuple, va, Union{}, 2, 2) === Any
+end
+
+let # 40336
+    t = Type{Type{Int}}
+    c = Type{Int}
+    r = Core.Compiler.limit_type_size(t, c, c, 100, 100)
+    @test t !== r && t <: r
+end
+
 @test Core.Compiler.unionlen(Union{}) == 1
 @test Core.Compiler.unionlen(Int8) == 1
 @test Core.Compiler.unionlen(Union{Int8, Int16}) == 2
@@ -670,8 +682,8 @@ let fieldtype_tfunc = Core.Compiler.fieldtype_tfunc,
     @test fieldtype_tfunc(Union{Type{Base.RefValue{<:Real}}, Type{Int32}}, Const(:x)) == Const(Real)
     @test fieldtype_tfunc(Const(Union{Base.RefValue{<:Real}, Type{Int32}}), Const(:x)) == Const(Real)
     @test fieldtype_tfunc(Type{Union{Base.RefValue{T}, Type{Int32}}} where {T<:Real}, Const(:x)) == Type{<:Real}
-    @test fieldtype_tfunc(Type{<:Tuple}, Const(1)) == Type
-    @test fieldtype_tfunc(Type{<:Tuple}, Any) == Type
+    @test fieldtype_tfunc(Type{<:Tuple}, Const(1)) == Any
+    @test fieldtype_tfunc(Type{<:Tuple}, Any) == Any
     @test fieldtype_nothrow(Type{Base.RefValue{<:Real}}, Const(:x))
     @test !fieldtype_nothrow(Type{Union{}}, Const(:x))
     @test !fieldtype_nothrow(Union{Type{Base.RefValue{T}}, Int32} where {T<:Real}, Const(:x))
@@ -1517,7 +1529,6 @@ let linfo = get_linfo(Base.convert, Tuple{Type{Int64}, Int32}),
     @test opt.src.ssavaluetypes isa Vector{Any}
     @test !opt.src.inferred
     @test opt.mod === Base
-    @test opt.nargs == 3
 end
 
 # approximate static parameters due to unions
@@ -1544,9 +1555,9 @@ f_pure_add() = (1 + 1 == 2) ? true : "FAIL"
 @test @inferred f_pure_add()
 
 # inference of `T.mutable`
-@test Core.Compiler.getfield_tfunc(Const(Int), Const(:mutable)) == Const(false)
-@test Core.Compiler.getfield_tfunc(Const(Vector{Int}), Const(:mutable)) == Const(true)
-@test Core.Compiler.getfield_tfunc(DataType, Const(:mutable)) == Bool
+@test Core.Compiler.getfield_tfunc(Const(Int.name), Const(:flags)) == Const(0x4)
+@test Core.Compiler.getfield_tfunc(Const(Vector{Int}.name), Const(:flags)) == Const(0x2)
+@test Core.Compiler.getfield_tfunc(Core.TypeName, Const(:flags)) == UInt8
 
 # getfield on abstract named tuples. issue #32698
 import Core.Compiler.getfield_tfunc
@@ -1816,16 +1827,57 @@ end
         return c, d # ::Tuple{Int,Int}
     end == Any[Tuple{Int,Int}]
 
-    # shouldn't use the old constraint when the subject of condition has changed
+    # should invalidate old constraint when the subject of condition has changed
     @test Base.return_types((Union{Nothing,Int},)) do a
-        b = a === nothing
-        c = b ? 0 : a # c::Int
+        cond = a === nothing
+        r1 = cond ? 0 : a # r1::Int
         a = 0
-        d = b ? a : 1 # d::Int, not d::Union{Nothing,Int}
-        return c, d # ::Tuple{Int,Int}
+        r2 = cond ? a : 1 # r2::Int, not r2::Union{Nothing,Int}
+        return r1, r2 # ::Tuple{Int,Int}
     end == Any[Tuple{Int,Int}]
 end
 
+# https://github.com/JuliaLang/julia/issues/42090#issuecomment-911824851
+# `PartialStruct` shouldn't wrap `Conditional`
+let M = Module()
+    @eval M begin
+        struct BePartialStruct
+            val::Int
+            cond
+        end
+    end
+
+    rt = @eval M begin
+        Base.return_types((Union{Nothing,Int},)) do a
+            cond = a === nothing
+            obj = $(Expr(:new, M.BePartialStruct, 42, :cond))
+            r1 = getfield(obj, :cond) ? 0 : a # r1::Union{Nothing,Int}, not r1::Int (because PartialStruct doesn't wrap Conditional)
+            a = $(gensym(:anyvar))::Any
+            r2 = getfield(obj, :cond) ? a : nothing # r2::Any, not r2::Const(nothing) (we don't need to worry about constraint invalidation here)
+            return r1, r2 # ::Tuple{Union{Nothing,Int},Any}
+        end |> only
+    end
+    @test rt == Tuple{Union{Nothing,Int},Any}
+end
+
+@testset "conditional constraint propagation from non-`Conditional` object" begin
+    @test Base.return_types((Bool,)) do b
+        if b
+            return !b ? nothing : 1 # ::Int
+        else
+            return 0
+        end
+    end == Any[Int]
+
+    @test Base.return_types((Any,)) do b
+        if b
+            return b # ::Bool
+        else
+            return nothing
+        end
+    end == Any[Union{Bool,Nothing}]
+end
+
 function f25579(g)
     h = g[]
     t = (h === nothing)
@@ -2185,12 +2237,10 @@ code28279 = code_lowered(f28279, (Bool,))[1].code
 oldcode28279 = deepcopy(code28279)
 ssachangemap = fill(0, length(code28279))
 labelchangemap = fill(0, length(code28279))
-worklist = Int[]
 let i
     for i in 1:length(code28279)
         stmt = code28279[i]
         if isa(stmt, GotoIfNot)
-            push!(worklist, i)
             ssachangemap[i] = 1
             if i < length(code28279)
                 labelchangemap[i + 1] = 1
@@ -2647,7 +2697,7 @@ const DenseIdx = Union{IntRange,Integer}
     foo_26724((result..., length(r)), I...)
 @test @inferred(foo_26724((), 1:4, 1:5, 1:6)) === (4, 5, 6)
 
-# Non uniformity in expresions with PartialTypeVar
+# Non uniformity in expressions with PartialTypeVar
 @test Core.Compiler.:⊑(Core.Compiler.PartialTypeVar(TypeVar(:N), true, true), TypeVar)
 let N = TypeVar(:N)
     @test Core.Compiler.apply_type_nothrow([Core.Compiler.Const(NTuple),
@@ -2850,9 +2900,24 @@ partial_return_2(x) = Val{partial_return_1(x)[2]}
 
 @test Base.return_types(partial_return_2, (Int,)) == Any[Type{Val{1}}]
 
-# Precision of abstract_iteration
+# Soundness and precision of abstract_iteration
+f41839() = (1:100...,)
+@test NTuple{100,Int} <: only(Base.return_types(f41839, ())) <: Tuple{Vararg{Int}}
 f_splat(x) = (x...,)
 @test Base.return_types(f_splat, (Pair{Int,Int},)) == Any[Tuple{Int, Int}]
+@test Base.return_types(f_splat, (UnitRange{Int},)) == Any[Tuple{Vararg{Int}}]
+struct Itr41839_1 end # empty or infinite
+Base.iterate(::Itr41839_1) = rand(Bool) ? (nothing, nothing) : nothing
+Base.iterate(::Itr41839_1, ::Nothing) = (nothing, nothing)
+@test Base.return_types(f_splat, (Itr41839_1,)) == Any[Tuple{}]
+struct Itr41839_2 end # empty or failing
+Base.iterate(::Itr41839_2) = rand(Bool) ? (nothing, nothing) : nothing
+Base.iterate(::Itr41839_2, ::Nothing) = error()
+@test Base.return_types(f_splat, (Itr41839_2,)) == Any[Tuple{}]
+struct Itr41839_3 end
+Base.iterate(::Itr41839_3 ) = rand(Bool) ? nothing : (nothing, 1)
+Base.iterate(::Itr41839_3 , i) = i < 16 ? (i, i + 1) : nothing
+@test only(Base.return_types(f_splat, (Itr41839_3,))) <: Tuple{Vararg{Union{Nothing, Int}}}
 
 # issue #32699
 f32699(a) = (id = a[1],).id
@@ -2928,7 +2993,8 @@ end
 @test Core.Compiler.return_type(apply26826, Tuple{typeof(setfield!), Any, Vararg{Symbol}}) == Symbol
 @test Core.Compiler.return_type(apply26826, Tuple{typeof(setfield!), Any, Symbol, Vararg{Integer}}) == Integer
 @test Core.Compiler.return_type(apply26826, Tuple{typeof(setfield!), Any, Symbol, Integer, Vararg}) == Integer
-@test Core.Compiler.return_type(apply26826, Tuple{typeof(setfield!), Any, Symbol, Integer, Any, Vararg}) == Union{}
+@test Core.Compiler.return_type(apply26826, Tuple{typeof(setfield!), Any, Symbol, Integer, Any, Vararg}) == Integer
+@test Core.Compiler.return_type(apply26826, Tuple{typeof(setfield!), Any, Symbol, Integer, Any, Any, Vararg}) == Union{}
 @test Core.Compiler.return_type(apply26826, Tuple{typeof(Core._expr), Vararg}) == Expr
 @test Core.Compiler.return_type(apply26826, Tuple{typeof(Core._expr), Any, Vararg}) == Expr
 @test Core.Compiler.return_type(apply26826, Tuple{typeof(Core._expr), Any, Any, Vararg}) == Expr
@@ -2939,11 +3005,12 @@ end
 @test Core.Compiler.return_type(apply26826, Tuple{typeof(getfield), Tuple{Int}, Vararg}) == Int
 @test Core.Compiler.return_type(apply26826, Tuple{typeof(getfield), Tuple{Int}, Any, Vararg}) == Int
 @test Core.Compiler.return_type(apply26826, Tuple{typeof(getfield), Tuple{Int}, Any, Any, Vararg}) == Int
-@test Core.Compiler.return_type(apply26826, Tuple{typeof(getfield), Any, Any, Any, Any, Vararg}) == Union{}
-@test Core.Compiler.return_type(apply26826, Tuple{typeof(fieldtype), Vararg}) == Union{Type, TypeVar}
-@test Core.Compiler.return_type(apply26826, Tuple{typeof(fieldtype), Any, Vararg}) == Union{Type, TypeVar}
-@test Core.Compiler.return_type(apply26826, Tuple{typeof(fieldtype), Any, Any, Vararg}) == Union{Type, TypeVar}
-@test Core.Compiler.return_type(apply26826, Tuple{typeof(fieldtype), Any, Any, Any, Vararg}) == Union{Type, TypeVar}
+@test Core.Compiler.return_type(apply26826, Tuple{typeof(getfield), Tuple{Int}, Any, Any, Any, Vararg}) == Int
+@test Core.Compiler.return_type(apply26826, Tuple{typeof(getfield), Any, Any, Any, Any, Any, Vararg}) == Union{}
+@test Core.Compiler.return_type(apply26826, Tuple{typeof(fieldtype), Vararg}) == Any
+@test Core.Compiler.return_type(apply26826, Tuple{typeof(fieldtype), Any, Vararg}) == Any
+@test Core.Compiler.return_type(apply26826, Tuple{typeof(fieldtype), Any, Any, Vararg}) == Any
+@test Core.Compiler.return_type(apply26826, Tuple{typeof(fieldtype), Any, Any, Any, Vararg}) == Any
 @test Core.Compiler.return_type(apply26826, Tuple{typeof(fieldtype), Any, Any, Any, Any, Vararg}) == Union{}
 @test Core.Compiler.return_type(apply26826, Tuple{typeof(Core.apply_type), Vararg}) == Any
 @test Core.Compiler.return_type(apply26826, Tuple{typeof(Core.apply_type), Any, Vararg}) == Any
@@ -2981,14 +3048,14 @@ end
 # Some very limited testing of timing the type inference (#37749).
 @testset "Core.Compiler.Timings" begin
     # Functions that call each other
-    @eval module M
+    @eval module M1
         i(x) = x+5
         i2(x) = x+2
         h(a::Array) = i2(a[1]::Integer) + i(a[1]::Integer) + 2
         g(y::Integer, x) = h(Any[y]) + Int(x)
     end
     timing1 = time_inference() do
-        @eval M.g(2, 3.0)
+        @eval M1.g(2, 3.0)
     end
     @test occursin(r"Core.Compiler.Timings.Timing\(InferenceFrameInfo for Core.Compiler.Timings.ROOT\(\)\) with \d+ children", sprint(show, timing1))
     # The last two functions to be inferred should be `i` and `i2`, inferred at runtime with
@@ -3000,11 +3067,11 @@ end
     @test isa(stacktrace(timing1.children[1].bt), Vector{Base.StackTraces.StackFrame})
     # Test that inference has cached some of the Method Instances
     timing2 = time_inference() do
-        @eval M.g(2, 3.0)
+        @eval M1.g(2, 3.0)
     end
     @test length(flatten_times(timing2)) < length(flatten_times(timing1))
     # Printing of InferenceFrameInfo for mi.def isa Module
-    @eval module M
+    @eval module M2
         i(x) = x+5
         i2(x) = x+2
         h(a::Array) = i2(a[1]::Integer) + i(a[1]::Integer) + 2
@@ -3014,7 +3081,7 @@ end
     timingmod = time_inference() do
         @eval @testset "Outer" begin
             @testset "Inner" begin
-                for i = 1:2 M.g(2, 3.0) end
+                for i = 1:2 M2.g(2, 3.0) end
             end
         end
     end
@@ -3171,8 +3238,6 @@ end
 end
 
 @testset "constant prop' for union split signature" begin
-    anonymous_module() = Core.eval(@__MODULE__, :(module $(gensym()) end))::Module
-
     # indexing into tuples really relies on constant prop', and we will get looser result
     # (`Union{Int,String,Char}`) if constant prop' doesn't happen for splitunion signatures
     tt = (Union{Tuple{Int,String},Tuple{Int,Char}},)
@@ -3191,7 +3256,7 @@ end
         b
     end == Any[Union{String,Char}]
 
-    @test (@eval anonymous_module() begin
+    @test (@eval Module() begin
         struct F32
             val::Float32
             _v::Int
@@ -3205,7 +3270,7 @@ end
         end
     end) == Any[Union{Float32,Float64}]
 
-    @test (@eval anonymous_module() begin
+    @test (@eval Module() begin
         struct F32
             val::Float32
             _v
@@ -3243,3 +3308,198 @@ end
         Some(0x2)
     end
 end == [Union{Some{Float64}, Some{Int}, Some{UInt8}}]
+
+# https://github.com/JuliaLang/julia/issues/40336
+@testset "make sure a call with signatures with recursively nested Types terminates" begin
+    @test @eval Module() begin
+        f(@nospecialize(t)) = f(Type{t})
+
+        code_typed() do
+            f(Int)
+        end
+        true
+    end
+
+    @test @eval Module() begin
+        f(@nospecialize(t)) = tdepth(t) == 10 ? t : f(Type{t})
+        tdepth(@nospecialize(t)) = isempty(t.parameters) ? 1 : 1+tdepth(t.parameters[1])
+
+        code_typed() do
+            f(Int)
+        end
+        true
+    end
+end
+
+# Make sure that const prop doesn't fall into cycles that aren't problematic
+# in the type domain
+f_recurse(x) = x > 1000000 ? x : f_recurse(x+1)
+@test Base.return_types() do
+    f_recurse(1)
+end |> first === Int
+
+# issue #39915
+function f33915(a_tuple, which_ones)
+    rest = f33915(Base.tail(a_tuple), Base.tail(which_ones))
+    if first(which_ones)
+        (first(a_tuple), rest...)
+    else
+        rest
+    end
+end
+f33915(a_tuple::Tuple{}, which_ones::Tuple{}) = ()
+g39915(a_tuple) = f33915(a_tuple, (true, false, true, false))
+@test Base.return_types() do
+    g39915((1, 1.0, "a", :a))
+end |> first === Tuple{Int, String}
+
+# issue #40742
+@test Base.return_types(string, (Vector{Tuple{:x}},)) == Any[String]
+
+# issue #40804
+@test Base.return_types(()) do; ===(); end == Any[Union{}]
+@test Base.return_types(()) do; typeassert(); end == Any[Union{}]
+
+primitive type UInt24ish 24 end
+f34288(x) = Core.Intrinsics.checked_sdiv_int(x, Core.Intrinsics.trunc_int(UInt24ish, 0))
+@test Base.return_types(f34288, (UInt24ish,)) == Any[UInt24ish]
+
+# Inference of PhiNode showing up in lowered AST
+function f_convert_me_to_ir(b, x)
+    a = b ? sin(x) : cos(x)
+    return a
+end
+
+let
+    # Test the presence of PhiNodes in lowered IR by taking the above function,
+    # running it through SSA conversion and then putting it into an opaque
+    # closure.
+    mi = Core.Compiler.specialize_method(first(methods(f_convert_me_to_ir)),
+        Tuple{Bool, Float64}, Core.svec())
+    ci = Base.uncompressed_ast(mi.def)
+    ci.ssavaluetypes = Any[Any for i = 1:ci.ssavaluetypes]
+    sv = Core.Compiler.OptimizationState(mi, Core.Compiler.OptimizationParams(),
+        Core.Compiler.NativeInterpreter())
+    ir = Core.Compiler.convert_to_ircode(ci, Core.Compiler.copy_exprargs(ci.code),
+        false, sv)
+    ir = Core.Compiler.slot2reg(ir, ci, sv)
+    ir = Core.Compiler.compact!(ir)
+    Core.Compiler.replace_code_newstyle!(ci, ir, 4)
+    ci.ssavaluetypes = length(ci.code)
+    @test any(x->isa(x, Core.PhiNode), ci.code)
+    oc = @eval b->$(Expr(:new_opaque_closure, Tuple{Bool, Float64}, false, Any, Any,
+        Expr(:opaque_closure_method, nothing, 2, LineNumberNode(0, nothing), ci)))(b, 1.0)
+    @test Base.return_types(oc, Tuple{Bool}) == Any[Float64]
+
+    oc = @eval ()->$(Expr(:new_opaque_closure, Tuple{Bool, Float64}, false, Any, Any,
+        Expr(:opaque_closure_method, nothing, 2, LineNumberNode(0, nothing), ci)))(true, 1.0)
+    @test Base.return_types(oc, Tuple{}) == Any[Float64]
+end
+
+@testset "constant prop' on `invoke` calls" begin
+    m = Module()
+
+    # simple cases
+    @eval m begin
+        f(a::Any,    sym::Bool) = sym ? Any : :any
+        f(a::Number, sym::Bool) = sym ? Number : :number
+    end
+    @test (@eval m Base.return_types((Any,)) do a
+        Base.@invoke f(a::Any, true::Bool)
+    end) == Any[Type{Any}]
+    @test (@eval m Base.return_types((Any,)) do a
+        Base.@invoke f(a::Number, true::Bool)
+    end) == Any[Type{Number}]
+    @test (@eval m Base.return_types((Any,)) do a
+        Base.@invoke f(a::Any, false::Bool)
+    end) == Any[Symbol]
+    @test (@eval m Base.return_types((Any,)) do a
+        Base.@invoke f(a::Number, false::Bool)
+    end) == Any[Symbol]
+
+    # https://github.com/JuliaLang/julia/issues/41024
+    @eval m begin
+        # mixin, which expects common field `x::Int`
+        abstract type AbstractInterface end
+        Base.getproperty(x::AbstractInterface, sym::Symbol) =
+            sym === :x ? getfield(x, sym)::Int :
+            return getfield(x, sym) # fallback
+
+        # extended mixin, which expects additional field `y::Rational{Int}`
+        abstract type AbstractInterfaceExtended <: AbstractInterface end
+        Base.getproperty(x::AbstractInterfaceExtended, sym::Symbol) =
+            sym === :y ? getfield(x, sym)::Rational{Int} :
+            return Base.@invoke getproperty(x::AbstractInterface, sym::Symbol)
+    end
+    @test (@eval m Base.return_types((AbstractInterfaceExtended,)) do x
+        x.x
+    end) == Any[Int]
+end
+
+@testset "fieldtype for unions" begin # e.g. issue #40177
+    f40177(::Type{T}) where {T} = fieldtype(T, 1)
+    for T in [
+        Union{Tuple{Val}, Tuple{Tuple}},
+        Union{Base.RefValue{T}, Type{Int32}} where T<:Real,
+        Union{Tuple{Vararg{Symbol}}, Tuple{Float64, Vararg{Float32}}},
+    ]
+        @test @inferred(f40177(T)) == fieldtype(T, 1)
+    end
+end
+
+# issue #41908
+f41908(x::Complex{T}) where {String<:T<:String} = 1
+g41908() = f41908(Any[1][1])
+@test only(Base.return_types(g41908, ())) <: Int
+
+# issue #42022
+let x = Tuple{Int,Any}[
+        #= 1=# (0, Expr(:(=), Core.SlotNumber(3), 1))
+        #= 2=# (0, Expr(:enter, 18))
+        #= 3=# (2, Expr(:(=), Core.SlotNumber(3), 2.0))
+        #= 4=# (2, Expr(:enter, 12))
+        #= 5=# (4, Expr(:(=), Core.SlotNumber(3), '3'))
+        #= 6=# (4, Core.GotoIfNot(Core.SlotNumber(2), 9))
+        #= 7=# (4, Expr(:leave, 2))
+        #= 8=# (0, Core.ReturnNode(1))
+        #= 9=# (4, Expr(:call, GlobalRef(Main, :throw)))
+        #=10=# (4, Expr(:leave, 1))
+        #=11=# (2, Core.GotoNode(16))
+        #=12=# (4, Expr(:leave, 1))
+        #=13=# (2, Expr(:(=), Core.SlotNumber(4), Expr(:the_exception)))
+        #=14=# (2, Expr(:call, GlobalRef(Main, :rethrow)))
+        #=15=# (2, Expr(:pop_exception, Core.SSAValue(4)))
+        #=16=# (2, Expr(:leave, 1))
+        #=17=# (0, Core.GotoNode(22))
+        #=18=# (2, Expr(:leave, 1))
+        #=19=# (0, Expr(:(=), Core.SlotNumber(5), Expr(:the_exception)))
+        #=20=# (0, nothing)
+        #=21=# (0, Expr(:pop_exception, Core.SSAValue(2)))
+        #=22=# (0, Core.ReturnNode(Core.SlotNumber(3)))
+    ]
+    handler_at = Core.Compiler.compute_trycatch(last.(x), Core.Compiler.BitSet())
+    @test handler_at == first.(x)
+end
+
+@test only(Base.return_types((Bool,)) do y
+        x = 1
+        try
+            x = 2.0
+            try
+                x = '3'
+                y ? (return 1) : throw()
+            catch ex1
+                rethrow()
+            end
+        catch ex2
+            nothing
+        end
+        return x
+    end) === Union{Int, Float64, Char}
+
+# issue #42097
+struct Foo42097{F} end
+Foo42097(f::F, args) where {F} = Foo42097{F}()
+Foo42097(A) = Foo42097(Base.inferencebarrier(+), Base.inferencebarrier(1)...)
+foo42097() = Foo42097([1]...)
+@test foo42097() isa Foo42097{typeof(+)}
diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl
index 42839d4d954e6a..efe34eb5b35d9c 100644
--- a/test/compiler/inline.jl
+++ b/test/compiler/inline.jl
@@ -162,8 +162,8 @@ function fully_eliminated(f, args, retval)
     end
 end
 
-# check that type.mutable can be fully eliminated
-f_mutable_nothrow(s::String) = Val{typeof(s).mutable}
+# check that ismutabletype(type) can be fully eliminated
+f_mutable_nothrow(s::String) = Val{typeof(s).name.flags}
 @test fully_eliminated(f_mutable_nothrow, (String,))
 
 # check that ifelse can be fully eliminated
@@ -172,8 +172,7 @@ function f_ifelse(x)
     b = ifelse(a, true, false)
     return b ? x + 1 : x
 end
-# 2 for now because the compiler leaves a GotoNode around
-@test_broken length(code_typed(f_ifelse, (String,))[1][1].code) <= 2
+@test length(code_typed(f_ifelse, (String,))[1][1].code) <= 2
 
 # Test that inlining of _apply_iterate properly hits the inference cache
 @noinline cprop_inline_foo1() = (1, 1)
@@ -380,3 +379,274 @@ end
 using Base.Experimental: @opaque
 f_oc_getfield(x) = (@opaque ()->x)()
 @test fully_eliminated(f_oc_getfield, Tuple{Int})
+
+# `isinvoke(x, sym)`: is `x` a statically-resolved call of a function whose name is `sym`?
+isinvoke(@nospecialize(x), sym::Symbol) = isinvoke(x, mi -> mi.def.name === sym)
+# `isinvoke(x, pred)`: apply `pred` to the `MethodInstance` held by an `:invoke` expression
+function isinvoke(@nospecialize(x), pred)
+    # anything that is not an `:invoke` expression never matches
+    Meta.isexpr(x, :invoke) || return false
+    return pred(x.args[1]::Core.MethodInstance)
+end
+code_typed1(args...; kwargs...) = (first(only(code_typed(args...; kwargs...)))::Core.CodeInfo).code
+
+@testset "@inline/@noinline annotation before definition" begin
+    M = Module()
+    @eval M begin
+        @inline function _def_inline(x)
+            # this call won't be resolved and thus will prevent inlining to happen if we don't
+            # annotate `@inline` at the top of this function body
+            return unresolved_call(x)
+        end
+        def_inline(x) = _def_inline(x)
+        @noinline _def_noinline(x) = x # obviously will be inlined otherwise
+        def_noinline(x) = _def_noinline(x)
+
+        # test that they don't conflict with other "before-definition" macros
+        @inline Base.@aggressive_constprop function _def_inline_noconflict(x)
+            # this call won't be resolved and thus will prevent inlining to happen if we don't
+            # annotate `@inline` at the top of this function body
+            return unresolved_call(x)
+        end
+        def_inline_noconflict(x) = _def_inline_noconflict(x)
+        @noinline Base.@aggressive_constprop _def_noinline_noconflict(x) = x # obviously will be inlined otherwise
+        def_noinline_noconflict(x) = _def_noinline_noconflict(x)
+    end
+
+    let code = code_typed1(M.def_inline, (Int,))
+        @test all(code) do x
+            !isinvoke(x, :_def_inline)
+        end
+    end
+    let code = code_typed1(M.def_noinline, (Int,))
+        @test any(code) do x
+            isinvoke(x, :_def_noinline)
+        end
+    end
+    # test that they don't conflict with other "before-definition" macros
+    let code = code_typed1(M.def_inline_noconflict, (Int,))
+        @test all(code) do x
+            !isinvoke(x, :_def_inline_noconflict)
+        end
+    end
+    let code = code_typed1(M.def_noinline_noconflict, (Int,))
+        @test any(code) do x
+            isinvoke(x, :_def_noinline_noconflict)
+        end
+    end
+end
+
+@testset "@inline/@noinline annotation within a function body" begin
+    M = Module()
+    @eval M begin
+        function _body_inline(x)
+            @inline
+            # this call won't be resolved and thus will prevent inlining to happen if we don't
+            # annotate `@inline` at the top of this function body
+            return unresolved_call(x)
+        end
+        body_inline(x) = _body_inline(x)
+        function _body_noinline(x)
+            @noinline
+            return x # obviously will be inlined otherwise
+        end
+        body_noinline(x) = _body_noinline(x)
+
+        # test annotations for `do` blocks
+        @inline simple_caller(a) = a()
+        function do_inline(x)
+            simple_caller() do
+                @inline
+                # this call won't be resolved and thus will prevent inlining to happen if we don't
+                # annotate `@inline` at the top of this anonymous function body
+                return unresolved_call(x)
+            end
+        end
+        function do_noinline(x)
+            simple_caller() do
+                @noinline
+                return x # obviously will be inlined otherwise
+            end
+        end
+    end
+
+    let code = code_typed1(M.body_inline, (Int,))
+        @test all(code) do x
+            !isinvoke(x, :_body_inline)
+        end
+    end
+    let code = code_typed1(M.body_noinline, (Int,))
+        @test any(code) do x
+            isinvoke(x, :_body_noinline)
+        end
+    end
+    # test annotations for `do` blocks
+    let code = code_typed1(M.do_inline, (Int,))
+        # what we test here is that both `simple_caller` and the anonymous function that the
+        # `do` block creates should be inlined away, and as a result there is only the unresolved call
+        @test all(code) do x
+            !isinvoke(x, :simple_caller) &&
+            !isinvoke(x, mi->startswith(string(mi.def.name), '#'))
+        end
+    end
+    let code = code_typed1(M.do_noinline, (Int,))
+        # the anonymous function that the `do` block created shouldn't be inlined here
+        @test any(code) do x
+            isinvoke(x, mi->startswith(string(mi.def.name), '#'))
+        end
+    end
+end
+
+@testset "callsite @inline/@noinline annotations" begin
+    M = Module()
+    @eval M begin
+        # this global variable prevents inference from folding everything into a constant, and/or the optimizer from inlining the calls that access it
+        g = 0
+
+        @noinline noinlined_explicit(x) = x
+        force_inline_explicit(x)        = @inline noinlined_explicit(x)
+        force_inline_block_explicit(x)  = @inline noinlined_explicit(x) + noinlined_explicit(x)
+        noinlined_implicit(x)          = g
+        force_inline_implicit(x)       = @inline noinlined_implicit(x)
+        force_inline_block_implicit(x) = @inline noinlined_implicit(x) + noinlined_implicit(x)
+
+        @inline inlined_explicit(x)      = x
+        force_noinline_explicit(x)       = @noinline inlined_explicit(x)
+        force_noinline_block_explicit(x) = @noinline inlined_explicit(x) + inlined_explicit(x)
+        inlined_implicit(x)              = x
+        force_noinline_implicit(x)       = @noinline inlined_implicit(x)
+        force_noinline_block_implicit(x) = @noinline inlined_implicit(x) + inlined_implicit(x)
+
+        # test callsite annotations for constant-prop'ed calls
+
+        @noinline Base.@aggressive_constprop noinlined_constprop_explicit(a) = a+g
+        force_inline_constprop_explicit()                                    = @inline noinlined_constprop_explicit(0)
+        Base.@aggressive_constprop noinlined_constprop_implicit(a) = a+g
+        force_inline_constprop_implicit()                          = @inline noinlined_constprop_implicit(0)
+
+        @inline Base.@aggressive_constprop inlined_constprop_explicit(a) = a+g
+        force_noinline_constprop_explicit()                              = @noinline inlined_constprop_explicit(0)
+        @inline Base.@aggressive_constprop inlined_constprop_implicit(a) = a+g
+        force_noinline_constprop_implicit()                              = @noinline inlined_constprop_implicit(0)
+
+        @noinline notinlined(a) = a
+        function nested(a0, b0)
+            @noinline begin
+                a = @inline notinlined(a0) # this call should be inlined
+                b = notinlined(b0) # this call should NOT be inlined
+                return a, b
+            end
+        end
+    end
+
+    let code = code_typed1(M.force_inline_explicit, (Int,))
+        @test all(x->!isinvoke(x, :noinlined_explicit), code)
+    end
+    let code = code_typed1(M.force_inline_block_explicit, (Int,))
+        @test all(code) do x
+            !isinvoke(x, :noinlined_explicit) &&
+            !isinvoke(x, :(+))
+        end
+    end
+    let code = code_typed1(M.force_inline_implicit, (Int,))
+        @test all(x->!isinvoke(x, :noinlined_implicit), code)
+    end
+    let code = code_typed1(M.force_inline_block_implicit, (Int,)) # callee here is `noinlined_implicit`
+        @test all(x->!isinvoke(x, :noinlined_implicit), code)
+    end
+
+    let code = code_typed1(M.force_noinline_explicit, (Int,))
+        @test any(x->isinvoke(x, :inlined_explicit), code)
+    end
+    let code = code_typed1(M.force_noinline_block_explicit, (Int,))
+        @test count(x->isinvoke(x, :inlined_explicit), code) == 2
+    end
+    let code = code_typed1(M.force_noinline_implicit, (Int,))
+        @test any(x->isinvoke(x, :inlined_implicit), code)
+    end
+    let code = code_typed1(M.force_noinline_block_implicit, (Int,))
+        @test count(x->isinvoke(x, :inlined_implicit), code) == 2
+    end
+
+    let code = code_typed1(M.force_inline_constprop_explicit)
+        @test all(x->!isinvoke(x, :noinlined_constprop_explicit), code)
+    end
+    let code = code_typed1(M.force_inline_constprop_implicit)
+        @test all(x->!isinvoke(x, :noinlined_constprop_implicit), code)
+    end
+
+    let code = code_typed1(M.force_noinline_constprop_explicit)
+        @test any(x->isinvoke(x, :inlined_constprop_explicit), code)
+    end
+    let code = code_typed1(M.force_noinline_constprop_implicit)
+        @test any(x->isinvoke(x, :inlined_constprop_implicit), code)
+    end
+
+    let code = code_typed1(M.nested, (Int,Int))
+        @test count(x->isinvoke(x, :notinlined), code) == 1
+    end
+end
+
+# force constant-prop' for `setproperty!`
+# https://github.com/JuliaLang/julia/pull/41882
+let code = @eval Module() begin
+        # if we don't force constant-prop', `T = fieldtype(Foo, ::Symbol)` will be union-split to
+        # `Union{Type{Any},Type{Int}}` and it will make `convert(T, nothing)` too costly
+        # and it leads to inlining failure
+        mutable struct Foo
+            val
+            _::Int
+        end
+
+        function setter(xs)
+            for x in xs
+                x.val = nothing
+            end
+        end
+
+        $code_typed1(setter, (Vector{Foo},))
+    end
+
+    @test !any(x->isinvoke(x, :setproperty!), code)
+end
+
+# Issue #41299 - inlining deletes error check in >:
+g41299(f::Tf, args::Vararg{Any,N}) where {Tf,N} = f(args...)
+@test_throws TypeError g41299(>:, 1, 2)
+
+# https://github.com/JuliaLang/julia/issues/42078
+# idempotency of callsite inlining
+function getcache(mi::Core.MethodInstance)
+    # entry for `mi` in a fresh `NativeInterpreter`'s code cache, or `nothing` (the `get` default)
+    cache = Core.Compiler.code_cache(Core.Compiler.NativeInterpreter())
+    return Core.Compiler.get(cache, mi, nothing)
+end
+@noinline f42078(a) = sum(sincos(a))
+let
+    ninlined = let
+        code = code_typed1((Int,)) do a
+            @inline f42078(a)
+        end
+        @test all(x->!isinvoke(x, :f42078), code)
+        length(code)
+    end
+
+    let # codegen will discard the source because it's not supposed to be inlined in general context
+        a = 42
+        f42078(a)
+    end
+    let # make sure to discard the inferred source
+        specs = collect(only(methods(f42078)).specializations)
+        mi = specs[findfirst(!isnothing, specs)]::Core.MethodInstance
+        codeinf = getcache(mi)::Core.CodeInstance
+        codeinf.inferred = nothing
+    end
+
+    let # inference should re-infer `f42078(::Int)` and we should get the same code
+        code = code_typed1((Int,)) do a
+            @inline f42078(a)
+        end
+        @test all(x->!isinvoke(x, :f42078), code)
+        @test ninlined == length(code)
+    end
+end
diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl
index 3be15ef9cc3172..c4e3023184c13c 100644
--- a/test/compiler/irpasses.jl
+++ b/test/compiler/irpasses.jl
@@ -128,7 +128,7 @@ let nt = (a=1, b=2)
     @test_throws ArgumentError blah31139(nt)
 end
 
-# Expr(:new) annoted as PartialStruct
+# Expr(:new) annotated as PartialStruct
 struct FooPartial
     x
     y
diff --git a/test/compiler/ssair.jl b/test/compiler/ssair.jl
index f90bb71e291d09..17a0753eddc640 100644
--- a/test/compiler/ssair.jl
+++ b/test/compiler/ssair.jl
@@ -310,3 +310,7 @@ let cfg = CFG(BasicBlock[
     Compiler.domtree_insert_edge!(domtree, cfg.blocks, 1, 3)
     @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
 end
+
+# Issue #41975 - SSA conversion drops type check
+f_if_typecheck() = (if nothing; end; unsafe_load(Ptr{Int}(0)))
+@test_throws TypeError f_if_typecheck()
diff --git a/test/compiler/validation.jl b/test/compiler/validation.jl
index d07007069b5c8b..3863d3b11351fb 100644
--- a/test/compiler/validation.jl
+++ b/test/compiler/validation.jl
@@ -21,7 +21,7 @@ end
 msig = Tuple{typeof(f22938),Int,Int,Int,Int}
 world = typemax(UInt)
 match = Base._methods_by_ftype(msig, -1, world)[]
-mi = Core.Compiler.specialize_method(match, false)
+mi = Core.Compiler.specialize_method(match)
 c0 = Core.Compiler.retrieve_code_info(mi)
 
 @test isempty(Core.Compiler.validate_code(mi))
diff --git a/test/complex.jl b/test/complex.jl
index 56c446896bf53a..6fe3046ca9a071 100644
--- a/test/complex.jl
+++ b/test/complex.jl
@@ -1088,16 +1088,8 @@ end
         @test isequal(one(T) / complex(T(-NaN),  T(-Inf)), complex(-zero(T), zero(T)))
 
         # divide complex by complex Inf
-        if T == Float64
-            @test_broken isequal(complex(one(T)) / complex(T(Inf), T(-Inf)), complex(zero(T), zero(T)))
-            @test_broken isequal(complex(one(T)) / complex(T(-Inf), T(Inf)), complex(-zero(T), -zero(T)))
-        elseif T == Float32
-            @test isequal(complex(one(T)) / complex(T(Inf), T(-Inf)), complex(zero(T), zero(T)))
-            @test_broken isequal(complex(one(T)) / complex(T(-Inf), T(Inf)), complex(-zero(T), -zero(T)))
-        else
-            @test isequal(complex(one(T)) / complex(T(Inf), T(-Inf)), complex(zero(T), zero(T)))
-            @test isequal(complex(one(T)) / complex(T(-Inf), T(Inf)), complex(-zero(T), -zero(T)))
-        end
+        @test isequal(complex(one(T)) / complex(T(Inf), T(-Inf)), complex(zero(T), zero(T))) broken=(T==Float64)
+        @test isequal(complex(one(T)) / complex(T(-Inf), T(Inf)), complex(-zero(T), -zero(T))) broken=(T in (Float32, Float64))
     end
 end
 
diff --git a/test/copy.jl b/test/copy.jl
index 34d1c20c5f4fa7..28d34e4756a6b1 100644
--- a/test/copy.jl
+++ b/test/copy.jl
@@ -233,4 +233,8 @@ end
     @test copyto!(s, view(Int[],Int[])) == [1, 2]
     @test copyto!(s, Float64[]) == [1, 2]
     @test copyto!(s, String[]) == [1, 2] # No error
-end
\ No newline at end of file
+end
+
+@testset "deepcopy_internal arrays" begin
+    @test (@inferred Base.deepcopy_internal(zeros(), IdDict())) == zeros()
+end
diff --git a/test/core.jl b/test/core.jl
index 3c989906225283..5c3196431c6abc 100644
--- a/test/core.jl
+++ b/test/core.jl
@@ -1101,9 +1101,9 @@ end
 let strct = LoadError("yofile", 0, "bad")
     @test nfields(strct) == 3 # sanity test
     @test_throws BoundsError(strct, 10) getfield(strct, 10)
-    @test_throws ErrorException("setfield! immutable struct of type LoadError cannot be changed") setfield!(strct, 0, "")
-    @test_throws ErrorException("setfield! immutable struct of type LoadError cannot be changed") setfield!(strct, 4, "")
-    @test_throws ErrorException("setfield! immutable struct of type LoadError cannot be changed") setfield!(strct, :line, 0)
+    @test_throws ErrorException("setfield!: immutable struct of type LoadError cannot be changed") setfield!(strct, 0, "")
+    @test_throws ErrorException("setfield!: immutable struct of type LoadError cannot be changed") setfield!(strct, 4, "")
+    @test_throws ErrorException("setfield!: immutable struct of type LoadError cannot be changed") setfield!(strct, :line, 0)
     @test strct.file == "yofile"
     @test strct.line === 0
     @test strct.error == "bad"
@@ -1125,7 +1125,7 @@ let mstrct = TestMutable("melm", 1, nothing)
     @test_throws BoundsError(mstrct, 4) setfield!(mstrct, 4, "")
 end
 let strct = LoadError("yofile", 0, "bad")
-    @test_throws(ErrorException("setfield! immutable struct of type LoadError cannot be changed"),
+    @test_throws(ErrorException("setfield!: immutable struct of type LoadError cannot be changed"),
                  ccall(:jl_set_nth_field, Cvoid, (Any, Csize_t, Any), strct, 0, ""))
 end
 let mstrct = TestMutable("melm", 1, nothing)
@@ -2391,19 +2391,14 @@ let ex = Expr(:(=), :(f8338(x;y=4)), :(x*y))
 end
 
 # call overloading (#2403)
-(x::Int)(y::Int) = x + 3y
 issue2403func(f) = f(7)
-let x = 10
-    @test x(3) == 19
-    @test x((3,)...) == 19
-    @test issue2403func(x) == 31
-end
 mutable struct Issue2403
     x
 end
 (i::Issue2403)(y) = i.x + 2y
 let x = Issue2403(20)
     @test x(3) == 26
+    @test x((3,)...) == 26
     @test issue2403func(x) == 34
 end
 
@@ -3650,7 +3645,7 @@ end
 
 end
 
-# don't allow redefining types if ninitialized changes
+# don't allow redefining types if n_uninitialized changes
 struct NInitializedTestType
     a
 end
@@ -5653,11 +5648,9 @@ f_isdefined_unionvar(y, t) = (t > 0 && (x = (t == 1 ? 1 : y)); @isdefined x)
 @test !f_isdefined_unionvar(1, 0)
 f_isdefined_splat(x...) = @isdefined x
 @test f_isdefined_splat(1, 2, 3)
-let err = try; @macroexpand @isdefined :x; false; catch ex; ex; end,
+let e = try; @macroexpand @isdefined :x; false; catch ex; ex; end,
     __source__ = LineNumberNode(@__LINE__() - 1, Symbol(@__FILE__))
-    @test err.file === string(__source__.file)
-    @test err.line === __source__.line
-    e = err.error::MethodError
+    e::MethodError
     @test e.f === getfield(@__MODULE__, Symbol("@isdefined"))
     @test e.args === (__source__, @__MODULE__, :(:x))
 end
@@ -5974,11 +5967,11 @@ end
 for U in boxedunions
     local U
     for N in (1, 2, 3, 4)
-        A = Array{U}(undef, ntuple(x->0, N)...)
+        A = Array{U}(undef, ntuple(Returns(0), N)...)
         @test isempty(A)
         @test sizeof(A) == 0
 
-        A = Array{U}(undef, ntuple(x->10, N)...)
+        A = Array{U}(undef, ntuple(Returns(10), N)...)
         @test length(A) == 10^N
         @test sizeof(A) == sizeof(Int) * (10^N)
         @test !isassigned(A, 1)
@@ -6059,11 +6052,11 @@ using Serialization
 for U in unboxedunions
     local U
     for N in (1, 2, 3, 4)
-        A = Array{U}(undef, ntuple(x->0, N)...)
+        A = Array{U}(undef, ntuple(Returns(0), N)...)
         @test isempty(A)
         @test sizeof(A) == 0
 
-        len = ntuple(x->10, N)
+        len = ntuple(Returns(10), N)
         mxsz = maximum(sizeof, Base.uniontypes(U))
         A = Array{U}(undef, len)
         @test length(A) == prod(len)
@@ -7226,14 +7219,29 @@ end
 struct B33954
     x::Q33954{B33954}
 end
-@test_broken isbitstype(Tuple{B33954})
-@test_broken isbitstype(B33954)
+@test isbitstype(Tuple{B33954})
+@test isbitstype(B33954)
+
+struct A41503{d}
+    e::d
+end
+struct B41503{j,k} <: AbstractArray{A41503{B41503{Any,k}},Any}
+    l::k
+end
+@test !isbitstype(B41503{Any,Any})
+@test_broken isbitstype(B41503{Any,Int})
 
 struct B40050 <: Ref{Tuple{B40050}}
 end
 @test string((B40050(),)) == "($B40050(),)"
 @test_broken isbitstype(Tuple{B40050})
 
+# issue #41654
+struct X41654 <: Ref{X41654}
+end
+@test isbitstype(X41654)
+@test ('a'=>X41654(),)[1][2] isa X41654
+
 # Issue #34206/34207
 function mre34206(a, n)
     va = view(a, :)
@@ -7539,7 +7547,7 @@ end
 struct S38224
     i::Union{Int,Missing}
 end
-@test S38224.zeroinit
+@test S38224.flags & 0x10 == 0x10 # .zeroinit
 for _ in 1:5
     let a = Vector{S38224}(undef, 1000000)
         @test all(x->ismissing(x.i), a)
@@ -7562,3 +7570,22 @@ function f18621()
 end
 @test f18621() == 1:5
 @test [_ for _ in 1:5] == 1:5
+
+# issue #35130
+const T35130 = Tuple{Vector{Int}, <:Any}
+@eval struct A35130
+    x::Vector{Tuple{Vector{Int}, Any}}
+    A35130(x) = $(Expr(:new, :A35130, :x))
+end
+h35130(x) = A35130(Any[x][1]::Vector{T35130})
+@test h35130(T35130[([1],1)]) isa A35130
+
+# issue #41503
+let S = Tuple{Tuple{Tuple{K, UInt128} where K<:Tuple{Int64}, Int64}},
+    T = Tuple{Tuple{Tuple{Tuple{Int64}, UInt128}, Int64}}
+    @test pointer_from_objref(T) === pointer_from_objref(S)
+    @test isbitstype(T)
+end
+
+# avoid impossible normalization (don't try to form Tuple{Complex{String}} here)
+@test Tuple{Complex{T} where String<:T<:String} == Tuple{Complex{T} where String<:T<:String}
diff --git a/test/corelogging.jl b/test/corelogging.jl
index 89f69be438810c..9c5102d848013e 100644
--- a/test/corelogging.jl
+++ b/test/corelogging.jl
@@ -341,15 +341,29 @@ end
         String(take!(io))
     end
 
+    # Log one message through a default `SimpleLogger` while `stderr` is redirected
+    # to a temporary file, and return everything written to that file as a `String`.
+    function genmsg_err(level, message, _module, filepath, line; kws...)
+        logger = SimpleLogger() # built before the redirect; ordering presumably matters, keep it
+        mktemp() do fname, f # the do-form closes and removes the file even if logging throws
+            redirect_stderr(f) do
+                handle_message(logger, level, message, _module, :group, :id,
+                               filepath, line; kws...)
+            end
+            close(f) # flush buffered output before reading the file back
+            read(fname, String)
+        end
+    end
+
     # Simple
-    @test genmsg(Info, "msg", Main, "some/path.jl", 101) ==
+    @test genmsg_err(Info, "msg", Main, "some/path.jl", 101) ==
     """
     ┌ Info: msg
     └ @ Main some/path.jl:101
     """
 
     # Multiline message
-    @test genmsg(Warn, "line1\nline2", Main, "some/path.jl", 101) ==
+    @test genmsg_err(Warn, "line1\nline2", Main, "some/path.jl", 101) ==
     """
     ┌ Warning: line1
     │ line2
diff --git a/test/dict.jl b/test/dict.jl
index 534e88ada036c7..cbbb475c993fd2 100644
--- a/test/dict.jl
+++ b/test/dict.jl
@@ -684,6 +684,7 @@ import Base.ImmutableDict
     d4 = ImmutableDict(d3, k2 => v1)
     dnan = ImmutableDict{String, Float64}(k2, NaN)
     dnum = ImmutableDict(dnan, k2 => 1)
+    f(x) = x^2
 
     @test isempty(collect(d))
     @test !isempty(collect(d1))
@@ -729,6 +730,18 @@ import Base.ImmutableDict
     @test get(d4, "key1", :default) === v2
     @test get(d4, "foo", :default) === :default
     @test get(d, k1, :default) === :default
+    @test get(d1, "key1") do
+        f(2)
+    end === v1
+    @test get(d4, "key1") do
+        f(4)
+    end === v2
+    @test get(d4, "foo") do
+        f(6)
+    end === 36
+    @test get(d, k1) do
+        f(8)
+    end === 64
     @test d1["key1"] === v1
     @test d4["key1"] === v2
     @test empty(d3) === d
@@ -1057,6 +1070,26 @@ end
     check_merge([Dict(3=>4), Dict(:a=>5)], Dict(:a => 5, 3 => 4))
 end
 
+@testset "AbstractDict mergewith!" begin
+# we use IdDict to test the mergewith! implementation for AbstractDict
+    d1 = IdDict(1 => 1, 2 => 2)
+    d2 = IdDict(2 => 3, 3 => 4)
+    d3 = IdDict{Int, Float64}(1 => 5, 3 => 6)
+    d = copy(d1)
+    @inferred mergewith!(-, d, d2)
+    @test d == IdDict(1 => 1, 2 => -1, 3 => 4)
+    d = copy(d1)
+    @inferred mergewith!(-, d, d3)
+    @test d == IdDict(1 => -4, 2 => 2, 3 => 6)
+    d = copy(d1)
+    @inferred mergewith!(+, d, d2, d3)
+    @test d == IdDict(1 => 6, 2 => 5, 3 => 10)
+    @inferred mergewith(+, d1, d2, d3)
+    d = mergewith(+, d1, d2, d3)
+    @test d isa Dict{Int, Float64}
+    @test d == Dict(1 => 6, 2 => 5, 3 => 10)
+end
+
 @testset "misc error/io" begin
     d = Dict('a'=>1, 'b'=>1, 'c'=> 3)
     @test_throws ErrorException 'a' in d
diff --git a/test/docs.jl b/test/docs.jl
index fbe24da9be8733..41c3746d7476f3 100644
--- a/test/docs.jl
+++ b/test/docs.jl
@@ -793,14 +793,7 @@ end
 
 # Issue #13905.
 let err = try; @macroexpand(@doc "" f() = @x); false; catch ex; ex; end
-    __source__ = LineNumberNode(@__LINE__() -  1, Symbol(@__FILE__))
-    err::LoadError
-    @test err.file === string(__source__.file)
-    @test err.line === __source__.line
-    err = err.error::LoadError
-    @test err.file === string(__source__.file)
-    @test err.line === __source__.line
-    err = err.error::UndefVarError
+    err::UndefVarError
     @test err.var == Symbol("@x")
  end
 
@@ -863,11 +856,9 @@ undocumented(x,y) = 3
 end # module
 
 doc_str = Markdown.parse("""
-No documentation found.
-
-No docstring found for module `$(curmod_prefix)Undocumented`.
+No docstring or readme file found for module `$(curmod_prefix)Undocumented`.
 
-# Exported names:
+# Exported names
 
 `A`, `B`, `C`, `at0`, `pt2`
 """)
@@ -965,10 +956,10 @@ abstract type $(curmod_prefix)Undocumented.at0{T<:Number, N}
 # Subtypes
 
 ```
-$(curmod_prefix)Undocumented.at1{T, N} where {Integer<:T<:Number, N}
-$(curmod_prefix)Undocumented.pt2
-$(curmod_prefix)Undocumented.st3
-$(curmod_prefix)Undocumented.st4{T, N} where {T<:Number, N}
+$(curmod_prefix)Undocumented.at1{Integer<:T<:Number, N}
+$(curmod_prefix)Undocumented.pt2{T<:Number, N, A>:Integer}
+$(curmod_prefix)Undocumented.st3{T<:Integer, N}
+$(curmod_prefix)Undocumented.st4{T<:Number, N}
 ```
 """)
 @test docstrings_equal(@doc(Undocumented.at0), doc"$doc_str")
@@ -985,7 +976,7 @@ abstract type $(curmod_prefix)Undocumented.at1{T>:Integer, N}
 # Subtypes
 
 ```
-$(curmod_prefix)Undocumented.mt6{Integer, N} where N
+$(curmod_prefix)Undocumented.mt6{Integer, N}
 ```
 
 # Supertype Hierarchy
@@ -1007,9 +998,9 @@ abstract type $(curmod_prefix)Undocumented.at0{Int64, N}
 # Subtypes
 
 ```
-$(curmod_prefix)Undocumented.pt2{Int64, N, A} where {N, A>:Integer}
-$(curmod_prefix)Undocumented.st3{Int64, N} where N
-$(curmod_prefix)Undocumented.st4{Int64, N} where N
+$(curmod_prefix)Undocumented.pt2{Int64, N, A>:Integer}
+$(curmod_prefix)Undocumented.st3{Int64, N}
+$(curmod_prefix)Undocumented.st4{Int64, N}
 ```
 """)
 @test docstrings_equal(@doc(Undocumented.at_), doc"$doc_str")
@@ -1157,9 +1148,9 @@ No documentation found.
 
 # Union Composed of Types
 
- - `$(curmod_prefix)Undocumented.at1{T, N} where {T, N}`
- - `$(curmod_prefix)Undocumented.pt2{T, N, A} where {T, N, A>:Integer}`
- - `$(curmod_prefix)Undocumented.st3{T, N} where {T, N}`
+ - `$(curmod_prefix)Undocumented.at1{T} where T`
+ - `$(curmod_prefix)Undocumented.pt2{T} where T`
+ - `$(curmod_prefix)Undocumented.st3{T} where T`
  - `$(curmod_prefix)Undocumented.st4`
 """)
 @test docstrings_equal(@doc(Undocumented.ut9), doc"$doc_str")
@@ -1280,6 +1271,8 @@ end
 
 # issue #36378 (\u1e8b and x\u307 are the fully composed and decomposed forms of ẋ, respectively)
 @test sprint(repl_latex, "\u1e8b") == "\"x\u307\" can be typed by x\\dot<tab>\n\n"
+# issue 39814
+@test sprint(repl_latex, "\u2209") == "\"\u2209\" can be typed by \\notin<tab>\n\n"
 
 # issue #15684
 begin
diff --git a/test/embedding/embedding.c b/test/embedding/embedding.c
index 205b8a74502119..d1816947f38568 100644
--- a/test/embedding/embedding.c
+++ b/test/embedding/embedding.c
@@ -4,7 +4,7 @@
 #include <stdio.h>
 #include <math.h>
 
-JULIA_DEFINE_FAST_TLS() // only define this once, in an executable
+JULIA_DEFINE_FAST_TLS // only define this once, in an executable
 
 #ifdef _OS_WINDOWS_
 __declspec(dllexport) __cdecl
diff --git a/test/enums.jl b/test/enums.jl
index d3c585678c572f..5a83e1b4dfa424 100644
--- a/test/enums.jl
+++ b/test/enums.jl
@@ -143,6 +143,10 @@ let io = IOBuffer()
     @test String(take!(io)) == sprint(print, Fruit)
 end
 
+# Test printing of invalid enums
+@test repr("text/plain", reinterpret(Fruit, Int32(11))) == "<invalid #11>::Fruit = 11"
+@test repr("text/plain", reinterpret(Fruit, Int32(-5))) == "<invalid #-5>::Fruit = -5"
+
 @enum LogLevel DEBUG INFO WARN ERROR CRITICAL
 @test DEBUG < CRITICAL
 
@@ -160,6 +164,9 @@ end
 @test repr("text/plain", sevn)  == "$(string(sevn))::UI8 = 0x07"
 @test repr("text/plain", fiftn) == "$(string(fiftn))::UI8 = 0xf0"
 
+@test repr("text/plain", reinterpret(UI8, 0x01)) == "<invalid #1>::UI8 = 0x01"
+@test repr("text/plain", reinterpret(UI8, 0xff)) == "<invalid #255>::UI8 = 0xff"
+
 # test block form
 @enum BritishFood begin
     blackpudding = 1
diff --git a/test/error.jl b/test/error.jl
index bb97a0e66ed0bd..38ea3786642416 100644
--- a/test/error.jl
+++ b/test/error.jl
@@ -9,7 +9,7 @@
     Test.guardseed(12345) do
         x = ratio(collect(ExponentialBackOff(n=100, max_delay=Inf, factor=1, jitter=0.1)))
         xm = sum(x) / length(x)
-        @test (xm - 1.0) < 1e-4
+        @test abs(xm - 1.0) < 0.01
     end
 end
 @testset "retrying after errors" begin
diff --git a/test/errorshow.jl b/test/errorshow.jl
index 736e68dee1ef0e..24d0241049da04 100644
--- a/test/errorshow.jl
+++ b/test/errorshow.jl
@@ -86,7 +86,7 @@ method_c2(x::Int32, y::Int32, z::Int32) = true
 method_c2(x::T, y::T, z::T) where {T<:Real} = true
 
 Base.show_method_candidates(buf, Base.MethodError(method_c2,(1., 1., 2)))
-@test String(take!(buf)) ==  "\nClosest candidates are:\n  method_c2(!Matched::Int32, ::Float64, ::Any...)$cfile$(c2line+2)\n  method_c2(!Matched::Int32, ::Any...)$cfile$(c2line+1)\n  method_c2(::T, ::T, !Matched::T) where T<:Real$cfile$(c2line+5)\n  ..."
+@test String(take!(buf)) ==  "\nClosest candidates are:\n  method_c2(!Matched::Int32, ::Float64, ::Any...)$cfile$(c2line+2)\n  method_c2(::T, ::T, !Matched::T) where T<:Real$cfile$(c2line+5)\n  method_c2(!Matched::Int32, ::Any...)$cfile$(c2line+1)\n  ..."
 
 c3line = @__LINE__() + 1
 method_c3(x::Float64, y::Float64) = true
@@ -475,12 +475,6 @@ let
     @test (@macroexpand @fastmath +      ) == :(Base.FastMath.add_fast)
     @test (@macroexpand @fastmath min(1) ) == :(Base.FastMath.min_fast(1))
     let err = try; @macroexpand @doc "" f() = @x; catch ex; ex; end
-        file, line = @__FILE__, @__LINE__() - 1
-        err = err::LoadError
-        @test err.file == file && err.line == line
-        err = err.error::LoadError
-        @test err.file == file && err.line == line
-        err = err.error::UndefVarError
         @test err == UndefVarError(Symbol("@x"))
     end
     @test (@macroexpand @seven_dollar $bar) == 7
@@ -634,6 +628,16 @@ catch ex
 end
 pop!(Base.Experimental._hint_handlers[DomainError])  # order is undefined, don't copy this
 
+struct ANumber <: Number end
+let err_str
+    err_str = @except_str ANumber()(3 + 4) MethodError
+    @test occursin("objects of type $(curmod_prefix)ANumber are not callable", err_str)
+    @test count(==("Maybe you forgot to use an operator such as *, ^, %, / etc. ?"), split(err_str, '\n')) == 1
+    # issue 40478
+    err_str = @except_str ANumber()(3 + 4) MethodError
+    @test count(==("Maybe you forgot to use an operator such as *, ^, %, / etc. ?"), split(err_str, '\n')) == 1
+end
+
 # Execute backtrace once before checking formatting, see #38858
 backtrace()
 
@@ -649,13 +653,8 @@ backtrace()
     @test occursin("g28442", output[3])
     @test lstrip(output[5])[1:3] == "[2]"
     @test occursin("f28442", output[5])
-    # Issue #30233
-    # Note that we can't use @test_broken on FreeBSD here, because the tests actually do
-    # pass with some compilation options, e.g. with assertions enabled
-    if !Sys.isfreebsd()
-        @test occursin("the last 2 lines are repeated 5000 more times", output[7])
-        @test lstrip(output[8])[1:7] == "[10003]"
-    end
+    @test occursin("the last 2 lines are repeated 5000 more times", output[7])
+    @test lstrip(output[8])[1:7] == "[10003]"
 end
 
 @testset "Line number correction" begin
diff --git a/test/exceptions.jl b/test/exceptions.jl
index 7b8a54da2c6ebd..d8d1e7b45b8b53 100644
--- a/test/exceptions.jl
+++ b/test/exceptions.jl
@@ -1,52 +1,51 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 using Test
-using Base: catch_stack
 
 @testset "Basic exception stack handling" begin
     # Exiting the catch block normally pops the exception
     try
         error("A")
     catch
-        @test length(catch_stack()) == 1
+        @test length(current_exceptions()) == 1
     end
-    @test length(catch_stack()) == 0
+    @test length(current_exceptions()) == 0
     # Exiting via a finally block does not pop the exception
     try
         try
             error("A")
         finally
-            @test length(catch_stack()) == 1
+            @test length(current_exceptions()) == 1
         end
     catch
-        @test length(catch_stack()) == 1
+        @test length(current_exceptions()) == 1
     end
     # The combined try-catch-finally form obeys the same rules as above
     try
         error("A")
     catch
-        @test length(catch_stack()) == 1
+        @test length(current_exceptions()) == 1
     finally
-        @test length(catch_stack()) == 0
+        @test length(current_exceptions()) == 0
     end
-    @test length(catch_stack()) == 0
+    @test length(current_exceptions()) == 0
     # Errors are pushed onto the stack according to catch block nesting
     try
         error("RootCause")
     catch
-        @test length(catch_stack()) == 1
+        @test length(current_exceptions()) == 1
         try
             error("B")
         catch
-            stack = catch_stack()
+            stack = current_exceptions()
             @test length(stack) == 2
-            @test stack[1][1].msg == "RootCause"
-            @test stack[2][1].msg == "B"
+            @test stack[1].exception.msg == "RootCause"
+            @test stack[2].exception.msg == "B"
         end
         # Stack pops correctly
-        stack = catch_stack()
+        stack = current_exceptions()
         @test length(stack) == 1
-        @test stack[1][1].msg == "RootCause"
+        @test stack[1].exception.msg == "RootCause"
     end
 end
 
@@ -55,7 +54,7 @@ end
     val = try
         error("A")
     catch
-        @test length(catch_stack()) == 1
+        @test length(current_exceptions()) == 1
         1
     end
     @test val == 1
@@ -64,11 +63,11 @@ end
         try
             error("A")
         catch
-            length(catch_stack())
+            length(current_exceptions())
         end
     end
     @test test_exc_stack_tailpos() == 1
-    @test length(catch_stack()) == 0
+    @test length(current_exceptions()) == 0
 end
 
 @testset "Exception stacks - early exit from try or catch" begin
@@ -78,7 +77,7 @@ end
         try
             error("A")
         catch
-            @test length(catch_stack()) == 1
+            @test length(current_exceptions()) == 1
             return
         end
     end
@@ -88,7 +87,7 @@ end
         try
             error("A")
         catch
-            @test length(catch_stack()) == 1
+            @test length(current_exceptions()) == 1
             break
         end
     end
@@ -97,19 +96,19 @@ end
         try
             error("A")
         catch
-            @test length(catch_stack()) == 1
+            @test length(current_exceptions()) == 1
             break
         finally
-            @test length(catch_stack()) == 0
+            @test length(current_exceptions()) == 0
         end
     end
-    @test length(catch_stack()) == 0
+    @test length(current_exceptions()) == 0
 
     for i=1:1
         try
             error("A")
         catch
-            @test length(catch_stack()) == 1
+            @test length(current_exceptions()) == 1
             continue
         end
     end
@@ -117,38 +116,38 @@ end
         try
             error("A")
         catch
-            @test length(catch_stack()) == 1
+            @test length(current_exceptions()) == 1
             continue
         finally
-            @test length(catch_stack()) == 0
+            @test length(current_exceptions()) == 0
         end
     end
-    @test length(catch_stack()) == 0
+    @test length(current_exceptions()) == 0
 
     try
         error("A")
     catch
-        @test length(catch_stack()) == 1
+        @test length(current_exceptions()) == 1
         @goto outofcatch
     end
     @label outofcatch
     try
         error("A")
     catch
-        @test length(catch_stack()) == 1
+        @test length(current_exceptions()) == 1
         @goto outofcatch2
     finally
-        @test length(catch_stack()) == 0
+        @test length(current_exceptions()) == 0
     end
     @label outofcatch2
-    @test length(catch_stack()) == 0
+    @test length(current_exceptions()) == 0
 
     # Exiting from a try block in various ways should not affect the exception
     # stack state.
     try
         error("ExceptionInOuterTry")
     catch
-        @test length(catch_stack()) == 1
+        @test length(current_exceptions()) == 1
         function test_exc_stack_try_return()
             try
                 return
@@ -173,8 +172,8 @@ end
         catch
         end
         @label outoftry
-        @test length(catch_stack()) == 1
-        @test catch_stack()[1][1] == ErrorException("ExceptionInOuterTry")
+        @test length(current_exceptions()) == 1
+        @test current_exceptions()[1].exception == ErrorException("ExceptionInOuterTry")
     end
 end
 
@@ -195,10 +194,10 @@ end
             # Explicit return => exception should be popped before finally block
             return
         finally
-            @test length(Base.catch_stack()) == 0
+            @test length(Base.current_exceptions()) == 0
         end
     end)()
-    @test length(Base.catch_stack()) == 0
+    @test length(Base.current_exceptions()) == 0
 
     while true
         try
@@ -209,11 +208,11 @@ end
                 # exception should not be popped inside finally block
                 break
             finally
-                @test length(Base.catch_stack()) == 1
+                @test length(Base.current_exceptions()) == 1
             end
         end
     end
-    @test length(Base.catch_stack()) == 0
+    @test length(Base.current_exceptions()) == 0
 
     # Nested finally handling with `return`: each finally block should observe
     # only the active exceptions as according to its nesting depth.
@@ -232,16 +231,16 @@ end
                     end
                 finally
                     # At this point err2 is dealt with
-                    @test length(Base.catch_stack()) == 1
-                    @test Base.catch_stack()[1][1] == ErrorException("err1")
+                    @test length(Base.current_exceptions()) == 1
+                    @test Base.current_exceptions()[1].exception == ErrorException("err1")
                 end
             end
         finally
             # At this point err1 is dealt with
-            @test length(Base.catch_stack()) == 0
+            @test length(Base.current_exceptions()) == 0
         end
     end)()
-    @test length(Base.catch_stack()) == 0
+    @test length(Base.current_exceptions()) == 0
 end
 
 @testset "Deep exception stacks" begin
@@ -260,10 +259,10 @@ end
     @test try
         test_exc_stack_deep(100)
     catch
-        @test catch_stack()[1][1] == ErrorException("RootCause")
-        length(catch_stack())
+        @test current_exceptions()[1].exception == ErrorException("RootCause")
+        length(current_exceptions())
     end == 100
-    @test length(catch_stack()) == 0
+    @test length(current_exceptions()) == 0
 end
 
 @testset "Exception stacks and Tasks" begin
@@ -280,10 +279,10 @@ end
         @test t.state == :done
         @test t.result == ErrorException("B")
         # Task exception state is preserved around task switches
-        @test length(catch_stack()) == 1
-        @test catch_stack()[1][1] == ErrorException("A")
+        @test length(current_exceptions()) == 1
+        @test current_exceptions()[1].exception == ErrorException("A")
     end
-    @test length(catch_stack()) == 0
+    @test length(current_exceptions()) == 0
     # test rethrow() rethrows correct state
     bt = []
     try
@@ -306,7 +305,7 @@ end
         @test exc == ErrorException("A")
         @test bt == catch_backtrace()
     end
-    @test length(catch_stack()) == 0
+    @test length(current_exceptions()) == 0
     # test rethrow with argument
     bt = []
     try
@@ -328,7 +327,7 @@ end
         @test exc == ErrorException("C")
         @test bt == catch_backtrace()
     end
-    @test length(catch_stack()) == 0
+    @test length(current_exceptions()) == 0
     # Exception stacks on other tasks
     t = @task try
         error("A")
@@ -338,7 +337,10 @@ end
     yield(t)
     @test t.state == :failed
     @test t.result == ErrorException("B")
-    @test catch_stack(t, include_bt=false) == [ErrorException("A"), ErrorException("B")]
+    @test current_exceptions(t, backtrace=false) == [
+        (exception=ErrorException("A"),backtrace=nothing),
+        (exception=ErrorException("B"),backtrace=nothing)
+    ]
     # Exception stacks for tasks which never get the chance to start
     t = @task nothing
     @test (try
@@ -347,12 +349,12 @@ end
     catch e
         e
     end).task.exception == ErrorException("expected")
-    @test length(catch_stack(t)) == 1
-    @test length(catch_stack(t)[1][2]) > 0 # backtrace is nonempty
+    @test length(current_exceptions(t)) == 1
+    @test length(current_exceptions(t)[1].backtrace) > 0 # backtrace is nonempty
     # Exception stacks should not be accessed on concurrently running tasks
     t = @task ()->nothing
     @test_throws ErrorException("Inspecting the exception stack of a task which might "*
-                                "be running concurrently isn't allowed.") catch_stack(t)
+                                "be running concurrently isn't allowed.") current_exceptions(t)
 end
 
 @testset "rethrow" begin
@@ -396,5 +398,5 @@ end
     undef_var_in_catch()
     []
 catch
-    catch_stack()
+    current_exceptions()
 end) == 2
diff --git a/test/file.jl b/test/file.jl
index b732134ec51e69..3d300668aadf38 100644
--- a/test/file.jl
+++ b/test/file.jl
@@ -63,17 +63,32 @@ if !Sys.iswindows() || Sys.windows_version() >= Sys.WINDOWS_VISTA_VER
 end
 
 if !Sys.iswindows() || Sys.windows_version() >= Sys.WINDOWS_VISTA_VER
-    link = joinpath(dir, "afilelink.txt")
+    link = joinpath(dir, "afilesymlink.txt")
     symlink(file, link)
     @test stat(file) == stat(link)
 
     # relative link
-    rellink = joinpath(subdir, "rel_afilelink.txt")
+    rellink = joinpath(subdir, "rel_afilesymlink.txt")
     relfile = joinpath("..", "afile.txt")
     symlink(relfile, rellink)
     @test stat(rellink) == stat(file)
 end
 
+@testset "hardlink" begin
+    # A hard link and its source must report identical `stat` results.
+    hard = joinpath(dir, "afilehardlink.txt")
+    hardlink(file, hard)
+    @test stat(file) == stat(hard)
+    # Linking onto a destination path that already exists must throw.
+    @test_throws Base.IOError hardlink(file, hard)
+
+    rm(hard)
+
+    # Linking from a source path that does not exist must throw as well.
+    absent = joinpath(dir, "for-sure-missing-file.txt")
+    @test_throws Base.IOError hardlink(absent, hard)
+end
+
 using Random
 
 @testset "that temp names are actually unique" begin
@@ -491,6 +506,29 @@ rm(c_tmpdir, recursive=true)
 @test_throws Base._UVError("unlink($(repr(c_tmpdir)))", Base.UV_ENOENT) rm(c_tmpdir, recursive=true)
 @test rm(c_tmpdir, force=true, recursive=true) === nothing
 
+# Some operations can return multiple different error codes depending on the system environment.
+function throws_matching_exception(f::Function, acceptable_exceptions::AbstractVector)
+    # Returns `true` only when `f()` throws an exception that is `in` the accepted
+    # list; every other outcome is logged with `@error` and reported as `false`.
+    try
+        f()
+        @error "No exception was thrown."
+    catch ex
+        # A listed exception is the success case.
+        ex in acceptable_exceptions && return true
+        # Anything else is logged together with its backtrace.
+        @error "The thrown exception is not in the list of acceptable exceptions" acceptable_exceptions exception=(ex, catch_backtrace())
+    end
+    return false
+end
+function throws_matching_uv_error(f::Function, pfx::AbstractString, codes::AbstractVector{<:Integer})
+    # One `Base._UVError` is built per acceptable code; a match on any of them passes.
+    return throws_matching_exception(f, multiple_uv_errors(pfx, codes))
+end
+function multiple_uv_errors(pfx::AbstractString, codes::AbstractVector{<:Integer})
+    return map(code -> Base._UVError(pfx, code), codes)  # same prefix, one error per code
+end
+
 if !Sys.iswindows()
     # chown will give an error if the user does not have permissions to change files
     if get(ENV, "USER", "") == "root" || get(ENV, "HOME", "") == "/root"
@@ -503,8 +541,12 @@ if !Sys.iswindows()
         @test stat(file).gid == 0
         @test stat(file).uid == 0
     else
-        @test_throws Base._UVError("chown($(repr(file)), -2, -1)", Base.UV_EPERM) chown(file, -2, -1)  # Non-root user cannot change ownership to another user
-        @test_throws Base._UVError("chown($(repr(file)), -1, -2)", Base.UV_EPERM) chown(file, -1, -2)  # Non-root user cannot change group to a group they are not a member of (eg: nogroup)
+        @test throws_matching_uv_error("chown($(repr(file)), -2, -1)", [Base.UV_EPERM, Base.UV_EINVAL]) do
+            chown(file, -2, -1)  # Non-root user cannot change ownership to another user
+        end
+        @test throws_matching_uv_error("chown($(repr(file)), -1, -2)", [Base.UV_EPERM, Base.UV_EINVAL]) do
+            chown(file, -1, -2)  # Non-root user cannot change group to a group they are not a member of (eg: nogroup)
+        end
     end
 else
     # test that chown doesn't cause any errors for Windows
@@ -702,7 +744,7 @@ let
     @test a_stat.size == b_stat.size
     @test a_stat.size == c_stat.size
 
-    @test parse(Int, match(r"mode=(.*),", sprint(show, a_stat)).captures[1]) == a_stat.mode
+    @test parse(Int, split(sprint(show, a_stat),"mode: ")[2][1:8]) == a_stat.mode
 
     close(af)
     rm(afile)
@@ -1605,3 +1647,48 @@ if Sys.iswindows()
     @test rm(tmp) === nothing
 end
 end
+
+@testset "StatStruct show's extended details" begin
+    # Regular file: compact and multi-line `show` output.
+    f, io = mktemp()
+    close(io) # only the path is inspected below; do not leak the open handle
+    s = stat(f)
+    stat_show_str = sprint(show, s)
+    stat_show_str_multi = sprint(show, MIME("text/plain"), s)
+    @test startswith(stat_show_str, "StatStruct(")
+    @test endswith(stat_show_str, ")")
+    @test startswith(stat_show_str_multi, "StatStruct for ")
+    @test rstrip(stat_show_str_multi) == stat_show_str_multi # no trailing \n
+    @test occursin(repr(f), stat_show_str)
+    @test occursin(repr(f), stat_show_str_multi)
+    if Sys.iswindows()
+        @test occursin("mode: 0o100666 (-rw-rw-rw-)", stat_show_str)
+        @test occursin("mode: 0o100666 (-rw-rw-rw-)\n", stat_show_str_multi)
+    else
+        @test occursin("mode: 0o100600 (-rw-------)", stat_show_str)
+        @test occursin("mode: 0o100600 (-rw-------)\n", stat_show_str_multi)
+        # The owner's user and group names must resolve (checked off Windows only).
+        @test !isnothing(Base.Filesystem.getusername(s.uid))
+        @test !isnothing(Base.Filesystem.getgroupname(s.gid))
+    end
+    # Directory: the same checks against a fresh temporary directory.
+    d = mktempdir()
+    s = stat(d)
+    stat_show_str = sprint(show, s)
+    stat_show_str_multi = sprint(show, MIME("text/plain"), s)
+    @test startswith(stat_show_str, "StatStruct(")
+    @test endswith(stat_show_str, ")")
+    @test startswith(stat_show_str_multi, "StatStruct for ")
+    @test rstrip(stat_show_str_multi) == stat_show_str_multi # no trailing \n
+    @test occursin(repr(d), stat_show_str)
+    @test occursin(repr(d), stat_show_str_multi)
+    if Sys.iswindows()
+        @test occursin("mode: 0o040666 (drw-rw-rw-)", stat_show_str)
+        @test occursin("mode: 0o040666 (drw-rw-rw-)\n", stat_show_str_multi)
+    else
+        @test occursin("mode: 0o040700 (drwx------)", stat_show_str)
+        @test occursin("mode: 0o040700 (drwx------)\n", stat_show_str_multi)
+        @test !isnothing(Base.Filesystem.getusername(s.uid))
+        @test !isnothing(Base.Filesystem.getgroupname(s.gid))
+    end
+end
diff --git a/test/float16.jl b/test/float16.jl
index 5ac9582f900c4e..804aba9ef741ba 100644
--- a/test/float16.jl
+++ b/test/float16.jl
@@ -21,6 +21,20 @@ g = Float16(1.)
     @test isequal(Float16(0.0), Float16(0.0))
     @test !isequal(Float16(-0.0), Float16(0.0))
     @test !isequal(Float16(0.0), Float16(-0.0))
+
+    for T = Base.BitInteger_types
+        @test -Inf16 < typemin(T)
+        @test -Inf16 <= typemin(T)
+        @test typemin(T) > -Inf16
+        @test typemin(T) >= -Inf16
+        @test typemin(T) != -Inf16
+
+        @test Inf16 > typemax(T)
+        @test Inf16 >= typemax(T)
+        @test typemax(T) < Inf16
+        @test typemax(T) <= Inf16
+        @test typemax(T) != Inf16
+    end
 end
 
 @testset "convert" begin
@@ -80,6 +94,9 @@ end
 
     # no domain error is thrown for negative values
     @test cbrt(Float16(-1.0)) == -1.0
+    # test zero and Inf
+    @test cbrt(Float16(0.0)) == Float16(0.0)
+    @test cbrt(Inf16) == Inf16
 end
 @testset "binary ops" begin
     @test f+g === Float16(3f0)
@@ -157,6 +174,10 @@ end
     # halfway between and last bit is 0
     ff = reinterpret(Float32,                           0b00111110101010100001000000000000)
     @test Float32(Float16(ff)) === reinterpret(Float32, 0b00111110101010100000000000000000)
+
+    for x = (typemin(Int64), typemin(Int128)), R = (RoundUp, RoundToZero)
+        @test Float16(x, R) == nextfloat(-Inf16)
+    end
 end
 
 # issue #5948
diff --git a/test/floatfuncs.jl b/test/floatfuncs.jl
index e3f3203e3c069e..7e9d8021ac5df4 100644
--- a/test/floatfuncs.jl
+++ b/test/floatfuncs.jl
@@ -119,6 +119,23 @@ end
     @test round(Float32(1.2), sigdigits=5) === Float32(1.2)
     @test round(Float16(0.6), sigdigits=2) === Float16(0.6)
     @test round(Float16(1.1), sigdigits=70) === Float16(1.1)
+
+    # issue 37171
+    @test round(9.87654321e-308, sigdigits = 1) ≈ 1.0e-307
+    @test round(9.87654321e-308, sigdigits = 2) ≈ 9.9e-308
+    @test round(9.87654321e-308, sigdigits = 3) ≈ 9.88e-308
+    @test round(9.87654321e-308, sigdigits = 4) ≈ 9.877e-308
+    @test round(9.87654321e-308, sigdigits = 5) ≈ 9.8765e-308
+    @test round(9.87654321e-308, sigdigits = 6) ≈ 9.87654e-308
+    @test round(9.87654321e-308, sigdigits = 7) ≈ 9.876543e-308
+    @test round(9.87654321e-308, sigdigits = 8) ≈ 9.8765432e-308
+    @test round(9.87654321e-308, sigdigits = 9) ≈ 9.87654321e-308
+    @test round(9.87654321e-308, sigdigits = 10) ≈ 9.87654321e-308
+    @test round(9.87654321e-308, sigdigits = 11) ≈ 9.87654321e-308
+
+    @inferred round(Float16(1.), sigdigits=2)
+    @inferred round(Float32(1.), sigdigits=2)
+    @inferred round(Float64(1.), sigdigits=2)
 end
 
 @testset "literal pow matches runtime pow matches optimized pow" begin
diff --git a/test/generic_map_tests.jl b/test/generic_map_tests.jl
index 8fde731770bf38..abd9a31946a9ad 100644
--- a/test/generic_map_tests.jl
+++ b/test/generic_map_tests.jl
@@ -53,6 +53,28 @@ function generic_map_tests(mapf, inplace_mapf=nothing)
         @test A == map(x->x*x*x, Float64[1:10...])
         @test A === B
     end
+
+    # Issue #28382: inferrability of map with Union eltype
+    @test isequal(map(+, [1, 2], [3.0, missing]), [4.0, missing])
+    @test Core.Compiler.return_type(map, Tuple{typeof(+), Vector{Int},
+                                               Vector{Union{Float64, Missing}}}) ==
+        Union{Vector{Missing}, Vector{Union{Missing, Float64}}, Vector{Float64}}
+    @test isequal(map(tuple, [1, 2], [3.0, missing]), [(1, 3.0), (2, missing)])
+    @test Core.Compiler.return_type(map, Tuple{typeof(tuple), Vector{Int},
+                                               Vector{Union{Float64, Missing}}}) ==
+        Vector{<:Tuple{Int, Any}}
+    # Check that corner cases do not throw an error
+    @test isequal(map(x -> x === 1 ? nothing : x, [1, 2, missing]),
+                  [nothing, 2, missing])
+    @test isequal(map(x -> x === 1 ? nothing : x, Any[1, 2, 3.0, missing]),
+                  [nothing, 2, 3, missing])
+    @test map((x,y)->(x==1 ? 1.0 : x, y), [1, 2, 3], ["a", "b", "c"]) ==
+        [(1.0, "a"), (2, "b"), (3, "c")]
+    @test map(typeof, [iszero, isdigit]) == [typeof(iszero), typeof(isdigit)]
+    @test map(typeof, [iszero, iszero]) == [typeof(iszero), typeof(iszero)]
+    @test isequal(map(identity, Vector{<:Union{Int, Missing}}[[1, 2],[missing, 1]]),
+                  [[1, 2],[missing, 1]])
+    @test map(x -> x < 0 ? false : x, Int[]) isa Vector{Integer}
 end
 
 function testmap_equivalence(mapf, f, c...)
@@ -76,6 +98,6 @@ function run_map_equivalence_tests(mapf)
     testmap_equivalence(mapf, identity, (1,2,3,4))
     testmap_equivalence(mapf, (x,y,z)->x+y+z, 1,2,3)
     testmap_equivalence(mapf, x->x ? false : true, BitMatrix(undef, 10,10))
-    testmap_equivalence(mapf, x->"foobar", BitMatrix(undef, 10,10))
+    testmap_equivalence(mapf, Returns("foobar"), BitMatrix(undef, 10,10))
     testmap_equivalence(mapf, (x,y,z)->string(x,y,z), BitVector(undef, 10), fill(1.0, 10), "1234567890")
 end
diff --git a/test/gmp.jl b/test/gmp.jl
index 96ffdedb7a93c5..875d8895b6b34f 100644
--- a/test/gmp.jl
+++ b/test/gmp.jl
@@ -69,6 +69,18 @@ ee = typemax(Int64)
             @test big(typeof(complex(x, x))) == typeof(big(complex(x, x)))
         end
     end
+    @testset "division" begin
+        # `rinf` is the rational 1//0 and `rzero` the rational 0//1, both made with `big`.
+        rinf, rzero = big(1 // 0), big(0 // 1)
+        unit = one(rinf)
+        @test_throws DivideError() rinf / rinf
+        @test rinf == rinf / unit
+        @test -rinf == rinf / (-unit)
+        @test zero(rinf) == unit / rinf
+        @test_throws DivideError() rzero / rzero
+        @test one(rzero) / rzero == big(1//0)
+        @test -one(rzero) / rzero == big(-1//0)
+    end
 end
 @testset "div, fld, mod, rem" begin
     for i = -10:10, j = [-10:-1; 1:10]
@@ -212,6 +224,9 @@ let a, b
     a = rand(1:100, 10000)
     b = map(BigInt, a)
     @test sum(a) == sum(b)
+    @test 0 == sum(BigInt[]) isa BigInt
+    @test prod(b) == foldl(*, b)
+    @test 1 == prod(BigInt[]) isa BigInt
 end
 
 @testset "Iterated arithmetic" begin
@@ -224,10 +239,15 @@ end
     g = parse(BigInt,"-1")
 
     @test +(a, b) == parse(BigInt,"327547")
+    @test 327547 == sum((a, b)) isa BigInt
     @test +(a, b, c) == parse(BigInt,"3426495623485904783805894")
+    @test 3426495623485904783805894 == sum((a, b, c)) isa BigInt
     @test +(a, b, c, d) == parse(BigInt,"3426495623485903384821764")
+    @test 3426495623485903384821764 == sum((a, b, c, d)) isa BigInt
     @test +(a, b, c, d, f) == parse(BigInt,"2413804710837418037418307081437318690130968843290370569228")
+    @test 2413804710837418037418307081437318690130968843290370569228 == sum((a, b, c, d, f)) isa BigInt
     @test +(a, b, c, d, f, g) == parse(BigInt,"2413804710837418037418307081437318690130968843290370569227")
+    @test 2413804710837418037418307081437318690130968843290370569227 == sum((a, b, c, d, f, g)) isa BigInt
 
     @test *(a, b) == parse(BigInt,"3911455620")
     @test *(a, b, c) == parse(BigInt,"13402585563389346256121263521460140")
@@ -241,6 +261,12 @@ end
     @test xor(a, b, c, d, f) == parse(BigInt,"-2413804710837418037418307081437316711364709261074607933698")
     @test xor(a, b, c, d, f, g) == parse(BigInt,"2413804710837418037418307081437316711364709261074607933697")
 
+    @test nand(a, b) == parse(BigInt,"-125")
+    @test ⊼(a, b) == parse(BigInt,"-125")
+
+    @test nor(a, b) == parse(BigInt,"-327424")
+    @test ⊽(a, b) == parse(BigInt,"-327424")
+
     @test (&)(a, b) == parse(BigInt,"124")
     @test (&)(a, b, c) == parse(BigInt,"72")
     @test (&)(a, b, c, d) == parse(BigInt,"8")
diff --git a/test/hashing.jl b/test/hashing.jl
index c2afd7426ecb36..e5223ed4070622 100644
--- a/test/hashing.jl
+++ b/test/hashing.jl
@@ -260,3 +260,27 @@ end
 
 # PR #40083
 @test hash(1:1000) == hash(collect(1:1000))
+
+@testset "test the other core data hashing functions" begin
+    @testset "hash_64_32" begin
+        vals = vcat(
+            typemin(UInt64) .+ UInt64[1:4;],
+            typemax(UInt64) .- UInt64[4:-1:0;]
+        )
+        # Over these values near both ends of the range, hashes agree only for equal inputs.
+        for a in vals, b in vals
+            @test isequal(a, b) == (Base.hash_64_32(a) == Base.hash_64_32(b))
+        end
+    end
+
+    @testset "hash_32_32" begin
+        vals = vcat(
+            typemin(UInt32) .+ UInt32[1:4;],
+            typemax(UInt32) .- UInt32[4:-1:0;]
+        )
+        # Same check for the 32-bit variant.
+        for a in vals, b in vals
+            @test isequal(a, b) == (Base.hash_32_32(a) == Base.hash_32_32(b))
+        end
+    end
+end
diff --git a/test/int.jl b/test/int.jl
index f0e157711c808e..52f554718645e1 100644
--- a/test/int.jl
+++ b/test/int.jl
@@ -141,6 +141,10 @@ SItypes = Base.BitSigned_types
         R = sizeof(S) < sizeof(Int) ? Int : S
         @test promote(R(3), T(3)) === (sizeof(R) < sizeof(T) ? (T(3), T(3)) : (R(3), R(3)))
     end
+
+    for i in 1:length(UItypes)
+        @test promote(UItypes[i](3), SItypes[i](3)) === (UItypes[i](3), UItypes[i](3))
+    end
 end
 @testset "limiting conversions" begin
     for T in (Int8, Int16, Int32, Int64)
diff --git a/test/intfuncs.jl b/test/intfuncs.jl
index 24505935a2fed3..40cda879193d46 100644
--- a/test/intfuncs.jl
+++ b/test/intfuncs.jl
@@ -12,6 +12,7 @@ using Random
         @test gcd(T(0), T(15)) === T(15)
         @test gcd(T(15), T(0)) === T(15)
         if T <: Signed
+            @test gcd(T(-12)) === T(12)
             @test gcd(T(0), T(-15)) === T(15)
             @test gcd(T(-15), T(0)) === T(15)
             @test gcd(T(3), T(-15)) === T(3)
@@ -78,6 +79,7 @@ using Random
         @test lcm(T(0), T(3)) === T(0)
         @test lcm(T(0), T(0)) === T(0)
         if T <: Signed
+            @test lcm(T(-12)) === T(12)
             @test lcm(T(0), T(-4)) === T(0)
             @test lcm(T(-4), T(0)) === T(0)
             @test lcm(T(4), T(-6)) === T(12)
@@ -154,6 +156,7 @@ end
         @test gcd(T[3, 15]) === T(3)
         @test gcd(T[0, 15]) === T(15)
         if T <: Signed
+            @test gcd(T[-12]) === T(12)
             @test gcd(T[3,-15]) === T(3)
             @test gcd(T[-3,-15]) === T(3)
         end
@@ -163,12 +166,12 @@ end
         @test gcd(T[2, 4, 3, 5]) === T(1)
 
         @test lcm(T[]) === T(1)
-        @test lcm(T[2]) === T(2)
         @test lcm(T[2, 3]) === T(6)
         @test lcm(T[4, 6]) === T(12)
         @test lcm(T[3, 0]) === T(0)
         @test lcm(T[0, 0]) === T(0)
         if T <: Signed
+            @test lcm(T[-2]) === T(2)
             @test lcm(T[4, -6]) === T(12)
             @test lcm(T[-4, -6]) === T(12)
         end
@@ -322,6 +325,8 @@ end
 
 end
 
+primitive type BitString128 128 end
+
 @testset "bin/oct/dec/hex/bits" begin
     @test string(UInt32('3'), base = 2) == "110011"
     @test string(UInt32('3'), pad = 7, base = 2) == "0110011"
@@ -353,6 +358,7 @@ end
     @test bitstring(1035) == (Int == Int32 ? "00000000000000000000010000001011" :
         "0000000000000000000000000000000000000000000000000000010000001011")
     @test bitstring(Int128(3)) == "00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011"
+    @test bitstring(reinterpret(BitString128, Int128(3))) == "00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011"
 end
 
 @testset "digits/base" begin
@@ -473,3 +479,11 @@ end
 for b in [-100:-2; 2:100;]
     @test Base.ndigits0z(0, b) == 0
 end
+
+@testset "constant prop in gcd" begin
+    gcd_info = first(code_typed(() -> gcd(14, 21))[])
+    @test gcd_info.code == Any[Core.ReturnNode(7)]
+    # Same check for `//`: the typed code must be a single constant return.
+    rat_info = first(code_typed(() -> 14 // 21)[])
+    @test rat_info.code == Any[Core.ReturnNode(2 // 3)]
+end
diff --git a/test/intrinsics.jl b/test/intrinsics.jl
index 47560d7dbd6267..589590cf78d14e 100644
--- a/test/intrinsics.jl
+++ b/test/intrinsics.jl
@@ -100,9 +100,20 @@ let f = Core.Intrinsics.ashr_int
     @test f(Int32(2), -1) == 0
 end
 
+const ReplaceType = ccall(:jl_apply_cmpswap_type, Any, (Any,), T) where T  # NOTE(review): presumably the result type of atomic replace ops, parameterised by T — confirm
+
 # issue #29929
-@test unsafe_store!(Ptr{Nothing}(C_NULL), nothing) === Ptr{Nothing}(0)
-@test unsafe_load(Ptr{Nothing}(0)) === nothing
+let p = Ptr{Nothing}(0)  # address 0, typed as a pointer to Nothing
+    @test unsafe_store!(p, nothing) === C_NULL  # the store result is compared against the null pointer
+    @test unsafe_load(p) === nothing
+    @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === nothing
+    @test Core.Intrinsics.atomic_pointerset(p, nothing, :sequentially_consistent) === p  # returns the pointer
+    @test Core.Intrinsics.atomic_pointerswap(p, nothing, :sequentially_consistent) === nothing
+    @test Core.Intrinsics.atomic_pointermodify(p, (i, j) -> j, nothing, :sequentially_consistent) === Pair(nothing, nothing)  # result is a Pair
+    @test Core.Intrinsics.atomic_pointerreplace(p, nothing, nothing, :sequentially_consistent, :sequentially_consistent) === ReplaceType{Nothing}((nothing, true))  # flag is true when `nothing` is the expected value
+    @test Core.Intrinsics.atomic_pointerreplace(p, missing, nothing, :sequentially_consistent, :sequentially_consistent) === ReplaceType{Nothing}((nothing, false))  # flag is false when `missing` is the expected value
+end
+
 struct GhostStruct end
 @test unsafe_load(Ptr{GhostStruct}(rand(Int))) === GhostStruct()
 
@@ -152,3 +163,116 @@ end
     @test_intrinsic Core.Intrinsics.fptosi Int Float16(3.3) 3
     @test_intrinsic Core.Intrinsics.fptoui UInt Float16(3.3) UInt(3)
 end
+
+@test Core.Intrinsics.atomic_fence(:sequentially_consistent) === nothing
+@test Core.Intrinsics.atomic_pointerref(C_NULL, :sequentially_consistent) === nothing  # identity check: `==` could be overloaded
+
+primitive type Int256 <: Signed 256 end
+Int256(i::Int) = Core.Intrinsics.sext_int(Int256, i)
+primitive type Int512 <: Signed 512 end
+Int512(i::Int) = Core.Intrinsics.sext_int(Int512, i)
+function add(i::T, j)::T where {T}; return i + j; end
+swap(i, j) = j
+
+for TT in (Int8, Int16, Int32, Int64, Int128, Int256, Int512, Complex{Int32}, Complex{Int512}, Any)
+    r = Ref{TT}(10)
+    GC.@preserve r begin
+        (function (::Type{TT}) where TT
+            p = Base.unsafe_convert(Ptr{TT}, r)
+            T(x) = convert(TT, x)
+            S = UInt32
+            if TT !== Any
+                @test_throws TypeError Core.Intrinsics.atomic_pointerset(p, S(1), :sequentially_consistent)
+                @test_throws TypeError Core.Intrinsics.atomic_pointerswap(p, S(100), :sequentially_consistent)
+                @test_throws TypeError Core.Intrinsics.atomic_pointerreplace(p, T(100), S(2), :sequentially_consistent, :sequentially_consistent)
+            end
+            @test Core.Intrinsics.pointerref(p, 1, 1) === T(10) === r[]
+            if sizeof(r) > 8
+                @test_throws ErrorException("atomic_pointerref: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent)
+                @test_throws ErrorException("atomic_pointerset: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerset(p, T(1), :sequentially_consistent)
+                @test_throws ErrorException("atomic_pointerswap: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerswap(p, T(100), :sequentially_consistent)
+                @test_throws ErrorException("atomic_pointermodify: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent)
+                @test_throws ErrorException("atomic_pointermodify: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointermodify(p, swap, S(1), :sequentially_consistent)
+                @test_throws ErrorException("atomic_pointerreplace: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerreplace(p, T(100), T(2), :sequentially_consistent, :sequentially_consistent)
+                @test_throws ErrorException("atomic_pointerreplace: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent)
+                @test Core.Intrinsics.pointerref(p, 1, 1) === T(10) === r[]
+            else
+                TT !== Any && @test_throws TypeError Core.Intrinsics.atomic_pointermodify(p, swap, S(1), :sequentially_consistent)
+                @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(10)
+                @test Core.Intrinsics.atomic_pointerset(p, T(1), :sequentially_consistent) === p
+                @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(1)
+                @test Core.Intrinsics.atomic_pointerreplace(p, T(1), T(100), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(1), true))
+                @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(100)
+                @test Core.Intrinsics.atomic_pointerreplace(p, T(1), T(1), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(100), false))
+                @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(100)
+                @test Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent) === Pair{TT,TT}(T(100), T(101))
+                @test Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent) === Pair{TT,TT}(T(101), T(102))
+                @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(102)
+                @test Core.Intrinsics.atomic_pointerswap(p, T(103), :sequentially_consistent) === T(102)
+                @test Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(103), false))
+                @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(103)
+            end
+            if TT === Any
+                @test Core.Intrinsics.atomic_pointermodify(p, swap, S(103), :sequentially_consistent) === Pair{TT,TT}(T(103), S(103))
+                @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === S(103)
+                @test Core.Intrinsics.atomic_pointerset(p, S(1), :sequentially_consistent) === p
+                @test Core.Intrinsics.atomic_pointerswap(p, S(100), :sequentially_consistent) === S(1)
+                @test Core.Intrinsics.atomic_pointerreplace(p, T(100), S(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((S(100), false))
+                @test Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((S(100), true))
+                @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(2)
+            end
+        end)(TT,)
+    end
+end
+
+mutable struct IntWrap <: Signed
+    x::Int
+end
+Base.:(+)(a::IntWrap, b::Int) = IntWrap(a.x + b)
+Base.:(+)(a::IntWrap, b::IntWrap) = IntWrap(a.x + b.x)
+Base.show(io::IO, a::IntWrap) = print(io, "IntWrap(", a.x, ")")
+(function()
+    TT = IntWrap
+    T(x) = convert(TT, x)
+    r = Ref{TT}(10)
+    p = Base.unsafe_convert(Ptr{TT}, r)
+    GC.@preserve r begin
+        S = UInt32
+        @test_throws TypeError Core.Intrinsics.atomic_pointerset(p, S(1), :sequentially_consistent)
+        @test_throws TypeError Core.Intrinsics.atomic_pointerswap(p, S(100), :sequentially_consistent)
+        @test_throws TypeError Core.Intrinsics.atomic_pointerreplace(p, T(100), S(2), :sequentially_consistent, :sequentially_consistent)
+        r2 = Core.Intrinsics.pointerref(p, 1, 1)
+        @test r2 isa IntWrap && r2.x === 10 === r[].x && r2 !== r[]
+        @test_throws TypeError Core.Intrinsics.atomic_pointermodify(p, swap, S(1), :sequentially_consistent)
+        r2 = Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent)
+        @test r2 isa IntWrap && r2.x === 10 === r[].x && r2 !== r[]
+        @test Core.Intrinsics.atomic_pointerset(p, T(1), :sequentially_consistent) === p
+        r2 = Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent)
+        @test r2 isa IntWrap && r2.x === 1 === r[].x && r2 !== r[]
+        r2, succ = Core.Intrinsics.atomic_pointerreplace(p, T(1), T(100), :sequentially_consistent, :sequentially_consistent)
+        @test r2 isa IntWrap && r2.x === 1 && r[].x === 100 && r2 !== r[]
+        @test succ
+        r2 = Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent)
+        @test r2 isa IntWrap && r2.x === 100 === r[].x && r2 !== r[]
+        r2, succ = Core.Intrinsics.atomic_pointerreplace(p, T(1), T(1), :sequentially_consistent, :sequentially_consistent)
+        @test r2 isa IntWrap && r2.x === 100 === r[].x && r2 !== r[]
+        @test !succ
+        r2 = Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent)
+        @test r2 isa IntWrap && r2.x === 100 === r[].x && r2 !== r[]
+        r2, r3 = Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent)
+        @test r2 isa IntWrap && r2.x === 100 !== r[].x && r2 !== r[]
+        @test r3 isa IntWrap && r3.x === 101 === r[].x && r3 !== r[]
+        r2, r3 = Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent)
+        @test r2 isa IntWrap && r2.x === 101 !== r[].x && r2 !== r[]
+        @test r3 isa IntWrap && r3.x === 102 === r[].x && r3 !== r[]
+        r2 = Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent)
+        @test r2 isa IntWrap && r2.x === 102 === r[].x && r2 !== r[]
+        r2 = Core.Intrinsics.atomic_pointerswap(p, T(103), :sequentially_consistent)
+        @test r2 isa IntWrap && r2.x === 102 !== r[].x && r[].x == 103 && r2 !== r[]
+        r2, succ = Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent)
+        @test r2 isa IntWrap && r2.x === 103 === r[].x && r2 !== r[]
+        @test !succ
+        r2 = Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent)
+        @test r2 isa IntWrap && r2.x === 103 === r[].x && r2 !== r[]
+    end
+end)()
diff --git a/test/iobuffer.jl b/test/iobuffer.jl
index 80972a7c654484..d8211aa7086b34 100644
--- a/test/iobuffer.jl
+++ b/test/iobuffer.jl
@@ -9,7 +9,7 @@ bufcontents(io::Base.GenericIOBuffer) = unsafe_string(pointer(io.data), io.size)
 @testset "Read/write empty IOBuffer" begin
     io = IOBuffer()
     @test eof(io)
-    @test_throws EOFError read(io,UInt8)
+    @test_throws EOFError read(io, UInt8)
     @test write(io,"abc") === 3
     @test isreadable(io)
     @test iswritable(io)
@@ -18,7 +18,7 @@ bufcontents(io::Base.GenericIOBuffer) = unsafe_string(pointer(io.data), io.size)
     @test position(io) == 3
     @test eof(io)
     seek(io, 0)
-    @test read(io,UInt8) == convert(UInt8, 'a')
+    @test read(io, UInt8) == convert(UInt8, 'a')
     a = Vector{UInt8}(undef, 2)
     @test read!(io, a) == a
     @test a == UInt8['b','c']
@@ -34,22 +34,24 @@ bufcontents(io::Base.GenericIOBuffer) = unsafe_string(pointer(io.data), io.size)
     truncate(io, 10)
     @test position(io) == 0
     @test all(io.data .== 0)
-    @test write(io,Int16[1,2,3,4,5,6]) === 12
+    @test write(io, Int16[1, 2, 3, 4, 5, 6]) === 12
     seek(io, 2)
     truncate(io, 10)
     @test ioslength(io) == 10
     io.readable = false
-    @test_throws ArgumentError read!(io,UInt8[0])
+    @test_throws ArgumentError read!(io, UInt8[0])
     truncate(io, 0)
     @test write(io,"boston\ncambridge\n") > 0
     @test String(take!(io)) == "boston\ncambridge\n"
     @test String(take!(io)) == ""
     @test write(io, ComplexF64(0)) === 16
     @test write(io, Rational{Int64}(1//2)) === 16
-    close(io)
-    @test_throws ArgumentError write(io,UInt8[0])
-    @test_throws ArgumentError seek(io,0)
+    @test closewrite(io) === nothing
+    @test_throws ArgumentError write(io, UInt8[0])
     @test eof(io)
+    @test close(io) === nothing
+    @test_throws ArgumentError write(io, UInt8[0])
+    @test_throws ArgumentError seek(io, 0)
 end
 
 @testset "Read/write readonly IOBuffer" begin
@@ -237,7 +239,7 @@ end
     @test isreadable(bstream)
     @test iswritable(bstream)
     @test bytesavailable(bstream) == 0
-    @test sprint(show, bstream) == "BufferStream() bytes waiting:$(bytesavailable(bstream.buffer)), isopen:true"
+    @test sprint(show, bstream) == "BufferStream(bytes waiting=$(bytesavailable(bstream.buffer)), isopen=true)"
     a = rand(UInt8,10)
     write(bstream,a)
     @test !eof(bstream)
@@ -251,9 +253,10 @@ end
     @test !eof(bstream)
     read!(bstream,c)
     @test c == a[3:10]
-    @test close(bstream) === nothing
+    @test closewrite(bstream) === nothing
     @test eof(bstream)
     @test bytesavailable(bstream) == 0
+    @test close(bstream) === nothing
     flag = Ref{Bool}(false)
     event = Base.Event()
     bstream = Base.BufferStream()
diff --git a/test/iterators.jl b/test/iterators.jl
index b45a51fd87042d..c7d00c4e7e2e83 100644
--- a/test/iterators.jl
+++ b/test/iterators.jl
@@ -2,6 +2,7 @@
 
 using Base.Iterators
 using Random
+using Base: IdentityUnitRange
 
 @test Base.IteratorSize(Any) isa Base.SizeUnknown
 
@@ -198,7 +199,7 @@ end
     @test collect(takewhile(<(4),1:10)) == [1,2,3]
     @test collect(takewhile(<(4),Iterators.countfrom(1))) == [1,2,3]
     @test collect(takewhile(<(4),5:10)) == []
-    @test collect(takewhile(_->true,5:10)) == 5:10
+    @test collect(takewhile(Returns(true),5:10)) == 5:10
     @test collect(takewhile(isodd,[1,1,2,3])) == [1,1]
     @test collect(takewhile(<(2), takewhile(<(3), [1,1,2,3]))) == [1,1]
 end
@@ -209,8 +210,8 @@ end
     @test collect(dropwhile(<(4), 1:10)) == 4:10
     @test collect(dropwhile(<(4), 1:10)) isa Vector{Int}
     @test isempty(dropwhile(<(4), []))
-    @test collect(dropwhile(_->false,1:3)) == 1:3
-    @test isempty(dropwhile(_->true, 1:3))
+    @test collect(dropwhile(Returns(false),1:3)) == 1:3
+    @test isempty(dropwhile(Returns(true), 1:3))
     @test collect(dropwhile(isodd,[1,1,2,3])) == [2,3]
     @test collect(dropwhile(iseven,dropwhile(isodd,[1,1,2,3]))) == [3]
 end
@@ -291,6 +292,15 @@ let (a, b) = (1:3, [4 6;
     end
 end
 
+# collect a generator that wraps a stateful iterator
+let
+    itr = (i+1 for i in Base.Stateful([1,2,3]))
+    @test collect(itr) == [2, 3, 4]
+    A = zeros(Int, 0, 0)
+    itr = (i-1 for i in Base.Stateful(A))
+    @test collect(itr) == Int[] # Stateful does not preserve shape, so the 0x0 input collects to an empty vector
+end
+
 # with 1D inputs
 let a = 1:2,
     b = 1.0:10.0,
@@ -381,7 +391,7 @@ let a = 1:2,
     end
 
     # size infinite or unknown raises an error
-    for itr in Any[countfrom(1), Iterators.filter(i->0, 1:10)]
+    for itr in Any[countfrom(1), Iterators.filter(Returns(0), 1:10)]
         @test_throws ArgumentError length(product(itr))
         @test_throws ArgumentError   size(product(itr))
         @test_throws ArgumentError  ndims(product(itr))
@@ -592,7 +602,7 @@ end
 end
 
 @testset "filter empty iterable #16704" begin
-    arr = filter(n -> true, 1:0)
+    arr = filter(Returns(true), 1:0)
     @test length(arr) == 0
     @test eltype(arr) == Int
 end
@@ -848,3 +858,15 @@ end
     @test cumprod(x + 1 for x in 1:3) == [2, 6, 24]
     @test accumulate(+, (x^2 for x in 1:3); init=100) == [101, 105, 114]
 end
+
+@testset "proper partition for non-1-indexed vector" begin
+    @test partition(IdentityUnitRange(11:19), 5) |> collect == [11:15,16:19] # IdentityUnitRange: axes are 11:19, not 1:9
+end
+
+@testset "Iterators.peel" begin
+    @test Iterators.peel([]) === nothing  # empty input: no head to peel, `nothing` is returned
+    @test Iterators.peel(1:10)[1] == 1
+    @test Iterators.peel(1:10)[2] |> collect == 2:10
+    @test Iterators.peel(x^2 for x in 2:4)[1] == 4
+    @test Iterators.peel(x^2 for x in 2:4)[2] |> collect == [9, 16]
+end
diff --git a/test/keywordargs.jl b/test/keywordargs.jl
index f9be8edd80dc09..9cbae2b1a0b19d 100644
--- a/test/keywordargs.jl
+++ b/test/keywordargs.jl
@@ -374,3 +374,16 @@ using InteractiveUtils
 no_kw_args(x::Int) = 0
 @test_throws MethodError no_kw_args(1, k=1)
 @test_throws MethodError no_kw_args("", k=1)
+
+# issue #40964
+f40964(xs::Int...=1; k = 2) = (xs, k)
+@test f40964() === ((1,), 2)
+@test f40964(7, 8) === ((7,8), 2)
+@test f40964(7, 8, k=0) === ((7,8), 0)
+# issue #41416
+@test f40964(; k = 1) === ((1,), 1)
+f41416(a...="a"; b=true) = (b, a)
+@test f41416()           === (true, ("a",))
+@test f41416(;b=false)   === (false, ("a",))
+@test f41416(33)         === (true, (33,))
+@test f41416(3; b=false) === (false, (3,))
diff --git a/test/llvmpasses/alloc-opt-gcframe.jl b/test/llvmpasses/alloc-opt-gcframe.jl
index e48a85641257b7..227569a545adb6 100644
--- a/test/llvmpasses/alloc-opt-gcframe.jl
+++ b/test/llvmpasses/alloc-opt-gcframe.jl
@@ -16,6 +16,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 # CHECK: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !0
 println("""
 define {} addrspace(10)* @return_obj() {
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %ptls_i8 = bitcast {}*** %ptls to i8*
   %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
@@ -33,6 +34,7 @@ define {} addrspace(10)* @return_obj() {
 # CHECK-NOT: @llvm.lifetime.end
 println("""
 define i64 @return_load(i64 %i) {
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %ptls_i8 = bitcast {}*** %ptls to i8*
   %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
@@ -47,12 +49,14 @@ define i64 @return_load(i64 %i) {
 # CHECK-LABEL: }{{$}}
 
 # CHECK-LABEL: @ccall_obj
+# CHECK: call {}*** @julia.get_pgcstack()
 # CHECK: call {}*** @julia.ptls_states()
 # CHECK-NOT: @julia.gc_alloc_obj
 # CHECK: @jl_gc_pool_alloc
 # CHECK: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !0
 println("""
 define void @ccall_obj(i8* %fptr) {
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %ptls_i8 = bitcast {}*** %ptls to i8*
   %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
@@ -65,6 +69,7 @@ define void @ccall_obj(i8* %fptr) {
 
 # CHECK-LABEL: @ccall_ptr
 # CHECK: alloca i64
+# CHECK: call {}*** @julia.get_pgcstack()
 # CHECK: call {}*** @julia.ptls_states()
 # CHECK-NOT: @julia.gc_alloc_obj
 # CHECK-NOT: @jl_gc_pool_alloc
@@ -75,6 +80,7 @@ define void @ccall_obj(i8* %fptr) {
 # CHECK-NEXT: ret void
 println("""
 define void @ccall_ptr(i8* %fptr) {
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %ptls_i8 = bitcast {}*** %ptls to i8*
   %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
@@ -89,12 +95,14 @@ define void @ccall_ptr(i8* %fptr) {
 # CHECK-LABEL: }{{$}}
 
 # CHECK-LABEL: @ccall_unknown_bundle
+# CHECK: call {}*** @julia.get_pgcstack()
 # CHECK: call {}*** @julia.ptls_states()
 # CHECK-NOT: @julia.gc_alloc_obj
 # CHECK: @jl_gc_pool_alloc
 # CHECK: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !0
 println("""
 define void @ccall_unknown_bundle(i8* %fptr) {
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %ptls_i8 = bitcast {}*** %ptls to i8*
   %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
@@ -110,6 +118,7 @@ define void @ccall_unknown_bundle(i8* %fptr) {
 
 # CHECK-LABEL: @lifetime_branches
 # CHECK: alloca i64
+# CHECK: call {}*** @julia.get_pgcstack()
 # CHECK: call {}*** @julia.ptls_states()
 # CHECK: L1:
 # CHECK-NEXT: call void @llvm.lifetime.start{{.*}}(i64 8,
@@ -126,6 +135,7 @@ define void @ccall_unknown_bundle(i8* %fptr) {
 # CHECK-NEXT: call void @llvm.lifetime.end{{.*}}(i64 8,
 println("""
 define void @lifetime_branches(i8* %fptr, i1 %b, i1 %b2) {
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %ptls_i8 = bitcast {}*** %ptls to i8*
   br i1 %b, label %L1, label %L3
@@ -151,12 +161,14 @@ L3:
 # CHECK-LABEL: }{{$}}
 
 # CHECK-LABEL: @object_field
+# CHECK: call {}*** @julia.get_pgcstack()
 # CHECK: call {}*** @julia.ptls_states()
 # CHECK-NOT: @julia.gc_alloc_obj
 # CHECK-NOT: @jl_gc_pool_alloc
 # CHECK-NOT: store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}}, align 8, !tbaa !0
 println("""
 define void @object_field({} addrspace(10)* %field) {
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %ptls_i8 = bitcast {}*** %ptls to i8*
   %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
@@ -170,6 +182,7 @@ define void @object_field({} addrspace(10)* %field) {
 
 # CHECK-LABEL: @memcpy_opt
 # CHECK: alloca [16 x i8], align 16
+# CHECK: call {}*** @julia.get_pgcstack()
 # CHECK: call {}*** @julia.ptls_states()
 # CHECK-NOT: @julia.gc_alloc_obj
 # CHECK-NOT: @jl_gc_pool_alloc
@@ -177,6 +190,7 @@ define void @object_field({} addrspace(10)* %field) {
 println("""
 define void @memcpy_opt(i8* %v22) {
 top:
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %v6 = call {}*** @julia.ptls_states()
   %v18 = bitcast {}*** %v6 to i8*
   %v19 = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %v18, $isz 16, {} addrspace(10)* @tag)
@@ -189,6 +203,7 @@ top:
 # CHECK-LABEL: }{{$}}
 
 # CHECK-LABEL: @preserve_opt
+# CHECK: call {}*** @julia.get_pgcstack()
 # CHECK: call {}*** @julia.ptls_states()
 # CHECK-NOT: @julia.gc_alloc_obj
 # CHECK-NOT: @jl_gc_pool_alloc
@@ -197,6 +212,7 @@ top:
 println("""
 define void @preserve_opt(i8* %v22) {
 top:
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %v6 = call {}*** @julia.ptls_states()
   %v18 = bitcast {}*** %v6 to i8*
   %v19 = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %v18, $isz 16, {} addrspace(10)* @tag)
@@ -212,6 +228,7 @@ top:
 # CHECK-LABEL: }{{$}}
 
 # CHECK-LABEL: @preserve_branches
+# CHECK: call {}*** @julia.get_pgcstack()
 # CHECK: call {}*** @julia.ptls_states()
 # CHECK: L1:
 # CHECK-NEXT: @external_function()
@@ -224,6 +241,7 @@ top:
 # CHECK: L3:
 println("""
 define void @preserve_branches(i8* %fptr, i1 %b, i1 %b2) {
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %ptls_i8 = bitcast {}*** %ptls to i8*
   br i1 %b, label %L1, label %L3
@@ -249,6 +267,7 @@ L3:
 println("""
 declare void @external_function()
 declare {}*** @julia.ptls_states()
+declare {}*** @julia.get_pgcstack()
 declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj(i8*, $isz, {} addrspace(10)*)
 declare {}* @julia.pointer_from_objref({} addrspace(11)*)
 declare void @llvm.memcpy.p11i8.p0i8.i64(i8 addrspace(11)* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
diff --git a/test/llvmpasses/alloc-opt-pass.jl b/test/llvmpasses/alloc-opt-pass.jl
index 9bde40036ff735..8fbc9d2c7b7c47 100644
--- a/test/llvmpasses/alloc-opt-pass.jl
+++ b/test/llvmpasses/alloc-opt-pass.jl
@@ -24,6 +24,7 @@ println("""
 # CHECK: L3:
 println("""
 define void @preserve_branches(i8* %fptr, i1 %b, i1 %b2) {
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %ptls_i8 = bitcast {}*** %ptls to i8*
   br i1 %b, label %L1, label %L3
@@ -58,6 +59,7 @@ L3:
 # CHECK: L3:
 println("""
 define void @preserve_branches2(i8* %fptr, i1 %b, i1 %b2) {
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %ptls_i8 = bitcast {}*** %ptls to i8*
   %v2 = call {} addrspace(10)* @external_function2()
@@ -85,6 +87,7 @@ L3:
 # CHECK: ret void
 println("""
 define void @legal_int_types() {
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %ptls_i8 = bitcast {}*** %ptls to i8*
   %var1 = call {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 12, {} addrspace(10)* @tag)
@@ -101,6 +104,7 @@ println("""
 declare void @external_function()
 declare {} addrspace(10)* @external_function2()
 declare {}*** @julia.ptls_states()
+declare {}*** @julia.get_pgcstack()
 declare noalias {} addrspace(10)* @julia.gc_alloc_obj(i8*, $isz, {} addrspace(10)*)
 declare {}* @julia.pointer_from_objref({} addrspace(11)*)
 declare void @llvm.memcpy.p11i8.p0i8.i64(i8 addrspace(11)* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
@@ -119,6 +123,7 @@ declare void @llvm.julia.gc_preserve_end(token)
 # CHECK: load i
 println("""
 define void @memref_collision($isz %x) {
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %ptls_i8 = bitcast {}*** %ptls to i8*
   %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
diff --git a/test/llvmpasses/final-lower-gc.ll b/test/llvmpasses/final-lower-gc.ll
index 04376f7f814964..e29ada14a0d00c 100644
--- a/test/llvmpasses/final-lower-gc.ll
+++ b/test/llvmpasses/final-lower-gc.ll
@@ -5,6 +5,7 @@
 declare void @boxed_simple({} addrspace(10)*, {} addrspace(10)*)
 declare {} addrspace(10)* @jl_box_int64(i64)
 declare {}*** @julia.ptls_states()
+declare {}*** @julia.get_pgcstack()
 declare void @jl_safepoint()
 declare {} addrspace(10)* @jl_apply_generic({} addrspace(10)*, {} addrspace(10)**, i32)
 
@@ -21,12 +22,11 @@ top:
 ; CHECK-LABEL: @gc_frame_lowering
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
   %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2)
-; CHECK: %ptls = call {}*** @julia.ptls_states()
-  %ptls = call {}*** @julia.ptls_states()
+; CHECK:  [[GCFRAME_SLOT:%.*]] = call {}*** @julia.get_pgcstack()
+  %pgcstack = call {}*** @julia.get_pgcstack()
 ; CHECK-DAG: [[GCFRAME_SIZE_PTR:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 0
 ; CHECK-DAG: [[GCFRAME_SIZE_PTR2:%.*]] = bitcast {} addrspace(10)** [[GCFRAME_SIZE_PTR]] to i64*
 ; CHECK-DAG: store i64 8, i64* [[GCFRAME_SIZE_PTR2]], align 8, !tbaa !0
-; CHECK-DAG: [[GCFRAME_SLOT:%.*]] = getelementptr inbounds {}**, {}*** %ptls, i32 0
 ; CHECK-DAG: [[PREV_GCFRAME_PTR:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 1
 ; CHECK-DAG: [[PREV_GCFRAME_PTR2:%.*]] = bitcast {} addrspace(10)** [[PREV_GCFRAME_PTR]] to {}***
 ; CHECK-DAG: [[PREV_GCFRAME:%.*]] = load {}**, {}*** [[GCFRAME_SLOT]], align 8
@@ -46,8 +46,7 @@ top:
   call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed)
 ; CHECK-NEXT: [[PREV_GCFRAME_PTR3:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 1
 ; CHECK-NEXT: [[PREV_GCFRAME_PTR4:%.*]] = load {} addrspace(10)*, {} addrspace(10)** [[PREV_GCFRAME_PTR3]], align 8, !tbaa !0
-; CHECK-NEXT: [[GCFRAME_SLOT3:%.*]] = getelementptr inbounds {}**, {}*** %ptls, i32 0
-; CHECK-NEXT: [[GCFRAME_SLOT4:%.*]] = bitcast {}*** [[GCFRAME_SLOT3]] to {} addrspace(10)**
+; CHECK-NEXT: [[GCFRAME_SLOT4:%.*]] = bitcast {}*** [[GCFRAME_SLOT]] to {} addrspace(10)**
 ; CHECK-NEXT: store {} addrspace(10)* [[PREV_GCFRAME_PTR4]], {} addrspace(10)** [[GCFRAME_SLOT4]], align 8, !tbaa !0
   call void @julia.pop_gc_frame({} addrspace(10)** %gcframe)
 ; CHECK-NEXT: ret void
@@ -57,6 +56,7 @@ top:
 define {} addrspace(10)* @gc_alloc_lowering() {
 top:
 ; CHECK-LABEL: @gc_alloc_lowering
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %ptls_i8 = bitcast {}*** %ptls to i8*
 ; CHECK: %v = call noalias nonnull {} addrspace(10)* @jl_gc_pool_alloc
diff --git a/test/llvmpasses/gcroots.ll b/test/llvmpasses/gcroots.ll
index c11bb7ae4fe56b..00ea20e504bee3 100644
--- a/test/llvmpasses/gcroots.ll
+++ b/test/llvmpasses/gcroots.ll
@@ -4,12 +4,14 @@
 declare void @boxed_simple({} addrspace(10)*, {} addrspace(10)*)
 declare {} addrspace(10)* @jl_box_int64(i64)
 declare {}*** @julia.ptls_states()
+declare {}*** @julia.get_pgcstack()
 declare void @jl_safepoint()
 declare {} addrspace(10)* @jl_apply_generic({} addrspace(10)*, {} addrspace(10)**, i32)
 
 define void @simple(i64 %a, i64 %b) {
 top:
 ; CHECK-LABEL: @simple
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
 ; CHECK: call {} addrspace(10)* @jl_box_int64
@@ -33,6 +35,7 @@ define void @leftover_alloca({} addrspace(10)* %a) {
 ; relying on mem2reg to catch simple cases such as this earlier
 ; CHECK-LABEL: @leftover_alloca
 ; CHECK: %var = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %var = alloca {} addrspace(10)*
     store {} addrspace(10)* %a, {} addrspace(10)** %var
@@ -47,6 +50,7 @@ declare void @union_arg({{} addrspace(10)*, i8})
 
 define void @simple_union() {
 ; CHECK-LABEL: @simple_union
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
 ; CHECK: %a = call { {} addrspace(10)*, i8 } @union_ret()
     %a = call { {} addrspace(10)*, i8 } @union_ret()
@@ -61,6 +65,7 @@ declare void @one_arg_boxed({} addrspace(10)*)
 
 define void @select_simple(i64 %a, i64 %b) {
 ; CHECK-LABEL: @select_simple
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
     %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b)
@@ -74,6 +79,7 @@ define void @phi_simple(i64 %a, i64 %b) {
 top:
 ; CHECK-LABEL: @phi_simple
 ; CHECK:   %gcframe = alloca {} addrspace(10)*, i32 3
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %cmp = icmp eq i64 %a, %b
     br i1 %cmp, label %alabel, label %blabel
@@ -96,6 +102,7 @@ declare void @one_arg_decayed(i64 addrspace(12)*)
 define void @select_lift(i64 %a, i64 %b) {
 ; CHECK-LABEL: @select_lift
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
     %adecayed = addrspacecast {} addrspace(10)* %aboxed to i64 addrspace(12)*
@@ -112,6 +119,7 @@ define void @phi_lift(i64 %a, i64 %b) {
 top:
 ; CHECK-LABEL: @phi_lift
 ; CHECK: %gclift = phi {} addrspace(10)* [ %aboxed, %alabel ], [ %bboxed, %blabel ], [ %gclift, %common ]
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %cmp = icmp eq i64 %a, %b
     br i1 %cmp, label %alabel, label %blabel
@@ -133,6 +141,7 @@ common:
 define void @phi_lift_union(i64 %a, i64 %b) {
 top:
 ; CHECK-LABEL: @phi_lift_union
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %cmp = icmp eq i64 %a, %b
     br i1 %cmp, label %alabel, label %blabel
@@ -158,6 +167,7 @@ define void @live_if_live_out(i64 %a, i64 %b) {
 ; CHECK-LABEL: @live_if_live_out
 top:
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
 ; The failure case is failing to realize that `aboxed` is live across the first
 ; one_arg_boxed safepoint and putting bboxed in the same root slot
@@ -175,6 +185,7 @@ succ:
 define {} addrspace(10)* @ret_use(i64 %a, i64 %b) {
 ; CHECK-LABEL: @ret_use
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
 ; CHECK: store {} addrspace(10)* %aboxed
@@ -185,6 +196,7 @@ define {} addrspace(10)* @ret_use(i64 %a, i64 %b) {
 define {{} addrspace(10)*, i8} @ret_use_struct() {
 ; CHECK-LABEL: @ret_use_struct
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
 ; CHECK: %aunion = call { {} addrspace(10)*, i8 } @union_ret()
     %aunion = call { {} addrspace(10)*, i8 } @union_ret()
@@ -201,6 +213,7 @@ define i8 @nosafepoint({} addrspace(10)* dereferenceable(16)) {
 ; CHECK-LABEL: @nosafepoint
 ; CHECK-NOT: %gcframe
 top:
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %1 = call {}*** @julia.ptls_states()
   %2 = bitcast {}*** %1 to {} addrspace(10)**
   %3 = getelementptr {} addrspace(10)*, {} addrspace(10)** %2, i64 3
@@ -219,6 +232,7 @@ top:
 define void @global_ref() {
 ; CHECK-LABEL: @global_ref
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %loaded = load {} addrspace(10)*, {} addrspace(10)** getelementptr ({} addrspace(10)*, {} addrspace(10)** inttoptr (i64 140540744325952 to {} addrspace(10)**), i64 1)
 ; CHECK: store {} addrspace(10)* %loaded, {} addrspace(10)**
@@ -230,6 +244,7 @@ define {} addrspace(10)* @no_redundant_rerooting(i64 %a, i1 %cond) {
 ; CHECK-LABEL: @no_redundant_rerooting
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3
 top:
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
 ; CHECK: store {} addrspace(10)* %aboxed
@@ -254,6 +269,7 @@ define void @memcpy_use(i64 %a, i64 *%aptr) {
 ; CHECK-LABEL: @memcpy_use
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3
 top:
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
 ; CHECK: store {} addrspace(10)* %aboxed
@@ -270,6 +286,7 @@ define void @gc_preserve(i64 %a) {
 ; CHECK-LABEL: @gc_preserve
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
 top:
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
 ; CHECK: store {} addrspace(10)* %aboxed
@@ -291,6 +308,7 @@ define void @gc_preserve_vec([2 x <2 x {} addrspace(10)*>] addrspace(11)* nocapt
 ; CHECK-LABEL: @gc_preserve_vec
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 6
 top:
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %v = load [2 x <2 x {} addrspace(10)*>], [2 x <2 x {} addrspace(10)*>] addrspace(11)* %0, align 8
 ; CHECK-DAG: [[EXTRACT11:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 0
@@ -318,6 +336,7 @@ define {} addrspace(10)* @gv_const() {
 ; CHECK-LABEL: @gv_const
 ; CHECK-NOT: %gcframe
 top:
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %v10 = load {}*, {}** @gv1, !tbaa !2
     %v1 = addrspacecast {}* %v10 to {} addrspace(10)*
@@ -331,6 +350,7 @@ top:
 define {} addrspace(10)* @vec_jlcallarg({} addrspace(10)*, {} addrspace(10)**, i32) {
 ; CHECK-LABEL: @vec_jlcallarg
 ; CHECK-NOT: %gcframe
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %v4 = call {}*** @julia.ptls_states()
   %v5 = bitcast {} addrspace(10)** %1 to <2 x {} addrspace(10)*>*
   %v6 = load <2 x {} addrspace(10)*>, <2 x {} addrspace(10)*>* %v5, align 8
@@ -343,6 +363,7 @@ declare {} addrspace(10) *@alloc()
 define {} addrspace(10)* @vec_loadobj() {
 ; CHECK-LABEL: @vec_loadobj
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %v4 = call {}*** @julia.ptls_states()
   %obj = call {} addrspace(10) *@alloc()
   %v1 = bitcast {} addrspace(10) * %obj to {} addrspace(10)* addrspace(10)*
@@ -356,6 +377,7 @@ define {} addrspace(10)* @vec_loadobj() {
 define {} addrspace(10)* @vec_gep() {
 ; CHECK-LABEL: @vec_gep
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %v4 = call {}*** @julia.ptls_states()
   %obj = call {} addrspace(10) *@alloc()
   %obj1 = bitcast {} addrspace(10) * %obj to {} addrspace(10)* addrspace(10)*
@@ -371,6 +393,7 @@ define void @loopyness(i1 %cond1, {} addrspace(10) *%arg) {
 ; CHECK-LABEL: @loopyness
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
 top:
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     br label %header
 
@@ -402,6 +425,7 @@ define {} addrspace(10)* @phi_union(i1 %cond) {
 ; CHECK-LABEL: @phi_union
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3
 top:
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   br i1 %cond, label %a, label %b
 
@@ -426,6 +450,7 @@ define {} addrspace(10)* @select_union(i1 %cond) {
 ; CHECK-LABEL: @select_union
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3
 top:
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %obj = call {} addrspace(10) *@alloc()
   %aobj = insertvalue {{} addrspace(10)*, i8} undef, {} addrspace(10)* %obj, 0
@@ -441,6 +466,7 @@ define i8 @simple_arrayptr() {
 ; CHECK-LABEL: @simple_arrayptr
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
 top:
+   %pgcstack = call {}*** @julia.get_pgcstack()
    %ptls = call {}*** @julia.ptls_states()
    %obj1 = call {} addrspace(10) *@alloc()
    %obj2 = call {} addrspace(10) *@alloc()
@@ -457,6 +483,7 @@ define {} addrspace(10)* @vecstoreload(<2 x {} addrspace(10)*> *%arg) {
 ; CHECK-LABEL: @vecstoreload
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
 top:
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %loaded = load <2 x {} addrspace(10)*>, <2 x {} addrspace(10)*> *%arg
     call void @jl_safepoint()
@@ -470,6 +497,7 @@ define void @vecphi(i1 %cond, <2 x {} addrspace(10)*> *%arg) {
 ; CHECK-LABEL: @vecphi
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
 top:
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     br i1 %cond, label %A, label %B
 
@@ -495,6 +523,7 @@ define i8 @phi_arrayptr(i1 %cond) {
 ; CHECK-LABEL: @phi_arrayptr
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
 top:
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     br i1 %cond, label %A, label %B
 
@@ -533,6 +562,7 @@ define void @vecselect(i1 %cond, <2 x {} addrspace(10)*> *%arg) {
 ; CHECK-LABEL: @vecselect
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
 top:
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %loaded = load <2 x {} addrspace(10)*>, <2 x {} addrspace(10)*> *%arg
     call void @jl_safepoint()
@@ -548,6 +578,7 @@ top:
 define void @vecselect_lift(i1 %cond, <2 x {} addrspace(10)*> *%arg) {
 ; CHECK-LABEL: @vecselect_lift
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %loaded = load <2 x {} addrspace(10)*>, <2 x {} addrspace(10)*> *%arg
     %decayed = addrspacecast <2 x {} addrspace(10)*> %loaded to <2 x i64 addrspace(12)*>
@@ -565,6 +596,7 @@ define void @vecselect_lift(i1 %cond, <2 x {} addrspace(10)*> *%arg) {
 define void @vecvecselect_lift(<2 x i1> %cond, <2 x {} addrspace(10)*> *%arg) {
 ; CHECK-LABEL: @vecvecselect_lift
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %loaded = load <2 x {} addrspace(10)*>, <2 x {} addrspace(10)*> *%arg
     %decayed = addrspacecast <2 x {} addrspace(10)*> %loaded to <2 x i64 addrspace(12)*>
@@ -582,6 +614,7 @@ define void @vecvecselect_lift(<2 x i1> %cond, <2 x {} addrspace(10)*> *%arg) {
 define void @vecscalarselect_lift(<2 x i1> %cond, i64 %a) {
 ; CHECK-LABEL: @vecscalarselect_lift
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
     %adecayed = addrspacecast {} addrspace(10)* %aboxed to i64 addrspace(12)*
@@ -600,6 +633,7 @@ define void @vecscalarselect_lift(<2 x i1> %cond, i64 %a) {
 define void @scalarvecselect_lift(i1 %cond, i64 %a) {
 ; CHECK-LABEL: @scalarvecselect_lift
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
     %adecayed = addrspacecast {} addrspace(10)* %aboxed to i64 addrspace(12)*
@@ -619,6 +653,7 @@ define i8 @select_arrayptr(i1 %cond) {
 ; CHECK-LABEL: @select_arrayptr
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
 top:
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %obj1 = call {} addrspace(10) *@alloc()
     %obj2 = call {} addrspace(10) *@alloc()
@@ -648,6 +683,7 @@ define i8 @vector_arrayptrs() {
 ; CHECK: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]]
 ;
 top:
+   %pgcstack = call {}*** @julia.get_pgcstack()
    %ptls = call {}*** @julia.ptls_states()
    %obj1 = call {} addrspace(10) *@alloc()
    %decayed = addrspacecast {} addrspace(10) *%obj1 to {} addrspace(11) *
@@ -669,6 +705,7 @@ define i8 @masked_arrayptrs() {
 ; CHECK: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]]
 ;
 top:
+   %pgcstack = call {}*** @julia.get_pgcstack()
    %ptls = call {}*** @julia.ptls_states()
    %obj1 = call {} addrspace(10) *@alloc()
    %decayed = addrspacecast {} addrspace(10) *%obj1 to {} addrspace(11) *
@@ -690,6 +727,7 @@ define i8 @gather_arrayptrs() {
 ; CHECK: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]]
 ;
 top:
+   %pgcstack = call {}*** @julia.get_pgcstack()
    %ptls = call {}*** @julia.ptls_states()
    %obj1 = call {} addrspace(10) *@alloc()
    %decayed = addrspacecast {} addrspace(10) *%obj1 to {} addrspace(11)*
@@ -710,6 +748,7 @@ define i8 @gather_arrayptrs_alltrue() {
 ; CHECK: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]]
 ;
 top:
+   %pgcstack = call {}*** @julia.get_pgcstack()
    %ptls = call {}*** @julia.ptls_states()
    %obj1 = call {} addrspace(10) *@alloc()
    %decayed = addrspacecast {} addrspace(10) *%obj1 to {} addrspace(11)*
@@ -728,6 +767,7 @@ define i8 @lost_select_decayed(i1 %arg1) {
 ; CHECK: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2
 ; CHECK: store {} addrspace(10)* [[SOMETHING:%.*]], {} addrspace(10)** [[GEP0]]
 top:
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %obj1 = call {} addrspace(10) *@alloc()
     %decayed = addrspacecast {} addrspace(10) *%obj1 to {} addrspace(11)*
diff --git a/test/llvmpasses/late-lower-gc.ll b/test/llvmpasses/late-lower-gc.ll
index 29f889031b629e..a7b8dc7caee38d 100644
--- a/test/llvmpasses/late-lower-gc.ll
+++ b/test/llvmpasses/late-lower-gc.ll
@@ -5,6 +5,7 @@
 declare void @boxed_simple({} addrspace(10)*, {} addrspace(10)*)
 declare {} addrspace(10)* @jl_box_int64(i64)
 declare {}*** @julia.ptls_states()
+declare {}*** @julia.get_pgcstack()
 declare void @jl_safepoint()
 declare {} addrspace(10)* @jl_apply_generic({} addrspace(10)*, {} addrspace(10)**, i32)
 declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj(i8*, i64, {} addrspace(10)*)
@@ -14,8 +15,8 @@ define void @gc_frame_lowering(i64 %a, i64 %b) {
 top:
 ; CHECK-LABEL: @gc_frame_lowering
 ; CHECK: %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2)
-    %ptls = call {}*** @julia.ptls_states()
-; CHECK: %ptls = call {}*** @julia.ptls_states()
+; CHECK:  %pgcstack = call {}*** @julia.get_pgcstack()
+    %pgcstack = call {}*** @julia.get_pgcstack()
 ; CHECK-NEXT: call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2)
 ; CHECK-NEXT: call {} addrspace(10)* @jl_box_int64
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
@@ -37,6 +38,7 @@ top:
 define {} addrspace(10)* @gc_alloc_lowering() {
 top:
 ; CHECK-LABEL: @gc_alloc_lowering
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %ptls_i8 = bitcast {}*** %ptls to i8*
 ; CHECK: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, [[SIZE_T:i.[0-9]+]] 8)
@@ -56,6 +58,7 @@ top:
 define void @gc_drop_aliasing() {
 top:
 ; CHECK-LABEL: @gc_drop_aliasing
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %ptls_i8 = bitcast {}*** %ptls to i8*
 ; CHECK: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, [[SIZE_T:i.[0-9]+]] 8)
@@ -79,7 +82,7 @@ define i32 @callee_root({} addrspace(10)* %v0, {} addrspace(10)* %v1) {
 top:
 ; CHECK-LABEL: @callee_root
 ; CHECK-NOT: @julia.new_gc_frame
-  %v2 = call {}*** @julia.ptls_states()
+  %v2 = call {}*** @julia.get_pgcstack()
   %v3 = bitcast {} addrspace(10)* %v0 to {} addrspace(10)* addrspace(10)*
   %v4 = addrspacecast {} addrspace(10)* addrspace(10)* %v3 to {} addrspace(10)* addrspace(11)*
   %v5 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %v4 unordered, align 8
diff --git a/test/llvmpasses/lower-handlers.ll b/test/llvmpasses/lower-handlers.ll
index 42e768ee132a7a..d9d5ac087b7737 100644
--- a/test/llvmpasses/lower-handlers.ll
+++ b/test/llvmpasses/lower-handlers.ll
@@ -4,10 +4,11 @@ attributes #1 = { returns_twice }
 declare i32 @julia.except_enter() #1
 declare void @jl_pop_handler(i32)
 declare i8**** @julia.ptls_states()
+declare i8**** @julia.get_pgcstack()
 
 define void @simple() {
 top:
-    %ptls = call i8**** @julia.ptls_states()
+    %pgcstack = call i8**** @julia.get_pgcstack()
 ; CHECK: call void @llvm.lifetime.start
 ; CHECK: call void @jl_enter_handler
 ; CHECK: setjmp
diff --git a/test/llvmpasses/noinline.jl b/test/llvmpasses/noinline.jl
index f542968b219794..c4aa22bf80a71b 100644
--- a/test/llvmpasses/noinline.jl
+++ b/test/llvmpasses/noinline.jl
@@ -17,5 +17,5 @@ include(joinpath("..", "testhelpers", "llvmpasses.jl"))
     return A + B
 end
 
-# CHECK: attributes #{{[0-9]+}} = {{{([a-z]+ )*}} noinline {{([a-z]+ )*}}}
+# CHECK: attributes #{{[0-9]+}} = {{{[^}]*}} noinline {{[^}]*}}}
 emit(simple_noinline, Float64, Float64)
diff --git a/test/llvmpasses/refinements.ll b/test/llvmpasses/refinements.ll
index 37212a512d68f8..b883a53554a0c9 100644
--- a/test/llvmpasses/refinements.ll
+++ b/test/llvmpasses/refinements.ll
@@ -2,6 +2,7 @@
 
 
 declare {}*** @julia.ptls_states()
+declare {}*** @julia.get_pgcstack()
 declare void @jl_safepoint()
 declare void @one_arg_boxed({} addrspace(10)*)
 declare {} addrspace(10)* @jl_box_int64(i64)
@@ -9,6 +10,7 @@ declare {} addrspace(10)* @jl_box_int64(i64)
 define void @argument_refinement({} addrspace(10)* %a) {
 ; CHECK-LABEL: @argument_refinement
 ; CHECK-NOT: %gcframe
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %casted1 = bitcast {} addrspace(10)* %a to {} addrspace(10)* addrspace(10)*
     %loaded1 = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %casted1, !tbaa !1
@@ -22,6 +24,7 @@ define void @argument_refinement({} addrspace(10)* %a) {
 define void @heap_refinement1(i64 %a) {
 ; CHECK-LABEL: @heap_refinement1
 ; CHECK:   %gcframe = alloca {} addrspace(10)*, i32 3
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
     %casted1 = bitcast {} addrspace(10)* %aboxed to {} addrspace(10)* addrspace(10)*
@@ -38,6 +41,7 @@ define void @heap_refinement1(i64 %a) {
 define void @heap_refinement2(i64 %a) {
 ; CHECK-LABEL: @heap_refinement2
 ; CHECK:   %gcframe = alloca {} addrspace(10)*, i32 3
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
     %casted1 = bitcast {} addrspace(10)* %aboxed to {} addrspace(10)* addrspace(10)*
@@ -55,6 +59,7 @@ declare {} addrspace(10)* @allocate_some_value()
 define void @issue22770() {
 ; CHECK-LABEL: @issue22770
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %y = call {} addrspace(10)* @allocate_some_value()
     %casted1 = bitcast {} addrspace(10)* %y to {} addrspace(10)* addrspace(10)*
@@ -80,6 +85,7 @@ define void @refine_select_phi({} addrspace(10)* %x, {} addrspace(10)* %y, i1 %b
 ; CHECK-LABEL: @refine_select_phi
 ; CHECK-NOT: %gcframe
 top:
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %s = select i1 %b, {} addrspace(10)* %x, {} addrspace(10)* %y
   br i1 %b, label %L1, label %L2
@@ -101,6 +107,7 @@ define void @dont_refine_loop({} addrspace(10)* %x) {
 ; CHECK-LABEL: @dont_refine_loop
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
 top:
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   br label %L1
 
@@ -122,6 +129,7 @@ define void @refine_loop_const({} addrspace(10)* %x) {
 ; CHECK-LABEL: @refine_loop_const
 ; CHECK-NOT: %gcframe
 top:
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   br label %L1
 
@@ -142,6 +150,7 @@ define void @refine_loop_indirect({} addrspace(10)* %x) {
 ; CHECK-LABEL: @refine_loop_indirect
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3
 top:
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %a = call {} addrspace(10)* @allocate_some_value()
   br label %L1
@@ -166,6 +175,7 @@ define void @refine_loop_indirect2({} addrspace(10)* %x) {
 ; CHECK-LABEL: @refine_loop_indirect2
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3
 top:
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %a = call {} addrspace(10)* @allocate_some_value()
   br label %L1
@@ -189,6 +199,7 @@ declare {} addrspace(10)* @julia.typeof({} addrspace(10)*) #0
 define {} addrspace(10)* @typeof({} addrspace(10)* %x) {
 ; CHECK-LABEL: @typeof(
 ; CHECK-NOT: %gcframe
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %v = call {} addrspace(10)* @julia.typeof({} addrspace(10)* %x)
   call void @one_arg_boxed({} addrspace(10)* %v)
@@ -201,6 +212,7 @@ define {} addrspace(10)* @setfield({} addrspace(10)* %p) {
 ; CHECK-LABEL: @setfield(
 ; CHECK-NOT: %gcframe
 ; CHECK: call void @jl_gc_queue_root
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %c = call {} addrspace(10)* @allocate_some_value()
   %fp = bitcast {} addrspace(10)* %p to {} addrspace(10)* addrspace(10)*
diff --git a/test/llvmpasses/returnstwicegc.ll b/test/llvmpasses/returnstwicegc.ll
index c542fd026ff81d..da281fe85fd57e 100644
--- a/test/llvmpasses/returnstwicegc.ll
+++ b/test/llvmpasses/returnstwicegc.ll
@@ -4,6 +4,7 @@
 declare void @boxed_simple({} addrspace(10)*, {} addrspace(10)*)
 declare {} addrspace(10)* @jl_box_int64(i64)
 declare {}*** @julia.ptls_states()
+declare {}*** @julia.get_pgcstack()
 declare i32 @sigsetjmp(i8*, i32) returns_twice
 declare void @one_arg_boxed({} addrspace(10)*)
 
@@ -14,6 +15,7 @@ define void @try_catch(i64 %a, i64 %b)
 top:
     %sigframe = alloca [208 x i8], align 16
     %sigframe.sub = getelementptr inbounds [208 x i8], [208 x i8]* %sigframe, i64 0, i64 0
+    call {}*** @julia.get_pgcstack()
     call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 %a)
     %val = call i32 @sigsetjmp(i8 *%sigframe.sub, i32 0) returns_twice
diff --git a/test/llvmpasses/safepoint_stress.jl b/test/llvmpasses/safepoint_stress.jl
index 7ff96643e82c3c..c5345ad07e786e 100644
--- a/test/llvmpasses/safepoint_stress.jl
+++ b/test/llvmpasses/safepoint_stress.jl
@@ -6,8 +6,10 @@ println("""
 declare {} addrspace(10)* @alloc()
 declare void @one_arg_boxed({} addrspace(10)*)
 declare {}*** @julia.ptls_states()
+declare {}*** @julia.get_pgcstack()
 
 define void @stress(i64 %a, i64 %b) {
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
 """)
 
diff --git a/test/loading.jl b/test/loading.jl
index 377087a63f558d..c56f6c463a21f5 100644
--- a/test/loading.jl
+++ b/test/loading.jl
@@ -314,6 +314,11 @@ module NotPkgModule; end
         @test pkgdir(Foo.SubFoo1) == normpath(abspath(@__DIR__, "project/deps/Foo1"))
         @test pkgdir(Foo.SubFoo2) == normpath(abspath(@__DIR__, "project/deps/Foo1"))
         @test pkgdir(NotPkgModule) === nothing
+
+        @test pkgdir(Foo, "src") == normpath(abspath(@__DIR__, "project/deps/Foo1/src"))
+        @test pkgdir(Foo.SubFoo1, "src") == normpath(abspath(@__DIR__, "project/deps/Foo1/src"))
+        @test pkgdir(Foo.SubFoo2, "src") == normpath(abspath(@__DIR__, "project/deps/Foo1/src"))
+        @test pkgdir(NotPkgModule, "src") === nothing
     end
 
 end
@@ -718,3 +723,36 @@ import .Foo.Libdl; import Libdl
         end
     end
 end
+
+@testset "`Base.project_names` and friends" begin
+    # Some functions in Pkg assume that these tuples have the same length
+    n = length(Base.project_names)
+    @test length(Base.manifest_names) == n
+    @test length(Base.preferences_names) == n
+end
+
+@testset "Manifest formats" begin
+    deps = Dict{String,Any}(
+        "Serialization" => Any[Dict{String, Any}("uuid"=>"9e88b42a-f829-5b0c-bbe9-9e923198166b")],
+        "Random"        => Any[Dict{String, Any}("deps"=>["Serialization"], "uuid"=>"9a3f8284-a2c9-5f02-9a11-845980a1fd5c")],
+        "Logging"       => Any[Dict{String, Any}("uuid"=>"56ddb016-857b-54e1-b83d-db4d58db5568")]
+    )
+
+    @testset "v1.0" begin
+        env_dir = joinpath(@__DIR__, "manifest", "v1.0")
+        manifest_file = joinpath(env_dir, "Manifest.toml")
+        isfile(manifest_file) || error("Reference manifest is missing")
+        raw_manifest = Base.parsed_toml(manifest_file)
+        @test Base.is_v1_format_manifest(raw_manifest)
+        @test Base.get_deps(raw_manifest) == deps
+    end
+
+    @testset "v2.0" begin
+        env_dir = joinpath(@__DIR__, "manifest", "v2.0")
+        manifest_file = joinpath(env_dir, "Manifest.toml")
+        isfile(manifest_file) || error("Reference manifest is missing")
+        raw_manifest = Base.parsed_toml(manifest_file)
+        @test Base.is_v1_format_manifest(raw_manifest) == false
+        @test Base.get_deps(raw_manifest) == deps
+    end
+end
diff --git a/test/manifest/v1.0/Manifest.toml b/test/manifest/v1.0/Manifest.toml
new file mode 100644
index 00000000000000..758314a2f5f6a4
--- /dev/null
+++ b/test/manifest/v1.0/Manifest.toml
@@ -0,0 +1,11 @@
+# This file is machine-generated - editing it directly is not advised
+
+[[Logging]]
+uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
+
+[[Random]]
+deps = ["Serialization"]
+uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+
+[[Serialization]]
+uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
diff --git a/test/manifest/v2.0/Manifest.toml b/test/manifest/v2.0/Manifest.toml
new file mode 100644
index 00000000000000..f999fd6efb1c4b
--- /dev/null
+++ b/test/manifest/v2.0/Manifest.toml
@@ -0,0 +1,14 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.7.0-DEV.1199"
+manifest_format = "2.0"
+
+[[deps.Logging]]
+uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
+
+[[deps.Random]]
+deps = ["Serialization"]
+uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+
+[[deps.Serialization]]
+uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
diff --git a/test/math.jl b/test/math.jl
index 074358da9c7915..67522b9be4c7bd 100644
--- a/test/math.jl
+++ b/test/math.jl
@@ -196,11 +196,13 @@ end
             @test isequal(cos(T(0)), T(1))
             @test cos(T(pi)/2) ≈ T(0) atol=eps(T)
             @test isequal(cos(T(pi)), T(-1))
-            @test exp(T(1)) ≈ T(ℯ) atol=10*eps(T)
+            @test exp(T(1)) ≈ T(ℯ) atol=2*eps(T)
             @test isequal(exp10(T(1)), T(10))
             @test isequal(exp2(T(1)), T(2))
             @test isequal(expm1(T(0)), T(0))
-            @test expm1(T(1)) ≈ T(ℯ)-1 atol=10*eps(T)
+            @test isequal(expm1(-floatmax(T)), -one(T))
+            @test isequal(expm1(floatmax(T)), T(Inf))
+            @test expm1(T(1)) ≈ T(ℯ)-1 atol=2*eps(T)
             @test isequal(hypot(T(3),T(4)), T(5))
             @test isequal(hypot(floatmax(T),T(1)),floatmax(T))
             @test isequal(hypot(floatmin(T)*sqrt(eps(T)),T(0)),floatmin(T)*sqrt(eps(T)))
@@ -286,6 +288,13 @@ end
             @test tanh(T(Inf)) === T(1)
         end
     end
+    @testset "Float16 expm1" begin
+        T=Float16
+        @test isequal(expm1(T(0)), T(0))
+        @test isequal(expm1(-floatmax(T)), -one(T))
+        @test isequal(expm1(floatmax(T)), T(Inf))
+        @test expm1(T(1)) ≈ T(ℯ)-1 atol=2*eps(T)
+    end
 end
 
 @testset "exp function" for T in (Float64, Float32)
@@ -341,6 +350,35 @@ end
     @test Array(acosh.(STAA)) == acosh.(TAA)
     @test Array(acsch.(STAA)) == acsch.(TAA)
     @test Array(acoth.(STAA)) == acoth.(TAA)
+    @test sind(TAA) == sin(deg2rad.(TAA))
+    @test cosd(TAA) == cos(deg2rad.(TAA))
+    @test tand(TAA) == tan(deg2rad.(TAA))
+    @test asind(TAA) == rad2deg.(asin(TAA))
+    @test acosd(TAA) == rad2deg.(acos(TAA))
+    @test atand(TAA) == rad2deg.(atan(TAA))
+    @test asecd(TAA) == rad2deg.(asec(TAA))
+    @test acscd(TAA) == rad2deg.(acsc(TAA))
+    @test acotd(TAA) == rad2deg.(acot(TAA))
+
+    m = rand(3,2) # not square matrix
+    ex = @test_throws DimensionMismatch sind(m)
+    @test startswith(ex.value.msg, "matrix is not square")
+    ex = @test_throws DimensionMismatch cosd(m)
+    @test startswith(ex.value.msg, "matrix is not square")
+    ex = @test_throws DimensionMismatch tand(m)
+    @test startswith(ex.value.msg, "matrix is not square")
+    ex = @test_throws DimensionMismatch asind(m)
+    @test startswith(ex.value.msg, "matrix is not square")
+    ex = @test_throws DimensionMismatch acosd(m)
+    @test startswith(ex.value.msg, "matrix is not square")
+    ex = @test_throws DimensionMismatch atand(m)
+    @test startswith(ex.value.msg, "matrix is not square")
+    ex = @test_throws DimensionMismatch asecd(m)
+    @test startswith(ex.value.msg, "matrix is not square")
+    ex = @test_throws DimensionMismatch acscd(m)
+    @test startswith(ex.value.msg, "matrix is not square")
+    ex = @test_throws DimensionMismatch acotd(m)
+    @test startswith(ex.value.msg, "matrix is not square")
 end
 
 @testset "check exp2(::Integer) matches exp2(::Float)" begin
diff --git a/test/meta.jl b/test/meta.jl
index ab073668677c69..5bdb988f41b6da 100644
--- a/test/meta.jl
+++ b/test/meta.jl
@@ -241,7 +241,29 @@ ci = code_lowered(f, Tuple{Int})[1]
 
 g(::Val{x}) where {x} = x ? 1 : 0
 ci = code_lowered(g, Tuple{Val{true}})[1]
-@test Meta.partially_inline!(ci.code, [], Tuple{typeof(g),Val{true}}, Any[Val{true}], 0, 0, :propagate)[1] ==
-   Core.GotoIfNot(QuoteNode(Val{true}), 3)
-@test Meta.partially_inline!(ci.code, [], Tuple{typeof(g),Val{true}}, Any[Val{true}], 0, 2, :propagate)[1] ==
-   Core.GotoIfNot(QuoteNode(Val{true}), 5)
+@test Meta.partially_inline!(ci.code, [], Tuple{typeof(g),Val{true}}, Any[true], 0, 0, :propagate)[1] ==
+   Core.GotoIfNot(QuoteNode(true), 3)
+@test Meta.partially_inline!(ci.code, [], Tuple{typeof(g),Val{true}}, Any[true], 0, 2, :propagate)[1] ==
+   Core.GotoIfNot(QuoteNode(true), 5)
+
+@testset "inlining with isdefined" begin
+    isdefined_slot(x) = @isdefined(x)
+    ci = code_lowered(isdefined_slot, Tuple{Int})[1]
+    @test Meta.partially_inline!(copy(ci.code), [], Tuple{typeof(isdefined_slot), Int},
+                                 [], 0, 0, :propagate)[1] == Expr(:isdefined, Core.SlotNumber(2))
+    @test Meta.partially_inline!(copy(ci.code), [isdefined_slot, 1], Tuple{typeof(isdefined_slot), Int},
+                                 [], 0, 0, :propagate)[1] == true
+
+    isdefined_sparam(::T) where {T} = @isdefined(T)
+    ci = code_lowered(isdefined_sparam, Tuple{Int})[1]
+    @test Meta.partially_inline!(copy(ci.code), [], Tuple{typeof(isdefined_sparam), Int},
+                                 Any[Int], 0, 0, :propagate)[1] == true
+    @test Meta.partially_inline!(copy(ci.code), [], Tuple{typeof(isdefined_sparam), Int},
+                                 [], 0, 0, :propagate)[1] == Expr(:isdefined, Expr(:static_parameter, 1))
+
+    @eval isdefined_globalref(x) = $(Expr(:isdefined, GlobalRef(Base, :foo)))
+    ci = code_lowered(isdefined_globalref, Tuple{Int})[1]
+    @test Meta.partially_inline!(copy(ci.code), Any[isdefined_globalref, 1], Tuple{typeof(isdefined_globalref), Int},
+                                 [], 0, 0, :propagate)[1] == Expr(:isdefined, GlobalRef(Base, :foo))
+
+end
diff --git a/test/misc.jl b/test/misc.jl
index da9591ed2770dc..94c35c43ffaec5 100644
--- a/test/misc.jl
+++ b/test/misc.jl
@@ -182,6 +182,17 @@ end
 
 @test_throws ErrorException("deadlock detected: cannot wait on current task") wait(current_task())
 
+# issue #41347
+let t = @async 1
+    wait(t)
+    @test_throws ErrorException yield(t)
+end
+
+let t = @async error(42)
+    Base._wait(t)
+    @test_throws ErrorException("42") yieldto(t)
+end
+
 # test that @sync is lexical (PR #27164)
 
 const x27164 = Ref(0)
@@ -252,6 +263,22 @@ function timev_macro_scope()
 end
 @test timev_macro_scope() == 1
 
+before = Base.cumulative_compile_time_ns_before();
+
+# exercise concurrent calls to `@time` for reentrant compilation time measurement.
+t1 = @async @time begin
+    sleep(2)
+    @eval module M ; f(x,y) = x+y ; end
+    @eval M.f(2,3)
+end
+t2 = @async begin
+    sleep(1)
+    @time 2 + 2
+end
+
+after = Base.cumulative_compile_time_ns_after();
+@test after >= before;
+
 # interactive utilities
 
 struct ambigconvert; end # inject a problematic `convert` method to ensure it still works
@@ -291,6 +318,11 @@ let vec = vcat(missing, ones(100000))
     @test length(unique(summarysize(vec) for i = 1:20)) == 1
 end
 
+# issue #40773
+let s = Set(1:100)
+    @test summarysize([s]) > summarysize(s)
+end
+
 # issue #13021
 let ex = try
     Main.x13021 = 0
@@ -621,6 +653,26 @@ let buf = IOBuffer()
     # Check that boldness is turned off
     printstyled(buf_color, "foo"; bold=true, color=:red)
     @test String(take!(buf)) == "\e[31m\e[1mfoo\e[22m\e[39m"
+
+    # Check that underline is turned off
+    printstyled(buf_color, "foo"; color = :red, underline = true)
+    @test String(take!(buf)) == "\e[31m\e[4mfoo\e[24m\e[39m"
+
+    # Check that blink is turned off
+    printstyled(buf_color, "foo"; color = :red, blink = true)
+    @test String(take!(buf)) == "\e[31m\e[5mfoo\e[25m\e[39m"
+
+    # Check that reverse is turned off
+    printstyled(buf_color, "foo"; color = :red, reverse = true)
+    @test String(take!(buf)) == "\e[31m\e[7mfoo\e[27m\e[39m"
+
+    # Check that hidden is turned off
+    printstyled(buf_color, "foo"; color = :red, hidden = true)
+    @test String(take!(buf)) == "\e[31m\e[8mfoo\e[28m\e[39m"
+
+    # Check that all options can be turned on simultaneously
+    printstyled(buf_color, "foo"; color = :red, bold = true, underline = true, blink = true, reverse = true, hidden = true)
+    @test String(take!(buf)) == "\e[31m\e[1m\e[4m\e[5m\e[7m\e[8mfoo\e[28m\e[27m\e[25m\e[24m\e[22m\e[39m"
 end
 
 abstract type DA_19281{T, N} <: AbstractArray{T, N} end
@@ -964,3 +1016,5 @@ let script = :(let ptr = Ptr{Cint}(ccall(:jl_mmap, Ptr{Cvoid},
     @test !success(`$(Base.julia_cmd()) -e $script`)
 end
 
+# issue #41656
+@test success(`$(Base.julia_cmd()) -e 'isempty(x) = true'`)
diff --git a/test/missing.jl b/test/missing.jl
index 4c704c4ce64f89..0be8cb8ec9be41 100644
--- a/test/missing.jl
+++ b/test/missing.jl
@@ -1,5 +1,8 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+isdefined(Main, :OffsetArrays) || @eval Main include("testhelpers/OffsetArrays.jl")
+using .Main.OffsetArrays
+
 @testset "MissingException" begin
     @test sprint(showerror, MissingException("test")) == "MissingException: test"
 end
@@ -83,7 +86,7 @@ end
     arithmetic_operators = [+, -, *, /, ^, Base.div, Base.mod, Base.fld, Base.rem]
 
     # All unary operators return missing when evaluating missing
-    for f in [!, ~, +, -, *, &, |, xor]
+    for f in [!, ~, +, -, *, &, |, xor, nand, nor]
         @test ismissing(f(missing))
     end
 
@@ -128,6 +131,22 @@ end
     @test ismissing(xor(true, missing))
     @test ismissing(xor(missing, false))
     @test ismissing(xor(false, missing))
+    @test ismissing(nand(missing, true))
+    @test ismissing(nand(true, missing))
+    @test nand(missing, false) == true
+    @test nand(false, missing) == true
+    @test ismissing(⊼(missing, true))
+    @test ismissing(⊼(true, missing))
+    @test ⊼(missing, false) == true
+    @test ⊼(false, missing) == true
+    @test nor(missing, true) == false
+    @test nor(true, missing) == false
+    @test ismissing(nor(missing, false))
+    @test ismissing(nor(false, missing))
+    @test ⊽(missing, true) == false
+    @test ⊽(true, missing) == false
+    @test ismissing(⊽(missing, false))
+    @test ismissing(⊽(false, missing))
 
     @test ismissing(missing & 1)
     @test ismissing(1 & missing)
@@ -135,11 +154,21 @@ end
     @test ismissing(1 | missing)
     @test ismissing(xor(missing, 1))
     @test ismissing(xor(1, missing))
+    @test ismissing(nand(missing, 1))
+    @test ismissing(nand(1, missing))
+    @test ismissing(⊼(missing, 1))
+    @test ismissing(⊼(1, missing))
+    @test ismissing(nor(missing, 1))
+    @test ismissing(nor(1, missing))
+    @test ismissing(⊽(missing, 1))
+    @test ismissing(⊽(1, missing))
 end
 
-@testset "* string concatenation" begin
+@testset "* string/char concatenation" begin
     @test ismissing("a" * missing)
+    @test ismissing('a' * missing)
     @test ismissing(missing * "a")
+    @test ismissing(missing * 'a')
 end
 
 # Emulate a unitful type such as Dates.Minute
@@ -436,10 +465,10 @@ end
             @test_throws BoundsError x[3, 1]
             @test findfirst(==(2), x) === nothing
             @test isempty(findall(==(2), x))
-            @test_throws ArgumentError argmin(x)
-            @test_throws ArgumentError findmin(x)
-            @test_throws ArgumentError argmax(x)
-            @test_throws ArgumentError findmax(x)
+            @test_throws "reducing over an empty collection is not allowed" argmin(x)
+            @test_throws "reducing over an empty collection is not allowed" findmin(x)
+            @test_throws "reducing over an empty collection is not allowed" argmax(x)
+            @test_throws "reducing over an empty collection is not allowed" findmax(x)
         end
     end
 
@@ -496,14 +525,27 @@ end
         for n in 0:3
             itr = skipmissing(Vector{Union{Int,Missing}}(fill(missing, n)))
             @test sum(itr) == reduce(+, itr) == mapreduce(identity, +, itr) === 0
-            @test_throws ArgumentError reduce(x -> x/2, itr)
-            @test_throws ArgumentError mapreduce(x -> x/2, +, itr)
+            @test_throws "reducing over an empty collection is not allowed" reduce(x -> x/2, itr)
+            @test_throws "reducing over an empty collection is not allowed" mapreduce(x -> x/2, +, itr)
         end
 
         # issue #35504
         nt = NamedTuple{(:x, :y),Tuple{Union{Missing, Int},Union{Missing, Float64}}}(
             (missing, missing))
         @test sum(skipmissing(nt)) === 0
+
+        # issues #38627 and #124
+        @testset for len in [1, 2, 15, 16, 1024, 1025]
+            v = repeat(Union{Int,Missing}[1], len)
+            oa = OffsetArray(v, typemax(Int)-length(v))
+            sm = skipmissing(oa)
+            @test sum(sm) == len
+
+            v = repeat(Union{Int,Missing}[missing], len)
+            oa = OffsetArray(v, typemax(Int)-length(v))
+            sm = skipmissing(oa)
+            @test sum(sm) == 0
+        end
     end
 
     @testset "filter" begin
@@ -533,6 +575,16 @@ end
     @test coalesce(missing, nothing) === nothing
 end
 
+@testset "@coalesce" begin
+    @test @coalesce() === missing
+    @test @coalesce(1) === 1
+    @test @coalesce(nothing) === nothing
+    @test @coalesce(missing) === missing
+
+    @test @coalesce(1, error("failed")) === 1
+    @test_throws ErrorException @coalesce(missing, error("failed"))
+end
+
 mutable struct Obj; x; end
 @testset "weak references" begin
     @noinline function mk_wr(r, wr)
@@ -577,4 +629,4 @@ end
     @test isequal(sort(X, alg=MergeSort, rev=true), XRP)
 end
 
-sortperm(reverse([NaN, missing, NaN, missing]))
\ No newline at end of file
+sortperm(reverse([NaN, missing, NaN, missing]))
diff --git a/test/mpfr.jl b/test/mpfr.jl
index 86c7d345f49fd3..cbaa69761a4e35 100644
--- a/test/mpfr.jl
+++ b/test/mpfr.jl
@@ -338,23 +338,6 @@ end
     @test *(a, b, c, d, f) == parse(BigFloat,"5.214588134765625e+04")
     @test *(a, b, c, d, f, g) == parse(BigFloat,"1.6295587921142578125e+03")
 end
-@testset "< / > / <= / >=" begin
-    x = BigFloat(12)
-    y = BigFloat(42)
-    z = BigFloat(30)
-    @test y > x
-    @test y >= x
-    @test y > z
-    @test y >= z
-    @test x < y
-    @test x <= y
-    @test z < y
-    @test z <= y
-    @test y - x >= z
-    @test y - x <= z
-    @test !(x >= z)
-    @test !(y <= z)
-end
 @testset "rounding modes" begin
     setprecision(4) do
         # default mode is round to nearest
@@ -371,7 +354,6 @@ end
         end
     end
 end
-
 @testset "copysign / sign" begin
     x = BigFloat(1)
     y = BigFloat(-1)
@@ -473,10 +455,11 @@ end
     @test isnan(nextfloat(BigFloat(NaN), 1))
     @test isnan(prevfloat(BigFloat(NaN), 1))
 end
+
 # sqrt DomainError
 @test_throws DomainError sqrt(BigFloat(-1))
 
-@testset "precision" begin
+@testset "setprecision" begin
     old_precision = precision(BigFloat)
     x = BigFloat(0)
     @test precision(x) == old_precision
@@ -492,7 +475,8 @@ end
     @test precision(z) == 240
     x = BigFloat(12)
     @test precision(x) == old_precision
-    @test_throws DomainError setprecision(1)
+    @test precision(setprecision(1) do; BigFloat(23); end) == 1  # minimum-precision
+    @test_throws DomainError setprecision(0)
     @test_throws DomainError BigFloat(1, precision = 0)
     @test_throws DomainError BigFloat(big(1.1), precision = 0)
     @test_throws DomainError BigFloat(2.5, precision = -900)
@@ -512,7 +496,6 @@ end
     @test !isinteger(-BigFloat(Inf))
     @test !isinteger(BigFloat(NaN))
 end
-
 @testset "comparisons" begin
     x = BigFloat(1)
     y = BigFloat(-1)
@@ -521,9 +504,11 @@ end
     imi = BigFloat(-Inf)
     @test x > y
     @test x >= y
+    @test !(y >= x)
     @test x >= x
     @test y < x
     @test y <= x
+    @test !(x <= y)
     @test y <= y
     @test x < ipl
     @test x <= ipl
@@ -622,7 +607,8 @@ end
         @test log(x) == log(42)
         @test isinf(log(BigFloat(0)))
         @test_throws DomainError log(BigFloat(-1))
-        @test log2(x) == log2(42)
+        # issue #41450
+        @test_skip log2(x) == log2(42)
         @test isinf(log2(BigFloat(0)))
         @test_throws DomainError log2(BigFloat(-1))
         @test log10(x) == log10(42)
@@ -675,14 +661,17 @@ end
     end
     setprecision(21) do
         @test string(parse(BigFloat, "0.1")) == "0.10000002"
+        @test string(parse(BigFloat, "0.5")) == "0.5"
         @test string(parse(BigFloat, "-9.9")) == "-9.9000015"
     end
     setprecision(40) do
         @test string(parse(BigFloat, "0.1")) == "0.10000000000002"
+        @test string(parse(BigFloat, "0.5")) == "0.5"
         @test string(parse(BigFloat, "-9.9")) == "-9.8999999999942"
     end
     setprecision(123) do
         @test string(parse(BigFloat, "0.1")) == "0.0999999999999999999999999999999999999953"
+        @test string(parse(BigFloat, "0.5")) == "0.5"
         @test string(parse(BigFloat, "-9.9")) == "-9.8999999999999999999999999999999999997"
     end
 end
@@ -927,6 +916,7 @@ end
     @test i3+1 > f
     @test i3+1 >= f
 end
+
 # issue #8318
 @test convert(Int64,big(500_000_000_000_000.)) == 500_000_000_000_000
 
@@ -935,6 +925,7 @@ end
     @test MPFR.get_emin() == MPFR.get_emin_min()
     @test MPFR.get_emax() == MPFR.get_emax_max()
 end
+
 # issue #10994: handle embedded NUL chars for string parsing
 @test_throws ArgumentError parse(BigFloat, "1\0")
 
@@ -1025,7 +1016,6 @@ end
         @test to_string(big"-1.0") == "-1.0"
     end
 end
-
 @testset "big(::Type)" begin
     for x in (2f0, pi, 7.8, big(ℯ))
         @test big(typeof(x)) == typeof(big(x))
diff --git a/test/numbers.jl b/test/numbers.jl
index a76f5726492df5..aae3b5cf970b17 100644
--- a/test/numbers.jl
+++ b/test/numbers.jl
@@ -38,6 +38,24 @@ const ≣ = isequal # convenient for comparing NaNs
     @test xor(true,  false) == true
     @test xor(false, true)  == true
     @test xor(true,  true)  == false
+
+    @test false ⊼ false == true
+    @test true ⊼ false == true
+    @test false ⊼ true == true
+    @test true ⊼ true == false
+    @test nand(false, false) == true
+    @test nand(true, false) == true
+    @test nand(false, true) == true
+    @test nand(true, true) == false
+
+    @test false ⊽ false == true
+    @test true ⊽ false == false
+    @test false ⊽ true == false
+    @test true ⊽ true == false
+    @test nor(false, false) == true
+    @test nor(true, false) == false
+    @test nor(false, true) == false
+    @test nor(true, true) == false
 end
 @testset "bool operator" begin
     @test Bool(false) == false
@@ -2289,6 +2307,23 @@ end
         @test_throws BoundsError getindex(x, 1, 0)
     end
 end
+@testset "get(x::Number, ...)" begin
+    for x in [1.23, 7, ℯ, 4//5] #[FP, Int, Irrational, Rat]
+        @test get(x, 1, 99) == x
+        @test get(x, (), 99) == x
+        @test get(x, (1,), 99) == x
+        @test get(x, 2, 99) == 99
+        @test get(x, 0, pi) == pi
+        @test get(x, (1,2), pi) == pi
+        c = Ref(0)
+        @test get(() -> c[]+=1, x, 1) == x
+        @test get(() -> c[]+=1, x, ()) == x
+        @test get(() -> c[]+=1, x, (1,1,1)) == x
+        @test get(() -> c[]+=1, x, 2) == 1
+        @test get(() -> c[]+=1, x, -1) == 2
+        @test get(() -> c[]+=1, x, (3,2,1)) == 3
+    end
+end
 @testset "copysign and flipsign" begin
     # copysign(x::Real, y::Real) = ifelse(signbit(x)!=signbit(y), -x, x)
     # flipsign(x::Real, y::Real) = ifelse(signbit(y), -x, x)
@@ -2606,6 +2641,10 @@ end
     @test !isone(triu(fill(1, 5, 5)))
     @test !isone(zeros(Int, 5, 5))
     @test isone(Matrix(1I, 5, 5))
+    @test !isone(view(rand(5,5), [1,3,4], :))
+    Dv = view(Diagonal([1,1, 1]), [1,2], 1:2)
+    @test isone(Dv)
+    @test (@allocated isone(Dv)) == 0
     @test isone(Matrix(1I, 1000, 1000)) # sizeof(X) > 2M == ISONE_CUTOFF
 end
 
diff --git a/test/offsetarray.jl b/test/offsetarray.jl
index 5deb442f36222f..7621e140136275 100644
--- a/test/offsetarray.jl
+++ b/test/offsetarray.jl
@@ -2,6 +2,7 @@
 
 isdefined(Main, :OffsetArrays) || @eval Main include("testhelpers/OffsetArrays.jl")
 using .Main.OffsetArrays
+import .Main.OffsetArrays: IdOffsetRange
 using DelimitedFiles
 using Random
 using LinearAlgebra
@@ -231,11 +232,11 @@ targets1 = ["0-dimensional OffsetArray(::Array{Float64, 0}) with eltype Float64:
             "1×1×1×1 OffsetArray(::Array{Float64, 4}, 2:2, 3:3, 4:4, 5:5) with eltype Float64 with indices 2:2×3:3×4:4×5:5:\n[:, :, 4, 5] =\n 1.0"]
 targets2 = ["(fill(1.0), fill(1.0))",
             "([1.0], [1.0])",
-            "([1.0], [1.0])",
-            "([1.0], [1.0])",
-            "([1.0], [1.0])"]
+            "([1.0;;], [1.0;;])",
+            "([1.0;;;], [1.0;;;])",
+            "([1.0;;;;], [1.0;;;;])"]
 @testset "printing of OffsetArray with n=$n" for n = 0:4
-    a = OffsetArray(fill(1.,ntuple(d->1,n)), ntuple(identity,n))
+    a = OffsetArray(fill(1.,ntuple(Returns(1),n)), ntuple(identity,n))
     show(IOContext(io, :limit => true), MIME("text/plain"), a)
     @test String(take!(io)) == targets1[n+1]
     show(IOContext(io, :limit => true), MIME("text/plain"), (a,a))
@@ -776,3 +777,48 @@ end
     strY = String(take!(io))
     @test strX == strY
 end
+
+@testset "vector indexing (issue #39896)" begin
+    a = collect(1:10)
+    r = Base.IdentityUnitRange(2:3)
+    b = a[r]
+    @test axes(b) == axes(r)
+    for i in r
+        @test b[i] == a[r[i]]
+    end
+end
+
+@testset "proper partition for non-1-indexed vector" begin
+    @test Iterators.partition(OffsetArray(1:10,10), 5) |> collect == [1:5,6:10] # OffsetVector wrapping a range
+    @test Iterators.partition(OffsetArray(collect(1:10),10), 5) |> collect == [1:5,6:10] # OffsetVector wrapping a Vector
+    @test Iterators.partition(OffsetArray(reshape(1:9,3,3), (3,3)), 5) |> collect == [1:5,6:9] # OffsetMatrix wrapping a reshaped range
+    @test Iterators.partition(OffsetArray(reshape(collect(1:9),3,3), (3,3)), 5) |> collect == [1:5,6:9] # OffsetMatrix wrapping a Matrix
+    @test Iterators.partition(IdOffsetRange(2:7,10), 5) |> collect == [12:16,17:17] # IdOffsetRange
+end
+
+@testset "reshape" begin
+    a = OffsetArray(4:5, 5:6)
+    @test reshape(a, :) === a
+    @test reshape(a, (:,)) === a
+end
+
+@testset "issue #41630: replace_ref_begin_end!/@view on offset-like arrays" begin
+    x = OffsetArray([1 2; 3 4], -10:-9, 9:10)  # 2×2 OffsetArray{...} with indices -10:-9×9:10
+
+    # begin/end with offset indices
+    @test (@view x[begin, 9])[] == 1
+    @test (@view x[-10, end])[] == 2
+    @test (@view x[-9, begin])[] == 3
+    @test (@view x[end, 10])[] == 4
+    @test (@view x[begin, begin])[] == 1
+    @test (@view x[begin, end])[] == 2
+    @test (@view x[end, begin])[] == 3
+    @test (@view x[end, end])[] == 4
+
+    # nested usages of begin/end
+    y = OffsetArray([-10, -9], (5,))
+    @test (@view x[begin, -y[end]])[] == 1
+    @test (@view x[y[begin], end])[] == 2
+    @test (@view x[end, -y[end]])[] == 3
+    @test (@view x[y[end], end])[] == 4
+end
diff --git a/test/opaque_closure.jl b/test/opaque_closure.jl
index ed7added3751f3..11e4929c36edda 100644
--- a/test/opaque_closure.jl
+++ b/test/opaque_closure.jl
@@ -192,7 +192,7 @@ end
 
 # OpaqueClosure ABI
 f_oc_noinline(x) = @opaque function (y)
-    @Base._noinline_meta
+    @noinline
     x + y
 end
 
@@ -204,3 +204,5 @@ function f_oc_noinline_call(x, y)
     return f_oc_noinline(x)(y)
 end
 @test f_oc_noinline_call(1, 2) == 3
+
+@test_throws MethodError (@opaque x->x+1)(1, 2)
diff --git a/test/operators.jl b/test/operators.jl
index 070a924b41cdc7..d07f3382f53a5b 100644
--- a/test/operators.jl
+++ b/test/operators.jl
@@ -281,3 +281,17 @@ end
 end
 
 @test [Base.afoldl(+, 1:i...) for i = 1:40] == [i * (i + 1) ÷ 2 for i = 1:40]
+
+@testset "Returns" begin
+    @test @inferred(Returns(1)()   ) === 1
+    @test @inferred(Returns(1)(23) ) === 1
+    @test @inferred(Returns("a")(2,3)) == "a"
+    @test @inferred(Returns(1)(x=1, y=2)) === 1
+    @test @inferred(Returns(Int)()) === Int
+    @test @inferred(Returns(Returns(1))()) === Returns(1)
+    f = @inferred Returns(Int)
+    @inferred f(1,2)
+    val = [1,2,3]
+    @test Returns(val)(1) === val
+    @test sprint(show, Returns(1.0)) == "Returns{Float64}(1.0)"
+end
diff --git a/test/osutils.jl b/test/osutils.jl
index c9e3b9d91a3774..5f597292c5cc91 100644
--- a/test/osutils.jl
+++ b/test/osutils.jl
@@ -44,6 +44,7 @@ end
     @test (@static if false 1 elseif false 2 else 3 end) === 3
     @test (@static if false 1 elseif false 2 elseif true && false 3 else 4 end) === 4
     @test (@static if false 1 elseif false 2 elseif true && false 3 end) === nothing
+    @test_throws ArgumentError("invalid @static macro") @macroexpand @static 1
 end
 
 if Sys.iswindows()
diff --git a/test/path.jl b/test/path.jl
index ca772e24d41dec..31de4baffd1a04 100644
--- a/test/path.jl
+++ b/test/path.jl
@@ -59,6 +59,11 @@
         @test joinpath(S("foo"), S(homedir())) == homedir()
         @test joinpath(S(abspath("foo")), S(homedir())) == homedir()
 
+        for str in map(S, [sep, "a$(sep)b", "a$(sep)b$(sep)c", "a$(sep)b$(sep)c$(sep)d"])
+            @test str == joinpath(splitpath(str))
+            @test joinpath(splitpath(str)) == joinpath(splitpath(str)...)
+        end
+
         if Sys.iswindows()
             @test joinpath(S("foo"),S("bar:baz")) == "bar:baz"
             @test joinpath(S("C:"),S("foo"),S("D:"),S("bar")) == "D:bar"
@@ -75,6 +80,11 @@
             @test joinpath(S("\\\\server\\share"),S("a")) == "\\\\server\\share\\a"
             @test joinpath(S("\\\\server\\share\\"), S("a")) == "\\\\server\\share\\a"
 
+            for str in map(S, ["c:\\", "c:\\a", "c:\\a\\b", "c:\\a\\b\\c", "c:\\a\\b\\c\\d"])
+                @test str == joinpath(splitpath(str))
+                @test joinpath(splitpath(str)) == joinpath(splitpath(str)...)
+            end
+
         elseif Sys.isunix()
             @test joinpath(S("foo"),S("bar:baz")) == "foo$(sep)bar:baz"
             @test joinpath(S("C:"),S("foo"),S("D:"),S("bar")) == "C:$(sep)foo$(sep)D:$(sep)bar"
@@ -290,6 +300,10 @@
             # Additional cases
             @test_throws ArgumentError relpath(S("$(sep)home$(sep)user$(sep)dir_withendsep$(sep)"), "")
             @test_throws ArgumentError relpath(S(""), S("$(sep)home$(sep)user$(sep)dir_withendsep$(sep)"))
+
+            # issue 40237
+            path = "..$(sep)a$(sep)b$(sep)c"
+            @test relpath(abspath(path)) == path
         end
         test_relpath()
     end
diff --git a/test/precompile.jl b/test/precompile.jl
index f69e7a4766f2cb..999bd07c9e12b6 100644
--- a/test/precompile.jl
+++ b/test/precompile.jl
@@ -823,6 +823,10 @@ precompile_test_harness("Issue #25971") do load_path
     chmod(sourcefile, 0o600)
     cachefile = Base.compilecache(Base.PkgId("Foo25971"))
     @test filemode(sourcefile) == filemode(cachefile)
+    chmod(sourcefile, 0o444)
+    cachefile = Base.compilecache(Base.PkgId("Foo25971"))
+    # Check writable
+    @test touch(cachefile) == cachefile
 end
 
 precompile_test_harness("Issue #38312") do load_path
@@ -871,3 +875,47 @@ precompile_test_harness("Renamed Imports") do load_path
     Base.compilecache(Base.PkgId("RenameImports"))
     @test (@eval (using RenameImports; RenameImports.test())) isa Module
 end
+
+# issue #41872 (example from #38983)
+precompile_test_harness("No external edges") do load_path
+    write(joinpath(load_path, "NoExternalEdges.jl"),
+          """
+          module NoExternalEdges
+          bar(x::Int) = hcat(rand())
+          @inline bar() = hcat(rand())
+          bar(x::Float64) = bar()
+          foo1() = bar(1)
+          foo2() = bar(1.0)
+          foo3() = bar()
+          foo4() = hcat(rand())
+          precompile(foo1, ())
+          precompile(foo2, ())
+          precompile(foo3, ())
+          precompile(foo4, ())
+          end
+          """)
+    Base.compilecache(Base.PkgId("NoExternalEdges"))
+    @eval begin
+        using NoExternalEdges
+        @test only(methods(NoExternalEdges.foo1)).specializations[1].cache.max_world != 0
+        @test only(methods(NoExternalEdges.foo2)).specializations[1].cache.max_world != 0
+        @test only(methods(NoExternalEdges.foo3)).specializations[1].cache.max_world != 0
+        @test only(methods(NoExternalEdges.foo4)).specializations[1].cache.max_world != 0
+    end
+end
+
+@testset "issue 38149" begin
+    M = Module()
+    @eval M begin
+        @nospecialize
+        f(x, y) = x + y
+        f(x::Int, y) = 2x + y
+    end
+    precompile(M.f, (Int, Any))
+    precompile(M.f, (AbstractFloat, Any))
+    mis = map(methods(M.f)) do m
+        m.specializations[1]
+    end
+    @test any(mi -> mi.specTypes.parameters[2] === Any, mis)
+    @test all(mi -> isa(mi.cache, Core.CodeInstance), mis)
+end
diff --git a/test/ranges.jl b/test/ranges.jl
index fd12a0829ce5e7..e46207cc1481cc 100644
--- a/test/ranges.jl
+++ b/test/ranges.jl
@@ -1,5 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+using Base.Checked: checked_length
+
 @testset "range construction" begin
     @test_throws ArgumentError range(start=1, step=1, stop=2, length=10)
     @test_throws ArgumentError range(start=1, step=1, stop=10, length=11)
@@ -18,6 +20,20 @@
     # the next ones use ==, because it changes the eltype
     @test r ==  range(first(r),       last(r),      length(r)       )
     @test r ==  range(start=first(r), stop=last(r), length=length(r))
+    @test r === range(                stop=last(r), length=length(r))
+
+    r = 1:5
+    o = Base.OneTo(5)
+    let start=first(r), step=step(r), stop=last(r), length=length(r)
+        @test o === range(;              stop        )
+        @test o === range(;                    length)
+        @test r === range(; start,       stop        )
+        @test r === range(;              stop, length)
+        # the next three lines use ==, because the eltype changes
+        @test r ==  range(; start,       stop, length)
+        @test r ==  range(; start, step,       length)
+        @test r ==  range(; stop=Float64(stop))
+    end
 
     for T = (Int8, Rational{Int16}, UInt32, Float64, Char)
         @test typeof(range(start=T(5), length=3)) === typeof(range(stop=T(5), length=3))
@@ -267,22 +283,28 @@ end
         end
     end
     @testset "length" begin
-        @test length(.1:.1:.3) == 3
-        @test length(1.1:1.1:3.3) == 3
-        @test length(1.1:1.3:3) == 2
-        @test length(1:1:1.8) == 1
-        @test length(1:.2:2) == 6
-        @test length(1.:.2:2.) == 6
-        @test length(2:-.2:1) == 6
-        @test length(2.:-.2:1.) == 6
-        @test length(2:.2:1) == 0
+        @test length(.1:.1:.3) == checked_length(.1:.1:.3) == 3
+        @test length(1.1:1.1:3.3) == checked_length(1.1:1.1:3.3) == 3
+        @test length(1.1:1.3:3) == checked_length(1.1:1.3:3) == 2
+        @test length(1:1:1.8) == checked_length(1:1:1.8) == 1
+        @test length(1:.2:2) == checked_length(1:.2:2) == 6
+        @test length(1.:.2:2.) == checked_length(1.:.2:2.) == 6
+        @test length(2:-.2:1) == checked_length(2:-.2:1) == 6
+        @test length(2.:-.2:1.) == checked_length(2.:-.2:1.) == 6
+        @test length(2:.2:1) == checked_length(2:.2:1) == 0
         @test length(2.:.2:1.) == 0
 
-        @test length(1:0) == 0
-        @test length(0.0:-0.5) == 0
-        @test length(1:2:0) == 0
-        @test length(Char(0):Char(0x001fffff)) == 2097152
-        @test length(typemax(UInt64)//one(UInt64):1:typemax(UInt64)//one(UInt64)) == 1
+        @test length(1:0) == checked_length(1:0) == 0
+        @test length(0.0:-0.5) == checked_length(0.0:-0.5) == 0
+        @test length(1:2:0) == checked_length(1:2:0) == 0
+        let r = Char(0):Char(0x001fffff)
+            @test length(r) == 2097152
+            @test_throws MethodError checked_length(r) == 2097152 # this would work if checked_sub is defined on Char
+        end
+        let r = typemax(UInt64)//one(UInt64):1:typemax(UInt64)//one(UInt64)
+            @test length(r) == 1
+            @test_throws MethodError checked_length(r) == 1 # this would work if checked_sub is defined on Rational
+        end
     end
     @testset "keys/values" begin
         keytype_is_correct(r) = keytype(r) == eltype(keys(r))
@@ -395,6 +417,9 @@ end
         @test intersect(1:3, 2) === intersect(2, 1:3) === 2:2
         @test intersect(1.0:3.0, 2) == intersect(2, 1.0:3.0) == [2.0]
 
+        @test intersect(1:typemax(Int), [1, 3]) == [1, 3]
+        @test intersect([1, 3], 1:typemax(Int)) == [1, 3]
+
         @testset "Support StepRange with a non-numeric step" begin
             start = Date(1914, 7, 28)
             stop = Date(1918, 11, 11)
@@ -404,6 +429,21 @@ end
             @test intersect(start-Day(10):Day(1):stop-Day(10), start:Day(5):stop) ==
                 start:Day(5):stop-Day(10)-mod(stop-start, Day(5))
         end
+
+        @testset "Two AbstractRanges" begin
+            struct DummyRange{T} <: AbstractRange{T}
+                r
+            end
+            Base.iterate(dr::DummyRange) = iterate(dr.r)
+            Base.iterate(dr::DummyRange, state) = iterate(dr.r, state)
+            Base.length(dr::DummyRange) = length(dr.r)
+            Base.in(x::Int, dr::DummyRange) = in(x, dr.r)
+            Base.unique(dr::DummyRange) = unique(dr.r)
+            r1 = DummyRange{Int}([1, 2, 3, 3, 4, 5])
+            r2 = DummyRange{Int}([3, 3, 4, 5, 6])
+            @test intersect(r1, r2) == [3, 4, 5]
+            @test intersect(r2, r1) == [3, 4, 5]
+        end
     end
     @testset "issubset" begin
         @test issubset(1:3, 1:typemax(Int)) #32461
@@ -459,6 +499,11 @@ end
 
         @test !(1 in 1:0)
         @test !(1.0 in 1.0:0.0)
+
+        for r = (1:10, 1//1:10//1, 1:2:5, 1//2:1//2:5//2, 1.0:5.0, LinRange(1.5, 5.5, 9)),
+            x = (NaN16, Inf32, -Inf64, 1//0, -1//0)
+            @test !(x in r)
+        end
     end
     @testset "in() works across types, including non-numeric types (#21728)" begin
         @test 1//1 in 1:3
@@ -496,22 +541,54 @@ for a=AbstractRange[3:6, 0:2:10], b=AbstractRange[0:1, 2:-1:0]
 end
 
 # avoiding intermediate overflow (#5065)
-@test length(1:4:typemax(Int)) == div(typemax(Int),4) + 1
+@test length(1:4:typemax(Int)) == div(typemax(Int), 4) + 1
+@test checked_length(1:4:typemax(Int)) == div(typemax(Int), 4) + 1 # computed exactly in modulo arithmetic
 
 @testset "overflow in length" begin
-    Tset = Int === Int64 ? (Int,UInt,Int128,UInt128) :
-                           (Int,UInt,Int64,UInt64,Int128, UInt128)
+    Tset = Int === Int64 ? (Int, UInt, Int128, UInt128) :
+                           (Int, UInt, Int64, UInt64, Int128, UInt128)
     for T in Tset
-        @test_throws OverflowError length(zero(T):typemax(T))
-        @test_throws OverflowError length(typemin(T):typemax(T))
-        @test_throws OverflowError length(zero(T):one(T):typemax(T))
-        @test_throws OverflowError length(typemin(T):one(T):typemax(T))
+        @test length(zero(T):typemax(T)) == typemin(T)
+        @test length(typemin(T):typemax(T)) == T(0)
+        @test length(zero(T):one(T):typemax(T)) == typemin(T)
+        @test length(typemin(T):one(T):typemax(T)) == T(0)
+        @test_throws OverflowError checked_length(zero(T):typemax(T))
+        @test_throws OverflowError checked_length(typemin(T):typemax(T))
+        @test_throws OverflowError checked_length(zero(T):one(T):typemax(T))
+        @test_throws OverflowError checked_length(typemin(T):one(T):typemax(T))
+        @test length(one(T):typemax(T)) == checked_length(one(T):typemax(T)) == typemax(T)
         if T <: Signed
-            @test_throws OverflowError length(-one(T):typemax(T)-one(T))
-            @test_throws OverflowError length(-one(T):one(T):typemax(T)-one(T))
+            @test length(-one(T):typemax(T)-one(T)) == typemin(T)
+            @test length(-one(T):one(T):typemax(T)-one(T)) == typemin(T)
+            @test length(-one(T):typemax(T)) == typemin(T) + T(1)
+            @test length(zero(T):typemin(T):typemin(T)) == 2
+            @test length(one(T):typemin(T):typemin(T)) == 2
+            @test length(typemax(T):typemin(T):typemin(T)) == 2
+            @test length(-one(T):typemin(T):typemin(T)) == 1
+            @test length(zero(T):typemin(T):zero(T)) == 1
+            @test length(zero(T):typemin(T):one(T)) == 0
+            @test_throws OverflowError checked_length(-one(T):typemax(T)-one(T))
+            @test_throws OverflowError checked_length(-one(T):one(T):typemax(T)-one(T))
+            @test_throws InexactError checked_length(zero(T):typemin(T):typemin(T)) == 2 # this can be improved
+            @test_throws InexactError checked_length(one(T):typemin(T):typemin(T)) == 2 # this can be improved
+            @test_throws InexactError checked_length(typemax(T):typemin(T):typemin(T)) == 2 # this can be improved
         end
     end
 end
+
+# A number type with the overflow behavior of `UInt8`. Conversion to `Integer` returns an
+# `Int32`, i.e., a type with different `typemin`/`typemax`. See #41479.
+struct OverflowingReal <: Real
+    val::UInt8
+end
+OverflowingReal(x::OverflowingReal) = x
+Base.:<=(x::OverflowingReal, y::OverflowingReal) = x.val <= y.val
+Base.:+(x::OverflowingReal, y::OverflowingReal) = OverflowingReal(x.val + y.val)
+Base.:-(x::OverflowingReal, y::OverflowingReal) = OverflowingReal(x.val - y.val)
+Base.round(x::OverflowingReal, ::RoundingMode) = x
+Base.Integer(x::OverflowingReal) = Int32(x.val)
+@test length(OverflowingReal(1):OverflowingReal(0)) == 0
+
 @testset "loops involving typemin/typemax" begin
     n = 0
     s = 0
@@ -618,14 +695,10 @@ end
     @test broadcast(+, T(1):2:6, 0.3) === T(1)+0.3:2:5+0.3
     @test broadcast(-, T(1):2:6, 1) === T(0):2:4
     @test broadcast(-, T(1):2:6, 0.3) === T(1)-0.3:2:5-0.3
-    if T <: Unsigned
-        @test_broken broadcast(-, T(1):3) == -T(1):-1:-T(3)
-        @test_broken broadcast(-, 2, T(1):3) == T(1):-1:-T(1)
-    else
-        @test length(broadcast(-, T(1):3, 2)) === length(T(1)-2:T(3)-2)
-        @test broadcast(-, T(1):3) == -T(1):-1:-T(3)
-        @test broadcast(-, 2, T(1):3) == T(1):-1:-T(1)
-    end
+    is_unsigned = T <: Unsigned
+    is_unsigned && @test length(broadcast(-, T(1):3, 2)) === length(T(1)-2:T(3)-2)
+    @test broadcast(-, T(1):3) == -T(1):-T(1):-T(3)
+    @test broadcast(-, 2, T(1):3) == T(1):-T(1):-T(1)
 end
 @testset "operations between ranges and arrays" for T in (Int, UInt, Int128)
     @test all(([T(1):5;] + (T(5):-1:1)) .=== T(6))
@@ -870,32 +943,45 @@ end
 end
 # issue #2959
 @test 1.0:1.5 == 1.0:1.0:1.5 == 1.0:1.0
-#@test 1.0:(.3-.1)/.1 == 1.0:2.0
+@test_broken 1.0:(.3-.1)/.1 == 1.0:2.0 # (this is just shy of 2.0)
 
 @testset "length with typemin/typemax" begin
-    let r = typemin(Int64):2:typemax(Int64), s = typemax(Int64):-2:typemin(Int64)
+    let r = typemin(Int64):2:typemax(Int64)
         @test first(r) == typemin(Int64)
-        @test last(r) == (typemax(Int64)-1)
-        @test_throws OverflowError length(r)
-
-        @test first(s) == typemax(Int64)
-        @test last(s) == (typemin(Int64)+1)
-        @test_throws OverflowError length(s)
+        @test last(r) == typemax(Int64) - 1
+        @test length(r) == typemin(Int64)
+        @test_throws OverflowError checked_length(r)
+    end
+    let r = typemax(Int64):-2:typemin(Int64)
+        @test first(r) == typemax(Int64)
+        @test last(r) == typemin(Int64) + 1
+        @test length(r) == typemin(Int64)
+        @test_throws OverflowError checked_length(r)
     end
 
-    @test length(typemin(Int64):3:typemax(Int64)) == 6148914691236517206
-    @test length(typemax(Int64):-3:typemin(Int64)) == 6148914691236517206
+    let r = typemin(Int64):3:typemax(Int64)
+        @test length(r) == checked_length(r) == 6148914691236517206
+    end
+    let r = typemax(Int64):-3:typemin(Int64)
+        @test length(r) == checked_length(r) == 6148914691236517206
+    end
 
     for s in 3:100
-        @test length(typemin(Int):s:typemax(Int)) == length(big(typemin(Int)):big(s):big(typemax(Int)))
-        @test length(typemax(Int):-s:typemin(Int)) == length(big(typemax(Int)):big(-s):big(typemin(Int)))
+        r = typemin(Int):s:typemax(Int)
+        br = big(typemin(Int)):big(s):big(typemax(Int))
+        @test length(r) == checked_length(r) == length(br)
+
+        r = typemax(Int):-s:typemin(Int)
+        br = big(typemax(Int)):big(-s):big(typemin(Int))
+        @test length(r) == checked_length(r) == length(br)
     end
 
-    @test length(UInt(1):UInt(1):UInt(0)) == 0
-    @test length(typemax(UInt):UInt(1):(typemax(UInt)-1)) == 0
-    @test length(typemax(UInt):UInt(2):(typemax(UInt)-1)) == 0
-    @test length((typemin(Int)+3):5:(typemin(Int)+1)) == 0
+    @test length(UInt(1):UInt(1):UInt(0)) == checked_length(UInt(1):UInt(1):UInt(0)) == 0
+    @test length(typemax(UInt):UInt(1):(typemax(UInt)-1)) == checked_length(typemax(UInt):UInt(1):(typemax(UInt)-1)) == 0
+    @test length(typemax(UInt):UInt(2):(typemax(UInt)-1)) == checked_length(typemax(UInt):UInt(2):(typemax(UInt)-1)) == 0
+    @test length((typemin(Int)+3):5:(typemin(Int)+1)) == checked_length((typemin(Int)+3):5:(typemin(Int)+1)) == 0
 end
+
 # issue #6364
 @test length((1:64)*(pi/5)) == 64
 
@@ -965,7 +1051,8 @@ end
                 (Int8,UInt8,Int16,UInt16,Int32,UInt32) :
                 (Int8,UInt8,Int16,UInt16))
     for T in smallint
-        @test length(typemin(T):typemax(T)) == 2^(8*sizeof(T))
+        s = typemin(T):typemax(T)
+        @test length(s) == checked_length(s) == 2^(8*sizeof(T))
     end
 end
 
@@ -973,7 +1060,7 @@ end
 @test (0:1//2:2)[1:2:3] == 0:1//1:1
 
 # issue #12278
-@test length(1:UInt(0)) == 0
+@test length(1:UInt(0)) == checked_length(1:UInt(0)) == 0
 
 @testset "zip" begin
     i = 0
@@ -1046,17 +1133,14 @@ end
     @test reverse(LinRange{Int}(0,3,4)) === LinRange{Int}(3,0,4)
     @test reverse(LinRange{Float64}(0.,3.,4)) === LinRange{Float64}(3.,0.,4)
 end
-@testset "Issue #11245" begin
-    io = IOBuffer()
-    show(io, range(1, stop=2, length=3))
-    str = String(take!(io))
-#    @test str == "range(1.0, stop=2.0, length=3)"
-    @test str == "1.0:0.5:2.0"
-end
+
+# issue #11245
+@test repr(range(1, stop=2, length=3)) == "1.0:0.5:2.0"
 
 @testset "issue 10950" begin
     r = 1//2:3
     @test length(r) == 3
+    @test checked_length(r) == 3
     i = 1
     for x in r
         @test x == i//2
@@ -1069,10 +1153,11 @@ end
     # repr/show should display the range nicely
     # to test print_range in range.jl
     replrepr(x) = repr("text/plain", x; context=IOContext(stdout, :limit=>true, :displaysize=>(24, 80)))
+    nb = Sys.WORD_SIZE
     @test replrepr(1:4) == "1:4"
     @test repr("text/plain", 1:4) == "1:4"
     @test repr("text/plain", range(1, stop=5, length=7)) == "1.0:0.6666666666666666:5.0"
-    @test repr("text/plain", LinRange{Float64}(1,5,7)) == "7-element LinRange{Float64}:\n 1.0,1.66667,2.33333,3.0,3.66667,4.33333,5.0"
+    @test repr("text/plain", LinRange{Float64}(1,5,7)) == "7-element LinRange{Float64, Int$nb}:\n 1.0,1.66667,2.33333,3.0,3.66667,4.33333,5.0"
     @test repr(range(1, stop=5, length=7)) == "1.0:0.6666666666666666:5.0"
     @test repr(LinRange{Float64}(1,5,7)) == "range(1.0, stop=5.0, length=7)"
     @test replrepr(0:100.) == "0.0:1.0:100.0"
@@ -1080,13 +1165,13 @@ end
     # only examines spacing of the left and right edges of the range, sufficient
     # to cover the designated screen size.
     @test replrepr(range(0, stop=100, length=10000)) == "0.0:0.010001000100010001:100.0"
-    @test replrepr(LinRange{Float64}(0,100, 10000)) == "10000-element LinRange{Float64}:\n 0.0,0.010001,0.020002,0.030003,0.040004,…,99.95,99.96,99.97,99.98,99.99,100.0"
+    @test replrepr(LinRange{Float64}(0,100, 10000)) == "10000-element LinRange{Float64, Int$nb}:\n 0.0,0.010001,0.020002,0.030003,0.040004,…,99.95,99.96,99.97,99.98,99.99,100.0"
 
     @test sprint(show, UnitRange(1, 2)) == "1:2"
     @test sprint(show, StepRange(1, 2, 5)) == "1:2:5"
 end
 
-@testset "Issue 11049 and related" begin
+@testset "Issue 11049, and related" begin
     @test promote(range(0f0, stop=1f0, length=3), range(0., stop=5., length=2)) ===
         (range(0., stop=1., length=3), range(0., stop=5., length=2))
     @test convert(LinRange{Float64}, range(0., stop=1., length=3)) === LinRange(0., 1., 3)
@@ -1148,6 +1233,7 @@ end
     @test [reverse(range(1.0, stop=27.0, length=1275));] ==
         reverse([range(1.0, stop=27.0, length=1275);])
 end
+
 @testset "PR 12200 and related" begin
     for _r in (1:2:100, 1:100, 1f0:2f0:100f0, 1.0:2.0:100.0,
                range(1, stop=100, length=10), range(1f0, stop=100f0, length=10))
@@ -1266,19 +1352,22 @@ end
     end
 
     r = 1f8-10:1f8
-    @test_broken argmin(f) == argmin(collect(r))
-    @test_broken argmax(f) == argmax(collect(r))
+    rv = collect(r)
+    @test argmin(r) == argmin(rv) == 1
+    @test r[argmax(r)] == r[argmax(rv)] == 1f8
+    @test argmax(r) == lastindex(r)
+    @test argmax(rv) != lastindex(r)
 end
 
 @testset "OneTo" begin
     let r = Base.OneTo(-5)
         @test isempty(r)
-        @test length(r) == 0
+        @test length(r) == checked_length(r) == 0
         @test size(r) == (0,)
     end
     let r = Base.OneTo(3)
         @test !isempty(r)
-        @test length(r) == 3
+        @test length(r) == checked_length(r) == 3
         @test size(r) == (3,)
         @test step(r) == 1
         @test first(r) == 1
@@ -1292,8 +1381,8 @@ end
         @test_throws BoundsError r[4]
         @test_throws BoundsError r[0]
         @test broadcast(+, r, 1) === 2:4
-        @test 2*r === 2:2:6
-        @test r + r === 2:2:6
+        @test 2*r == 2:2:6
+        @test r + r == 2:2:6
         k = 0
         for i in r
             @test i == (k += 1)
@@ -1375,7 +1464,7 @@ end
 
 @testset "issue #20520" begin
     r = range(1.3173739f0, stop=1.3173739f0, length=3)
-    @test length(r) == 3
+    @test length(r) == checked_length(r) == 3
     @test first(r) === 1.3173739f0
     @test last(r)  === 1.3173739f0
     @test r[2]     === 1.3173739f0
@@ -1399,7 +1488,8 @@ using .Main.Furlongs
 
 @testset "dimensional correctness" begin
     @test length(Vector(Furlong(2):Furlong(10))) == 9
-    @test length(range(Furlong(2), length=9)) == 9
+    @test length(range(Furlong(2), length=9)) == checked_length(range(Furlong(2), length=9)) == 9
+    @test @inferred(length(StepRange(Furlong(2), Furlong(1), Furlong(1)))) == 0
     @test Vector(Furlong(2):Furlong(1):Furlong(10)) == Vector(range(Furlong(2), step=Furlong(1), length=9)) == Furlong.(2:10)
     @test Vector(Furlong(1.0):Furlong(0.5):Furlong(10.0)) ==
           Vector(Furlong(1):Furlong(0.5):Furlong(10)) == Furlong.(1:0.5:10)
@@ -1436,22 +1526,26 @@ end
     @test @inferred(r .+ x) === 3:7
     @test @inferred(r .- x) === -1:3
     @test @inferred(x .- r) === 1:-1:-3
-    @test @inferred(x .* r) === 2:2:10
-    @test @inferred(r .* x) === 2:2:10
+    @test @inferred(x .* r) == 2:2:10
+    @test @inferred(r .* x) == 2:2:10
     @test @inferred(r ./ x) === 0.5:0.5:2.5
     @test @inferred(x ./ r) == 2 ./ [r;] && isa(x ./ r, Vector{Float64})
     @test @inferred(r .\ x) == 2 ./ [r;] && isa(x ./ r, Vector{Float64})
     @test @inferred(x .\ r) === 0.5:0.5:2.5
 
-    @test @inferred(2 .* (r .+ 1) .+ 2) === 6:2:14
+    @test @inferred(2 .* (r .+ 1) .+ 2) == 6:2:14
 end
 
 @testset "Bad range calls" begin
     @test_throws ArgumentError range(1)
     @test_throws ArgumentError range(nothing)
     @test_throws ArgumentError range(1, step=4)
-    @test_throws ArgumentError range(nothing, length=2)
+    @test_throws ArgumentError range(; step=1, length=6)
+    @test_throws ArgumentError range(; step=2, stop=7.5)
     @test_throws ArgumentError range(1.0, step=0.25, stop=2.0, length=5)
+    @test_throws ArgumentError range(; stop=nothing)
+    @test_throws ArgumentError range(; length=nothing)
+    @test_throws TypeError range(; length=5.5)
 end
 
 @testset "issue #23300#issuecomment-371575548" begin
@@ -1473,6 +1567,8 @@ end
     @test view(1:10, 1:5) === 1:5
     @test view(1:10, 1:2:5) === 1:2:5
     @test view(1:2:9, 1:5) === 1:2:9
+    @test view(1:10, :) === 1:10
+    @test view(1:2:9, :) === 1:2:9
 
     # Ensure we don't hit a fallback `view` if there's a better `getindex` implementation
     vmt = collect(methods(view, Tuple{AbstractRange, AbstractRange}))
@@ -1494,15 +1590,18 @@ module NonStandardIntegerRangeTest
 
 using Test
 
+using Base.Checked: checked_length
+import Base.Checked: checked_add, checked_sub
+
 struct Position <: Integer
     val::Int
 end
-Position(x::Position) = x # to resolve ambiguity with boot.jl:728
+Position(x::Position) = x # to resolve ambiguity with boot.jl:770
 
 struct Displacement <: Integer
     val::Int
 end
-Displacement(x::Displacement) = x # to resolve ambiguity with boot.jl:728
+Displacement(x::Displacement) = x # to resolve ambiguity with boot.jl:770
 
 Base.:-(x::Displacement) = Displacement(-x.val)
 Base.:-(x::Position, y::Position) = Displacement(x.val - y.val)
@@ -1519,14 +1618,67 @@ Base.Unsigned(x::Displacement) = Unsigned(x.val)
 Base.rem(x::Displacement, y::Displacement) = Displacement(rem(x.val, y.val))
 Base.div(x::Displacement, y::Displacement) = Displacement(div(x.val, y.val))
 
-# required for collect (summing lengths); alternatively, should unsafe_length return Int by default?
+# required for collect (summing lengths); alternatively, should length return Int by default?
 Base.promote_rule(::Type{Displacement}, ::Type{Int}) = Int
 Base.convert(::Type{Int}, x::Displacement) = x.val
 
+# Unsigned complement, for testing checked_length
+struct UPosition <: Unsigned
+    val::UInt
+end
+UPosition(x::UPosition) = x # to resolve ambiguity with boot.jl:770
+
+struct UDisplacement <: Unsigned
+    val::UInt
+end
+UDisplacement(x::UDisplacement) = x # to resolve ambiguity with boot.jl:770
+
+Base.show(io::IO, x::Union{Position, UPosition, Displacement, UDisplacement}) =
+    # should use show if we were to do this properly (instead of just a test-helper)
+    print(io, typeof(x).name.name, "(", x.val, ")")
+
+Base.:-(x::UPosition, y::UPosition) = UDisplacement(x.val - y.val)
+Base.:-(x::UPosition, y::UDisplacement) = UPosition(x.val - y.val)
+Base.:+(x::UPosition, y::UDisplacement) = UPosition(x.val + y.val)
+Base.:+(x::UDisplacement, y::Displacement) = UDisplacement(x.val + y.val)
+Base.:+(x::UDisplacement, y::UDisplacement) = UDisplacement(x.val + y.val)
+checked_sub(x::UPosition, y::UPosition) = UDisplacement(checked_sub(x.val, y.val))
+checked_sub(x::UPosition, y::UDisplacement) = UPosition(checked_sub(x.val, y.val))
+checked_sub(x::UDisplacement, y::UDisplacement) = UDisplacement(checked_sub(x.val, y.val))
+checked_add(x::UPosition, y::UDisplacement) = UPosition(checked_add(x.val, y.val))
+checked_add(x::UDisplacement, y::UDisplacement) = UDisplacement(checked_add(x.val, y.val))
+Base.:+(x::UPosition, y::Displacement) = UPosition(x.val + y.val)
+Base.:(<=)(x::UPosition, y::UPosition) = x.val <= y.val
+Base.:(<)(x::UPosition, y::UPosition) = x.val < y.val
+Base.:(<)(x::UDisplacement, y::UDisplacement) = x.val < y.val
+
+# for StepRange computation:
+Base.rem(x::UDisplacement, y::Displacement) = UDisplacement(rem(x.val, y.val))
+Base.div(x::UDisplacement, y::Displacement) = UDisplacement(div(x.val, y.val))
+Base.rem(x::UDisplacement, y::UDisplacement) = UDisplacement(rem(x.val, y.val))
+Base.div(x::UDisplacement, y::UDisplacement) = UDisplacement(div(x.val, y.val))
+
+#Base.promote_rule(::Type{UDisplacement}, ::Type{Int}) = Int
+#Base.convert(::Type{Int}, x::UDisplacement) = Int(x.val)
+
 @testset "Ranges with nonstandard Integers" begin
     for (start, stop) in [(2, 4), (3, 3), (3, -2)]
-        @test collect(Position(start) : Position(stop)) == Position.(start : stop)
-    end
+        r = Position(start) : Position(stop)
+        @test length(r) === Displacement(stop >= start ? stop - start + 1 : 0)
+        start >= 0 && stop >= 0 && @test UDisplacement(length(r).val) ===
+              checked_length(UPosition(start) : UPosition(stop)) ===
+              checked_length(UPosition(start) : Displacement(1) : UPosition(stop)) ===
+              checked_length(UPosition(start) : UDisplacement(1) : UPosition(stop))
+        @test collect(r) == Position.(start : stop)
+    end
+
+    @test length(UPosition(3):Displacement(7):UPosition(100)) === checked_length(UPosition(3):Displacement(7):UPosition(100)) === UDisplacement(14)
+    @test length(UPosition(100):Displacement(7):UPosition(3)) === checked_length(UPosition(100):Displacement(7):UPosition(3)) === UDisplacement(0)
+    @test length(UPosition(100):Displacement(-7):UPosition(3)) === checked_length(UPosition(100):Displacement(-7):UPosition(3)) === UDisplacement(14)
+    @test length(UPosition(3):Displacement(-7):UPosition(100)) === checked_length(UPosition(3):Displacement(-7):UPosition(100)) === UDisplacement(0)
+    @test_throws OverflowError checked_length(zero(UPosition):UPosition(typemax(UInt)))
+    @test_throws OverflowError checked_length(zero(UPosition):Displacement(1):UPosition(typemax(UInt)))
+    @test_throws OverflowError checked_length(UPosition(typemax(UInt)):Displacement(-1):zero(UPosition))
 
     for start in [3, 0, -2]
         @test collect(Base.OneTo(Position(start))) == Position.(Base.OneTo(start))
@@ -1548,7 +1700,7 @@ end
 end # module NonStandardIntegerRangeTest
 
 @testset "Issue #26619" begin
-    @test length(UInt(100) : -1 : 1) === UInt(100)
+    @test length(UInt(100) : -1 : 1) === checked_length(UInt(100) : -1 : 1) === UInt(100)  # `===` pins the UInt return type of both
     @test collect(UInt(5) : -1 : 3) == [UInt(5), UInt(4), UInt(3)]
 
     let r = UInt(5) : -2 : 2
@@ -1568,23 +1720,35 @@ end # module NonStandardIntegerRangeTest
 end
 
 @testset "constant-valued ranges (issues #10391 and #29052)" begin
-    for r in ((1:4), (1:1:4), (1.0:4.0))
-        if eltype(r) === Int
-            @test_broken @inferred(0 * r) == [0.0, 0.0, 0.0, 0.0]
-            @test_broken @inferred(0 .* r) == [0.0, 0.0, 0.0, 0.0]
-            @test_broken @inferred(r + (4:-1:1)) == [5.0, 5.0, 5.0, 5.0]
-            @test_broken @inferred(r .+ (4:-1:1)) == [5.0, 5.0, 5.0, 5.0]
-        else
-            @test @inferred(0 * r) == [0.0, 0.0, 0.0, 0.0]
-            @test @inferred(0 .* r) == [0.0, 0.0, 0.0, 0.0]
-            @test @inferred(r + (4:-1:1)) == [5.0, 5.0, 5.0, 5.0]
-            @test @inferred(r .+ (4:-1:1)) == [5.0, 5.0, 5.0, 5.0]
-        end
+    @testset "with $(nameof(typeof(r))) of $(eltype(r))" for r in ((1:4), (1:1:4), StepRangeLen(1,1,4), (1.0:4.0))
+        @test @inferred(0 * r) == [0.0, 0.0, 0.0, 0.0]
+        @test @inferred(0 .* r) == [0.0, 0.0, 0.0, 0.0]
+        @test @inferred(r .* 0) == [0.0, 0.0, 0.0, 0.0]
+        @test @inferred(r + (4:-1:1)) == [5.0, 5.0, 5.0, 5.0]
+        @test @inferred(r .+ (4:-1:1)) == [5.0, 5.0, 5.0, 5.0]
+        @test @inferred(r - r) == [0.0, 0.0, 0.0, 0.0]
+        @test @inferred(r .- r) == [0.0, 0.0, 0.0, 0.0]
+
         @test @inferred(r .+ (4.0:-1:1)) == [5.0, 5.0, 5.0, 5.0]
         @test @inferred(0.0 * r) == [0.0, 0.0, 0.0, 0.0]
         @test @inferred(0.0 .* r) == [0.0, 0.0, 0.0, 0.0]
         @test @inferred(r / Inf) == [0.0, 0.0, 0.0, 0.0]
         @test @inferred(r ./ Inf) == [0.0, 0.0, 0.0, 0.0]
+
+        @test eval(Meta.parse(repr(0 * r))) == [0.0, 0.0, 0.0, 0.0]
+
+        # Not constant-valued, but related methods:
+        @test @inferred(-1 * r) == [-1,-2,-3,-4]
+        @test @inferred(r * -1) == [-1,-2,-3,-4]
+        @test @inferred(r / -1) == [-1,-2,-3,-4]
+
+        @test @inferred(-1.0 .* r) == [-1,-2,-3,-4]
+        @test @inferred(r .* -1.0) == [-1,-2,-3,-4]
+        @test @inferred(r ./ -1.0) == [-1,-2,-3,-4]
+
+        @test @inferred(-1 * reverse(r)) == [-4,-3,-2,-1]
+        @test @inferred(-1.0 .* reverse(r)) == [-4,-3,-2,-1]
+        @test @inferred(reverse(r) ./ -1.0) == [-4,-3,-2,-1]
     end
 
     @test_broken @inferred(range(0, step=0, length=4)) == [0, 0, 0, 0]
@@ -1597,7 +1761,7 @@ end
     @test @inferred(range(0.0, stop=0, length=4)) == [0.0, 0.0, 0.0, 0.0]
 
     z4 = 0.0 * (1:4)
-    @test @inferred(z4 .+ (1:4)) === 1.0:1.0:4.0
+    @test @inferred(z4 .+ (1:4)) == 1.0:1.0:4.0
     @test @inferred(z4 .+ z4) === z4
 end
 
@@ -1753,6 +1917,40 @@ end
     @test typeof(step(StepRangeLen(Int8(1), Int8(2), 3, 2))) === Int8
 end
 
+@testset "LinRange eltype for element types that wrap integers" begin
+    struct RealWrapper{T <: Real} <: Real  # minimal Real that just wraps another Real
+        x :: T
+    end
+    Base.promote_rule(::Type{S}, ::Type{RealWrapper{T}}) where {T,S<:Real} = RealWrapper{promote_type(S, T)}
+    Base.:(-)(w::RealWrapper) = RealWrapper(-w.x)
+    for f in [:(+), :(-), :(*), :(/)]  # arithmetic re-wraps the result
+        @eval Base.$f(w::RealWrapper, y::RealWrapper) = RealWrapper($f(w.x, y.x))
+    end
+    for f in [:(<), :(==), :(<=)]  # comparisons forward to the wrapped values
+        @eval Base.$f(w::RealWrapper, y::RealWrapper) = $f(w.x, y.x)
+    end
+    for T in [:Float32, :Float64]
+        @eval Base.$T(w::RealWrapper) = $T(w.x)
+    end
+    (::Type{RealWrapper{T}})(w::RealWrapper) where {T<:Real} = RealWrapper{T}(T(w.x))
+    (::Type{T})(w::RealWrapper{T}) where {T<:Real} = T(w.x)  # unwrap, e.g. Int(RealWrapper(2))
+    Base.:(==)(w::RealWrapper, y::RealWrapper) = w.x == y.x
+    Base.isfinite(w::RealWrapper) = isfinite(w.x)
+    Base.signbit(w::RealWrapper) = signbit(w.x)
+
+    x = RealWrapper(2)
+    r1 = range(x, stop = 2x, length = 10)
+    r2 = range(Int(x), stop = Int(2x), length = 10)  # plain-Int reference for r1
+    for i in eachindex(r1, r2)
+        @test r1[i] ≈ r2[i]
+    end
+    r3 = LinRange(x, 2x, 10)
+    r4 = LinRange(Int(x), Int(2x), 10)  # plain-Int reference for r3, so the loop is not r3 vs. itself
+    for i in eachindex(r3, r4)
+        @test r3[i] ≈ r4[i]
+    end
+end
+
 @testset "Bool indexing of ranges" begin
     @test_throws ArgumentError Base.OneTo(true)
     @test_throws ArgumentError Base.OneTo(true:true:true)
@@ -1899,3 +2097,87 @@ end
     @test_throws BoundsError r[true:true:false]
     @test_throws BoundsError r[true:true:true]
 end
+@testset "Non-Int64 endpoints that are identical (#39798)" begin
+    for T in DataType[Float16,Float32,Float64,Bool,Int8,Int16,Int32,Int64,Int128,UInt8,UInt16,UInt32,UInt64,UInt128],
+        r in [ LinRange(1, 1, 10), StepRangeLen(7, 0, 5) ]
+        if first(r) > typemax(T)
+            continue
+        end
+        let start=T(first(r)), stop=T(last(r)), step=T(step(r)), length=length(r)
+            @test range(  start, stop,       length) == r
+            @test range(  start, stop;       length) == r
+            @test range(  start; stop,       length) == r
+            @test range(; start, stop,       length) == r
+        end
+    end
+end
+@testset "PR 40320 fixes" begin
+    # found by nanosoldier
+    @test 0.2 * (-2:2) == -0.4:0.2:0.4  # from tests of AbstractFFTs, needs Base.TwicePrecision
+    @test 0.2f0 * (-2:2) == Float32.(-0.4:0.2:0.4)  # likewise needs Float64
+    @test 0.2 * (-2:1:2) == -0.4:0.2:0.4
+
+    # https://github.com/JuliaLang/julia/issues/40846
+    @test 0.1 .* (3:-1:1) ≈ [0.3, 0.2, 0.1]
+    @test (10:-1:1) * 0.1 == 1:-0.1:0.1
+    @test 0.2 * (-2:2:2) == [-0.4, 0, 0.4]
+end
+
+@testset "Indexing OneTo with IdentityUnitRange" begin
+    for endpt in Any[10, big(10), UInt(10)]
+        r = Base.OneTo(endpt)
+        inds = Base.IdentityUnitRange(3:5)
+        rs = r[inds]
+        @test rs === inds
+        @test_throws BoundsError r[Base.IdentityUnitRange(-1:100)]
+    end
+end
+
+@testset "non 1-based ranges indexing" begin
+    struct ZeroBasedUnitRange{T,A<:AbstractUnitRange{T}} <: AbstractUnitRange{T}
+        a :: A
+        function ZeroBasedUnitRange(a::AbstractUnitRange{T}) where {T}
+            @assert !Base.has_offset_axes(a)
+            new{T, typeof(a)}(a)
+        end
+    end
+
+    Base.parent(A::ZeroBasedUnitRange) = A.a
+    Base.first(A::ZeroBasedUnitRange) = first(parent(A))
+    Base.length(A::ZeroBasedUnitRange) = length(parent(A))
+    Base.last(A::ZeroBasedUnitRange) = last(parent(A))
+    Base.size(A::ZeroBasedUnitRange) = size(parent(A))
+    Base.axes(A::ZeroBasedUnitRange) = map(x -> Base.IdentityUnitRange(0:x-1), size(parent(A)))
+    Base.getindex(A::ZeroBasedUnitRange, i::Int) = parent(A)[i + 1]
+    Base.getindex(A::ZeroBasedUnitRange, i::Integer) = parent(A)[i + 1]
+    Base.firstindex(A::ZeroBasedUnitRange) = 0
+    function Base.show(io::IO, A::ZeroBasedUnitRange)
+        show(io, parent(A))
+        print(io, " with indices $(axes(A,1))")
+    end
+
+    r = ZeroBasedUnitRange(5:8)
+    @test r[0:2] == r[0]:r[2]
+    @test r[0:1:2] == r[0]:1:r[2]
+end
+
+@test length(range(1, 100, length=big(100)^100)) == big(100)^100
+@test length(range(big(1), big(100)^100, length=big(100)^100)) == big(100)^100
+@test length(0 * (1:big(100)^100)) == big(100)^100
+
+@testset "issue #41784" begin
+    # `in` on a StepRangeLen whose step is 0, so every element equals the start value
+    # Int start value
+    x = 41784
+    @test (x in StepRangeLen(x, 0, 0)) == false  # length 0: contains nothing
+    @test (x in StepRangeLen(x, 0, rand(1:100))) == true  # any length >= 1 contains the start value
+    @test ((x - 1) in StepRangeLen(x, 0, rand(1:100))) == false
+    @test ((x + 1) in StepRangeLen(x, 0, rand(1:100))) == false
+
+    # Char start value
+    x = 'z'
+    @test (x in StepRangeLen(x, 0, 0)) == false  # length 0: contains nothing
+    @test (x in StepRangeLen(x, 0, rand(1:100))) == true  # any length >= 1 contains the start value
+    @test ((x - 1) in StepRangeLen(x, 0, rand(1:100))) == false
+    @test ((x + 1) in StepRangeLen(x, 0, rand(1:100))) == false
+end
diff --git a/test/rational.jl b/test/rational.jl
index 07e312543318f8..a329a1ac5f93df 100644
--- a/test/rational.jl
+++ b/test/rational.jl
@@ -488,6 +488,8 @@ end
         @test gcd(b, a) === T(2)//T(105)
         @test lcm(a, b) === T(30)//T(7)
         if T <: Signed
+            @test gcd(-a) === a
+            @test lcm(-b) === b
             @test gcdx(a, b) === (T(2)//T(105), T(-11), T(4))
             @test gcd(-a, b) === T(2)//T(105)
             @test gcd(a, -b) === T(2)//T(105)
@@ -616,3 +618,15 @@ end
 @testset "checked_den with different integer types" begin
     @test Base.checked_den(Int8(4), Int32(8)) == Base.checked_den(Int32(4), Int32(8))
 end
+
+@testset "Rational{T} with non-concrete T (issue #41222)" begin
+    @test @inferred(Rational{Integer}(2,3)) isa Rational{Integer}
+end
+
+@testset "issue #41489" begin
+    @test Core.Compiler.return_type(+, NTuple{2, Rational}) == Rational
+    @test Core.Compiler.return_type(-, NTuple{2, Rational}) == Rational
+
+    A=Rational[1 1 1; 2 2 2; 3 3 3]
+    @test @inferred(A*A) isa Matrix{Rational}
+end
diff --git a/test/read.jl b/test/read.jl
index aa897342393a10..78ecded83c80a3 100644
--- a/test/read.jl
+++ b/test/read.jl
@@ -98,30 +98,27 @@ s = io(text)
 close(s)
 push!(l, ("PipeEndpoint", io))
 
-#FIXME See https://github.com/JuliaLang/julia/issues/14747
-#      Reading from open(::Command) seems to deadlock on Linux
-#=
-if !Sys.iswindows()
 
-# Windows type command not working?
-# See "could not spawn `type 'C:\Users\appveyor\AppData\Local\Temp\1\jul3516.tmp\file.txt'`"
-#https://ci.appveyor.com/project/StefanKarpinski/julia/build/1.0.12733/job/hpwjs4hmf03vs5ag#L1244
-
-# Pipe
+# Pipe (#14747)
 io = (text) -> begin
     write(filename, text)
-    open(`$(Sys.iswindows() ? "type" : "cat") $filename`)[1]
-#    Was open(`echo -n $text`)[1]
-#    See https://github.com/JuliaLang/julia/issues/14747
+    # we can skip using shell_escape_wincmd, since ", ^, and % aren't legal in
+    # a filename, so unconditionally wrapping in " is sufficient (okay, that's
+    # a lie, since ^ and % actually are legal, but DOS is broken)
+    if Sys.iswindows()
+        cmd = Cmd(["cmd.exe", "/c type \"$(replace(filename, '/' => '\\'))\""])
+        cmd = Cmd(cmd; windows_verbatim=true)
+        cmd = pipeline(cmd, stderr=devnull)
+    else
+        cmd = `cat $filename`
+    end
+    open(cmd)
 end
 s = io(text)
 @test isa(s, IO)
-@test isa(s, Pipe)
+@test isa(s, Base.Process)
 close(s)
-push!(l, ("Pipe", io))
-
-end
-=#
+push!(l, ("Process", io))
 
 
 open_streams = []
@@ -140,7 +137,6 @@ end
 verbose = false
 
 for (name, f) in l
-    local f
     local function io(text=text)
         local s = f(text)
         push!(open_streams, s)
@@ -319,9 +315,9 @@ for (name, f) in l
     text = old_text
     write(filename, text)
 
-    if !(typeof(io()) in [Base.PipeEndpoint, Pipe, TCPSocket])
+    if !isa(io(), Union{Base.PipeEndpoint, Base.AbstractPipe, TCPSocket})
         verbose && println("$name position...")
-        @test (s = io(); read!(s, Vector{UInt8}(undef, 4)); position(s))  == 4
+        @test (s = io(); read!(s, Vector{UInt8}(undef, 4)); position(s)) == 4
 
         verbose && println("$name seek...")
         for n = 0:length(text)-1
diff --git a/test/reduce.jl b/test/reduce.jl
index f87b2285480f49..78ac22c13f366a 100644
--- a/test/reduce.jl
+++ b/test/reduce.jl
@@ -49,8 +49,8 @@ end
 @test reduce(max, [8 6 7 5 3 0 9]) == 9
 @test reduce(+, 1:5; init=1000) == (1000 + 1 + 2 + 3 + 4 + 5)
 @test reduce(+, 1) == 1
-@test_throws ArgumentError reduce(*, ())
-@test_throws ArgumentError reduce(*, Union{}[])
+@test_throws "reducing with * over an empty collection of element type Union{} is not allowed" reduce(*, ())
+@test_throws "reducing with * over an empty collection of element type Union{} is not allowed" reduce(*, Union{}[])
 
 # mapreduce
 @test mapreduce(-, +, [-10 -9 -3]) == ((10 + 9) + 3)
@@ -87,8 +87,10 @@ end
 @test mapreduce(abs2, *, Float64[]) === 1.0
 @test mapreduce(abs2, max, Float64[]) === 0.0
 @test mapreduce(abs, max, Float64[]) === 0.0
-@test_throws ArgumentError mapreduce(abs2, &, Float64[])
-@test_throws ArgumentError mapreduce(abs2, |, Float64[])
+@test_throws ["reducing over an empty collection is not allowed",
+              "consider supplying `init`"] mapreduce(abs2, &, Float64[])
+@test_throws str -> !occursin("Closest candidates are", str) mapreduce(abs2, &, Float64[])
+@test_throws "reducing over an empty collection is not allowed" mapreduce(abs2, |, Float64[])
 
 # mapreduce() type stability
 @test typeof(mapreduce(*, +, Int8[10])) ===
@@ -138,8 +140,9 @@ fz = float(z)
 @test sum(z) === 136
 @test sum(fz) === 136.0
 
-@test_throws ArgumentError sum(Union{}[])
-@test_throws ArgumentError sum(sin, Int[])
+@test_throws "reducing with add_sum over an empty collection of element type Union{} is not allowed" sum(Union{}[])
+@test_throws ["reducing over an empty collection is not allowed",
+              "consider supplying `init`"] sum(sin, Int[])
 @test sum(sin, 3) == sin(3.0)
 @test sum(sin, [3]) == sin(3.0)
 a = sum(sin, z)
@@ -170,7 +173,7 @@ for f in (sum2, sum5, sum6, sum9, sum10)
 end
 for f in (sum3, sum4, sum7, sum8)
     @test sum(z) == f(z)
-    @test_throws ArgumentError f(Int[])
+    @test_throws "reducing over an empty" f(Int[])
     @test sum(Int[7]) == f(Int[7]) == 7
 end
 @test typeof(sum(Int8[])) == typeof(sum(Int8[1])) == typeof(sum(Int8[1 7]))
@@ -239,8 +242,8 @@ prod2(itr) = invoke(prod, Tuple{Any}, itr)
 
 # maximum & minimum & extrema
 
-@test_throws ArgumentError maximum(Int[])
-@test_throws ArgumentError minimum(Int[])
+@test_throws "reducing over an empty" maximum(Int[])
+@test_throws "reducing over an empty" minimum(Int[])
 
 @test maximum(Int[]; init=-1) == -1
 @test minimum(Int[]; init=-1) == -1
@@ -391,22 +394,22 @@ end
 
 @testset "findmin(f, domain)" begin
     @test findmin(-, 1:10) == (-10, 10)
-    @test findmin(identity, [1, 2, 3, missing]) === (missing, missing)
-    @test findmin(identity, [1, NaN, 3, missing]) === (missing, missing)
-    @test findmin(identity, [1, missing, NaN, 3]) === (missing, missing)
-    @test findmin(identity, [1, NaN, 3]) === (NaN, NaN)
-    @test findmin(identity, [1, 3, NaN]) === (NaN, NaN)
-    @test all(findmin(cos, 0:π/2:2π) .≈ (-1.0, π))
+    @test findmin(identity, [1, 2, 3, missing]) === (missing, 4)
+    @test findmin(identity, [1, NaN, 3, missing]) === (missing, 4)
+    @test findmin(identity, [1, missing, NaN, 3]) === (missing, 2)
+    @test findmin(identity, [1, NaN, 3]) === (NaN, 2)
+    @test findmin(identity, [1, 3, NaN]) === (NaN, 3)
+    @test findmin(cos, 0:π/2:2π) == (-1.0, 3)
 end
 
 @testset "findmax(f, domain)" begin
     @test findmax(-, 1:10) == (-1, 1)
-    @test findmax(identity, [1, 2, 3, missing]) === (missing, missing)
-    @test findmax(identity, [1, NaN, 3, missing]) === (missing, missing)
-    @test findmax(identity, [1, missing, NaN, 3]) === (missing, missing)
-    @test findmax(identity, [1, NaN, 3]) === (NaN, NaN)
-    @test findmax(identity, [1, 3, NaN]) === (NaN, NaN)
-    @test findmax(cos, 0:π/2:2π) == (1.0, 0.0)
+    @test findmax(identity, [1, 2, 3, missing]) === (missing, 4)
+    @test findmax(identity, [1, NaN, 3, missing]) === (missing, 4)
+    @test findmax(identity, [1, missing, NaN, 3]) === (missing, 2)
+    @test findmax(identity, [1, NaN, 3]) === (NaN, 2)
+    @test findmax(identity, [1, 3, NaN]) === (NaN, 3)
+    @test findmax(cos, 0:π/2:2π) == (1.0, 1)
 end
 
 @testset "argmin(f, domain)" begin
@@ -460,8 +463,8 @@ end
 @test reduce((a, b) -> a .& b, fill(trues(5), 24))  == trues(5)
 @test reduce((a, b) -> a .& b, fill(falses(5), 24)) == falses(5)
 
-@test_throws TypeError any(x->0, [false])
-@test_throws TypeError all(x->0, [false])
+@test_throws TypeError any(Returns(0), [false])
+@test_throws TypeError all(Returns(0), [false])
 
 # short-circuiting any and all
 
@@ -594,14 +597,22 @@ end
 # issue #18695
 test18695(r) = sum( t^2 for t in r )
 @test @inferred(test18695([1.0,2.0,3.0,4.0])) == 30.0
-@test_throws ArgumentError test18695(Any[])
+@test_throws str -> ( occursin("reducing over an empty", str) &&
+                      occursin("consider supplying `init`", str) &&
+                     !occursin("or defining", str)) test18695(Any[])
+
+# For Core.IntrinsicFunction
+@test_throws str -> ( occursin("reducing over an empty", str) &&
+                      occursin("consider supplying `init`", str) &&
+                     !occursin("or defining", str)) reduce(Base.xor_int, Int[])
 
 # issue #21107
 @test foldr(-,2:2) == 2
 
 # test neutral element not picked incorrectly for &, |
 @test @inferred(foldl(&, Int[1])) === 1
-@test_throws ArgumentError foldl(&, Int[])
+@test_throws ["reducing over an empty",
+              "consider supplying `init`"] foldl(&, Int[])
 
 # prod on Chars
 @test prod(Char[]) == ""
@@ -646,3 +657,13 @@ end
 
 # issue #39281
 @test @inferred(extrema(rand(2), dims=1)) isa Vector{Tuple{Float64,Float64}}
+
+# issue #38627
+@testset "overflow in mapreduce" begin
+    # at len = 16 and len = 1025 there is a change in codepath
+    for len in [0, 1, 15, 16, 1024, 1025, 2048, 2049]
+        oa = OffsetArray(repeat([1], len), typemax(Int)-len)
+        @test sum(oa) == reduce(+, oa) == len
+        @test mapreduce(+, +, oa, oa) == 2len
+    end
+end
diff --git a/test/reducedim.jl b/test/reducedim.jl
index cc07cfff1dad34..f009a2384ca510 100644
--- a/test/reducedim.jl
+++ b/test/reducedim.jl
@@ -90,7 +90,7 @@ end
 
 # Combining dims and init
 A = Array{Int}(undef, 0, 3)
-@test_throws ArgumentError maximum(A; dims=1)
+@test_throws "reducing over an empty collection is not allowed" maximum(A; dims=1)
 @test maximum(A; dims=1, init=-1) == reshape([-1,-1,-1], 1, 3)
 
 # Test reduction along first dimension; this is special-cased for
@@ -169,8 +169,9 @@ end
     A = Matrix{Int}(undef, 0,1)
     @test sum(A) === 0
     @test prod(A) === 1
-    @test_throws ArgumentError minimum(A)
-    @test_throws ArgumentError maximum(A)
+    @test_throws ["reducing over an empty",
+                  "consider supplying `init`"] minimum(A)
+    @test_throws "consider supplying `init`" maximum(A)
 
     @test isequal(sum(A, dims=1), zeros(Int, 1, 1))
     @test isequal(sum(A, dims=2), zeros(Int, 0, 1))
@@ -182,9 +183,9 @@ end
     @test isequal(prod(A, dims=3), fill(1, 0, 1))
 
     for f in (minimum, maximum)
-        @test_throws ArgumentError f(A, dims=1)
+        @test_throws "reducing over an empty collection is not allowed" f(A, dims=1)
         @test isequal(f(A, dims=2), zeros(Int, 0, 1))
-        @test_throws ArgumentError f(A, dims=(1, 2))
+        @test_throws "reducing over an empty collection is not allowed" f(A, dims=(1, 2))
         @test isequal(f(A, dims=3), zeros(Int, 0, 1))
     end
     for f in (findmin, findmax)
@@ -492,3 +493,16 @@ end
 
 @test @inferred(count(false:true, dims=:, init=0x0004)) === 0x0005
 @test @inferred(count(isodd, reshape(1:9, 3, 3), dims=:, init=Int128(0))) === Int128(5)
+
+@testset "reduced_index for BigInt (issue #39995)" begin
+    for T in [Int8, Int16, Int32, Int64, Int128, BigInt]
+        r = T(1):T(2)
+        ax = axes(r, 1)
+        axred = Base.reduced_index(ax)
+        @test axred == Base.OneTo(1)
+        @test typeof(axred) === typeof(ax)
+        r_red = reduce(+, r, dims = 1)
+        @test eltype(r_red) == T
+        @test r_red == [3]
+    end
+end
diff --git a/test/reflection.jl b/test/reflection.jl
index 63101796804bb9..4faa3678171049 100644
--- a/test/reflection.jl
+++ b/test/reflection.jl
@@ -224,7 +224,7 @@ let ex = :(a + b)
 end
 foo13825(::Array{T, N}, ::Array, ::Vector) where {T, N} = nothing
 @test startswith(string(first(methods(foo13825))),
-                 "foo13825(::Array{T, N}, ::Array, ::Vector{T} where T)")
+                 "foo13825(::Array{T, N}, ::Array, ::Vector) where {T, N} in")
 
 mutable struct TLayout
     x::Int8
@@ -883,6 +883,7 @@ _test_at_locals2(1,1,0.5f0)
     _dump_function(f31687_parent, Tuple{},
                    #=native=#false, #=wrapper=#false, #=strip=#false,
                    #=dump_module=#true, #=syntax=#:att, #=optimize=#false, :none,
+                   #=binary=#false,
                    params)
 end
 
@@ -935,3 +936,19 @@ end
     @test f !== Core._apply
     @test occursin("f2#", String(nameof(f)))
 end
+
+
+@testset "code_typed(; world)" begin
+    mod = @eval module $(gensym()) end
+
+    @eval mod foo() = 1
+    world1 = Base.get_world_counter()
+    @test only(code_typed(mod.foo, ())).second == Int
+    @test only(code_typed(mod.foo, (); world=world1)).second == Int
+
+    @eval mod foo() = 2.
+    world2 = Base.get_world_counter()
+    @test only(code_typed(mod.foo, ())).second == Float64
+    @test only(code_typed(mod.foo, (); world=world1)).second == Int
+    @test only(code_typed(mod.foo, (); world=world2)).second == Float64
+end
diff --git a/test/regex.jl b/test/regex.jl
index 0a28d3464579d6..0202dc4758e2fb 100644
--- a/test/regex.jl
+++ b/test/regex.jl
@@ -101,6 +101,17 @@
         @test keys(m) == ["a", 2, "b"]
     end
 
+    # Unicode named subpatterns and property mixes of scripts and classes (issues #35322/#35459 and #40231)
+    let m = match(r"(?<numéro>\d)[\pZs]*(?<文本>[\p{Han}\p{P}]+)", "1 孔生雪笠，聖裔也。為人蘊藉，工詩。")
+        @test haskey(m, :numéro)  # group looked up by Symbol
+        @test haskey(m, "文本")  # group looked up by String
+        @test !haskey(m, "ゑ")  # a name that is not a group of the pattern
+        @test (m[:numéro], m[:文本]) == ("1", "孔生雪笠，聖裔也。為人蘊藉，工詩。")
+        @test (m[1], m[2]) == (m[:numéro], m[:文本])  # positional and named access agree
+        @test sprint(show, m) == "RegexMatch(\"1 孔生雪笠，聖裔也。為人蘊藉，工詩。\", numéro=\"1\", 文本=\"孔生雪笠，聖裔也。為人蘊藉，工詩。\")"
+        @test keys(m) == ["numéro", "文本"]
+    end
+
     # Backcapture reference in substitution string
     @test replace("abcde", r"(..)(?P<byname>d)" => s"\g<byname>xy\\\1") == "adxy\\bce"
     @test_throws ErrorException replace("a", r"(?P<x>)" => s"\g<y>")
diff --git a/test/runtests.jl b/test/runtests.jl
index ad029557a33779..ea94fca8770579 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -91,6 +91,16 @@ prepend!(tests, linalg_tests)
 
 import LinearAlgebra
 cd(@__DIR__) do
+    # `net_on` implies that we have access to the loopback interface which is
+    # necessary for Distributed multi-processing. There are some test
+    # environments that do not allow access to loopback, so we must disable
+    # addprocs when `net_on` is false. Note that there exist build environments,
+    # including Nix, where `net_on` is false but we still have access to the
+    # loopback interface. It would be great to make this check more specific to
+    # identify those situations somehow. See
+    #   * https://github.com/JuliaLang/julia/issues/6722
+    #   * https://github.com/JuliaLang/julia/pull/29384
+    #   * https://github.com/JuliaLang/julia/pull/40348
     n = 1
     if net_on
         n = min(Sys.CPU_THREADS, length(tests))
@@ -353,7 +363,7 @@ cd(@__DIR__) do
         elseif isa(resp, Test.TestSetException)
             fake = Test.DefaultTestSet(testname)
             for i in 1:resp.pass
-                Test.record(fake, Test.Pass(:test, nothing, nothing, nothing))
+                Test.record(fake, Test.Pass(:test, nothing, nothing, nothing, LineNumberNode(@__LINE__, @__FILE__)))
             end
             for i in 1:resp.broken
                 Test.record(fake, Test.Broken(:test, nothing))
diff --git a/test/ryu.jl b/test/ryu.jl
index 9970942575e0a8..cf60e4867e2362 100644
--- a/test/ryu.jl
+++ b/test/ryu.jl
@@ -544,6 +544,15 @@ end # Float16
         @test Ryu.writefixed(7.018232e-82, 6) == "0.000000"
     end
 
+    @testset "Trimming of trailing zeros" begin
+        fixed(x, precision) = Ryu.writefixed(x, precision, false, false, false, UInt8('.'), true)
+        for (x, expected) in ((0.0, "0"), (1.0, "1"), (2.0, "2"))
+            @test fixed(x, 1) == expected
+        end
+        for precision in 0:2  # same output expected at every precision
+            @test fixed(1.25e+5, precision) == "125000"
+        end
+    end
 end # fixed
 
 @testset "Ryu.writeexp" begin
@@ -736,6 +745,12 @@ end
     @test Ryu.writeexp(1e+83, 1) == "1.0e+83"
 end
 
+@testset "Consistency of trimtrailingzeros" begin
+    for (x, expected) in ((0.0, "0e+00"), (1.0, "1e+00"), (2.0, "2e+00"))
+        @test Ryu.writeexp(x, 1, false, false, false, UInt8('e'), UInt8('.'), true) == expected
+    end
+end
+
 end # exp
 
 @testset "compact" begin
diff --git a/test/sets.jl b/test/sets.jl
index 46854dae957c6d..1e512faad5cdb8 100644
--- a/test/sets.jl
+++ b/test/sets.jl
@@ -22,6 +22,7 @@ using Dates
         @test isa(Set(sin(x) for x = 1:3), Set{Float64})
         @test isa(Set(f17741(x) for x = 1:3), Set{Int})
         @test isa(Set(f17741(x) for x = -1:1), Set{Integer})
+        @test isa(Set(f17741(x) for x = 1:0), Set{Integer})
     end
     let s1 = Set(["foo", "bar"]), s2 = Set(s1)
         @test s1 == s2
@@ -138,6 +139,10 @@ end
     @test !in(200,s)
 end
 
+@testset "copy(::KeySet) (issue #41537)" begin
+    @test let ks = keys(Dict(1 => 2, 3 => 4)); union(ks) == copy(ks) == Set([1, 3]) end
+end
+
 @testset "copy!" begin
     for S = (Set, BitSet)
         s = S([1, 2])
@@ -220,6 +225,16 @@ end
     s2 = Set([nothing])
     union!(s2, [nothing])
     @test s2 == Set([nothing])
+
+    @testset "promotion" begin
+        intcolls = [1:5, [1, 2], Set([1, 2])]
+        floatcolls = [2:0.1:3, [2.0, 3.5], Set([2.0, 3.5])]
+        # the promoted element type is expected for either argument order
+        for ic in intcolls, fc in floatcolls
+            @test eltype(ic ∪ fc) == Float64
+            @test eltype(fc ∪ ic) == Float64
+        end
+    end
 end
 
 @testset "intersect" begin
@@ -238,7 +253,7 @@ end
         end
     end
     @test intersect(Set([1]), BitSet()) isa Set{Int}
-    @test intersect(BitSet([1]), Set()) isa BitSet
+    @test intersect(BitSet([1]), Set()) isa Set{Any}
     @test intersect([1], BitSet()) isa Vector{Int}
     # intersect must uniquify
     @test intersect([1, 2, 1]) == intersect!([1, 2, 1]) == [1, 2]
@@ -249,7 +264,18 @@ end
     y = () ∩ (42,)
     @test isempty(x)
     @test isempty(y)
-    @test eltype(x) == eltype(y) == Union{}
+
+    # Discussed in PR#41769
+    @testset "promotion" begin
+        intcolls = [1:5, [1, 2], Set([1, 2])]
+        floatcolls = [2:0.1:3, [2.0, 3.5], Set([2.0, 3.5])]
+        # the promoted element type is expected for either argument order
+        for ic in intcolls, fc in floatcolls
+            @test eltype(ic ∩ fc) == Float64
+            @test eltype(fc ∩ ic) == Float64
+            @test eltype(∩(ic, ic, fc)) == Float64
+        end
+    end
 end
 
 @testset "setdiff" begin
@@ -756,3 +782,24 @@ Base.IteratorSize(::Type{<:OpenInterval}) = Base.SizeUnknown()
     @test 3 ∈ i
     @test issubset(3, i)
 end
+
+@testset "IdSet" begin
+    # four distinct single-element vectors
+    v1, v2, v3, v4 = [1], [2], [3], [4]
+    ids = Base.IdSet{Vector{Int}}([v1, v2, v3, v4])
+    @test !isempty(ids)
+    snapshot = copy(ids)
+    @test ids ⊆ snapshot
+    @test snapshot ⊆ ids
+    # keep only the vectors whose first entry is odd
+    ids = filter!(v -> isodd(first(v)), ids)
+    @test ids ⊆ snapshot
+    @test !(snapshot ⊆ ids)
+    @test !isempty(ids)
+    # the test expects `pop!` to return the stored object itself
+    popped = pop!(ids, v1)
+    @test popped === v1
+    @test !isempty(ids)
+    ids = empty!(ids)
+    @test isempty(ids)
+end
diff --git a/test/show.jl b/test/show.jl
index 51e6abca85f9bb..f9c3eb1545b3e0 100644
--- a/test/show.jl
+++ b/test/show.jl
@@ -728,11 +728,18 @@ Base.zero(x::T12960) = T12960()
 let
     A = sparse(1.0I, 3, 3)
     B = similar(A, T12960)
-    @test sprint(show, B)  == "\n #undef             ⋅            ⋅    \n       ⋅      #undef             ⋅    \n       ⋅            ⋅      #undef"
-    @test sprint(print, B) == "\n #undef             ⋅            ⋅    \n       ⋅      #undef             ⋅    \n       ⋅            ⋅      #undef"
+    @test repr(B) == "sparse([1, 2, 3], [1, 2, 3], $T12960[#undef, #undef, #undef], 3, 3)"
+    @test occursin(
+        "\n #undef             ⋅            ⋅    \n       ⋅      #undef             ⋅    \n       ⋅            ⋅      #undef",
+        repr(MIME("text/plain"), B),
+    )
+
     B[1,2] = T12960()
-    @test sprint(show, B)  == "\n #undef          T12960()        ⋅    \n       ⋅      #undef             ⋅    \n       ⋅            ⋅      #undef"
-    @test sprint(print, B) == "\n #undef          T12960()        ⋅    \n       ⋅      #undef             ⋅    \n       ⋅            ⋅      #undef"
+    @test repr(B)  == "sparse([1, 1, 2, 3], [1, 2, 2, 3], $T12960[#undef, $T12960(), #undef, #undef], 3, 3)"
+    @test occursin(
+        "\n #undef          T12960()        ⋅    \n       ⋅      #undef             ⋅    \n       ⋅            ⋅      #undef",
+        repr(MIME("text/plain"), B),
+    )
 end
 
 # issue #13127
@@ -808,13 +815,20 @@ Base.methodloc_callback[] = nothing
     # test that no spurious visual lines are added when one element spans multiple lines
     v = fill!(Array{Any}(undef, 9), 0)
     v[1] = "look I'm wide! --- " ^ 9
-    @test replstr(v) == "9-element Vector{Any}:\n  \"look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- \"\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n 0"
-    @test replstr([fill(0, 9) v]) == "9×2 Matrix{Any}:\n 0  …   \"look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- \"\n 0     0\n 0     0\n 0     0\n 0     0\n 0  …  0\n 0     0\n 0     0\n 0     0"
+    r = replstr(v)
+    @test startswith(r, "9-element Vector{Any}:\n  \"look I'm wide! ---")
+    @test endswith(r, "look I'm wide! --- \"\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n 0")
+
     # test vertical/diagonal ellipsis
     v = fill!(Array{Any}(undef, 50), 0)
     v[1] = "look I'm wide! --- " ^ 9
-    @test replstr(v) == "50-element Vector{Any}:\n  \"look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- \"\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n ⋮\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n 0"
-    @test replstr([fill(0, 50) v]) == "50×2 Matrix{Any}:\n 0  …   \"look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- \"\n 0     0\n 0     0\n 0     0\n 0     0\n 0  …  0\n 0     0\n 0     0\n 0     0\n 0     0\n ⋮  ⋱  \n 0     0\n 0     0\n 0     0\n 0     0\n 0  …  0\n 0     0\n 0     0\n 0     0\n 0     0"
+    r = replstr(v)
+    @test startswith(r, "50-element Vector{Any}:\n  \"look I'm wide! ---")
+    @test endswith(r, "look I'm wide! --- \"\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n ⋮\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n 0")
+
+    r = replstr([fill(0, 50) v])
+    @test startswith(r, "50×2 Matrix{Any}:\n 0  …   \"look I'm wide! ---")
+    @test endswith(r, "look I'm wide! --- \"\n 0     0\n 0     0\n 0     0\n 0     0\n 0  …  0\n 0     0\n 0     0\n 0     0\n 0     0\n ⋮  ⋱  \n 0     0\n 0     0\n 0     0\n 0     0\n 0  …  0\n 0     0\n 0     0\n 0     0\n 0     0")
 
     # issue #34659
     @test replstr(Int32[]) == "Int32[]"
@@ -825,6 +839,45 @@ Base.methodloc_callback[] = nothing
     @test replstr([zeros(3,0),zeros(2,0)]) == "2-element Vector{Matrix{Float64}}:\n 3×0 Matrix{Float64}\n 2×0 Matrix{Float64}"
 end
 
+# string show with elision
+@testset "string show with elision" begin
+    @testset "elision logic" begin
+        strs = ["A", "∀", "∀A", "A∀", "😃"]
+        for limit = 0:100, len = 0:100, str in strs
+            str = str^len  # repeat the seed string `len` times ...
+            str = str[1:nextind(str, 0, len)]  # ... then keep only its first `len` characters
+            out = sprint() do io
+                show(io, MIME"text/plain"(), str; limit)
+            end
+            lower = length("\"\" ⋯ $(ncodeunits(str)) bytes ⋯ \"\"")  # length of an elided form with empty head and tail
+            limit = max(limit, lower)  # the checks below use a limit of at least `lower`
+            if length(str) + 2 ≤ limit  # presumably the 2 accounts for the surrounding quotes
+                @test eval(Meta.parse(out)) == str
+            else
+                @test limit-!isascii(str) <= length(out) <= limit  # non-ASCII input may come out one character short
+                re = r"(\"[^\"]*\") ⋯ (\d+) bytes ⋯ (\"[^\"]*\")"
+                m = match(re, out)
+                head = eval(Meta.parse(m.captures[1]))
+                tail = eval(Meta.parse(m.captures[3]))
+                skip = parse(Int, m.captures[2])
+                @test startswith(str, head)
+                @test endswith(str, tail)
+                @test ncodeunits(str) ==
+                    ncodeunits(head) + skip + ncodeunits(tail)
+            end
+        end
+    end
+
+    @testset "default elision limit" begin
+        r = replstr("x"^1000)
+        @test length(r) == 7*80
+        @test r == repr("x"^271) * " ⋯ 459 bytes ⋯ " * repr("x"^270)
+        r = replstr(["x"^1000])
+        @test length(r) < 120
+        @test r == "1-element Vector{String}:\n " * repr("x"^31) * " ⋯ 939 bytes ⋯ " * repr("x"^30)
+    end
+end
+
 # Issue 14121
 @test_repr "(A'x)'"
 
@@ -1148,7 +1201,7 @@ let x = [], y = [], z = Base.ImmutableDict(x => y)
         """
     dz = sprint(dump, z)
     @test 10 < countlines(IOBuffer(dz)) < 40
-    @test sum(x -> 1, eachmatch(r"circular reference", dz)) == 4
+    @test sum(Returns(1), eachmatch(r"circular reference", dz)) == 4
 end
 
 # PR 16221
@@ -1324,6 +1377,11 @@ let m = which(T20332{Int}(), (Int,)),
     mi = Core.Compiler.specialize_method(m, Tuple{T20332{T}, Int} where T, Core.svec())
     # test that this doesn't throw an error
     @test occursin("MethodInstance for", repr(mi))
+    # issue #41928
+    str = sprint(mi; context=:color=>true) do io, mi
+        printstyled(io, mi; color=:light_cyan)
+    end
+    @test !occursin("\U1b[0m", str)
 end
 
 @test sprint(show, Main) == "Main"
@@ -1569,12 +1627,77 @@ end
 end
 
 let x = TypeVar(:_), y = TypeVar(:_)
-    @test repr(UnionAll(x, UnionAll(y, Pair{x,y}))) == "Pair{_1, _2} where {_1, _2}"
-    @test repr(UnionAll(x, UnionAll(y, Pair{UnionAll(x,Ref{x}),y}))) == "Pair{Ref{_1} where _1, _1} where _1"
+    @test repr(UnionAll(x, UnionAll(y, Pair{x,y}))) == "Pair"  # unbounded typevars in parameter order: bare type name expected
+    @test repr(UnionAll(y, UnionAll(x, Pair{x,y}))) == "Pair{_2, _1} where {_1, _2}"  # opposite binding order: explicit where clause expected
+    @test repr(UnionAll(x, UnionAll(y, Pair{UnionAll(x,Ref{x}),y}))) == "Pair{Ref}"
+    @test repr(UnionAll(y, UnionAll(x, Pair{UnionAll(y,Ref{x}),y}))) == "Pair{Ref{_2}, _1} where {_1, _2}"
+end
+
+let x, y, z  # three typevars all named `a`; the expected output tells them apart as a, a1, a2
     x = TypeVar(:a)
     y = TypeVar(:a)
     z = TypeVar(:a)
     @test repr(UnionAll(z, UnionAll(x, UnionAll(y, Tuple{x,y,z})))) == "Tuple{a1, a2, a} where {a, a1, a2}"
+    @test repr(UnionAll(z, UnionAll(x, UnionAll(y, Tuple{z,y,x})))) == "Tuple{a, a2, a1} where {a, a1, a2}"
+end
+
+let p = TypeVar(:_, Number), q = TypeVar(:_, Number)
+    @test repr(UnionAll(p, UnionAll(q, Pair{p,q}))) == "Pair{_1, _2} where {_1<:Number, _2<:Number}"
+    @test repr(UnionAll(q, UnionAll(p, Pair{p,q}))) == "Pair{_2, _1} where {_1<:Number, _2<:Number}"
+    @test repr(UnionAll(p, UnionAll(q, Pair{UnionAll(p,Ref{p}),q}))) == "Pair{Ref{_1} where _1<:Number, _1} where _1<:Number"
+    @test repr(UnionAll(q, UnionAll(p, Pair{UnionAll(q,Ref{p}),q}))) == "Pair{Ref{_2}, _1} where {_1<:Number, _2<:Number}"
+end
+
+
+is_juliarepr(x) = (roundtrip = eval(Meta.parse(repr(x))); roundtrip == x)  # true when repr(x) parses and evaluates back to a value equal to x
+@testset "unionall types" begin
+    X = TypeVar(gensym())  # unbounded typevar with a generated name
+    Y = TypeVar(gensym(), Ref, Ref)  # lower and upper bound are both Ref
+    x, y, z = TypeVar(:a), TypeVar(:a), TypeVar(:a)  # three typevars sharing the name `a`
+    struct TestTVUpper{A<:Integer} end
+
+    # named typevars
+    @test is_juliarepr(Ref{A} where A)
+    @test is_juliarepr(Ref{A} where A>:Ref)
+    @test is_juliarepr(Ref{A} where A<:Ref)
+    @test is_juliarepr(Ref{A} where Ref<:A<:Ref)
+    @test is_juliarepr(TestTVUpper{<:Real})
+    @test is_juliarepr(TestTVUpper{<:Integer})
+    @test is_juliarepr(TestTVUpper{<:Signed})
+
+    # typearg order
+    @test is_juliarepr(UnionAll(X, Pair{X,<:Any}))
+    @test is_juliarepr(UnionAll(X, Pair{<:Any,X}))
+
+    # duplicates
+    @test is_juliarepr(UnionAll(X, Pair{X,X}))
+
+    # nesting
+    @test is_juliarepr(UnionAll(X, Ref{Ref{X}}))
+    @test is_juliarepr(Union{T, Int} where T)
+    @test is_juliarepr(Pair{A, <:A} where A)
+
+    # renumbered typevars with same names
+    @test is_juliarepr(UnionAll(z, UnionAll(x, UnionAll(y, Tuple{x,y,z}))))
+
+    # shortened typevar printing
+    @test repr(Ref{<:Any}) == "Ref"
+    @test repr(Pair{1, <:Any}) == "Pair{1}"
+    @test repr(Ref{<:Number}) == "Ref{<:Number}"
+    @test repr(Pair{1, <:Number}) == "Pair{1, <:Number}"
+    @test repr(Ref{<:Ref}) == "Ref{<:Ref}"
+    @test repr(Ref{>:Ref}) == "Ref{>:Ref}"
+    @test repr(Pair{<:Any, 1}) == "Pair{<:Any, 1}"  # a leading `<:Any` is expected to stay in the output
+    yname = sprint(Base.show_unquoted, Y.name)
+    @test repr(UnionAll(Y, Ref{Y})) == "Ref{$yname} where Ref<:$yname<:Ref"
+    @test endswith(repr(TestTVUpper{<:Real}), "TestTVUpper{<:Real}")
+    @test endswith(repr(TestTVUpper), "TestTVUpper")
+    @test endswith(repr(TestTVUpper{<:Signed}), "TestTVUpper{<:Signed}")
+
+    # exception for tuples
+    @test is_juliarepr(Tuple)
+    @test is_juliarepr(Tuple{})
+    @test is_juliarepr(Tuple{<:Any})
+end
 
 @testset "showarg" begin
@@ -1602,6 +1725,11 @@ end
     @test summary(p) == "2-element reinterpret(reshape, Tuple{Float32, Float32}, ::Matrix{Float32}) with eltype Tuple{Float32, Float32}"
     @test Base.showarg(io, p, false) === nothing
     @test String(take!(io)) == "reinterpret(reshape, Tuple{Float32, Float32}, ::Matrix{Float32})"
+
+    r = Base.IdentityUnitRange(2:2)
+    B = @view ones(2)[r]
+    Base.showarg(io, B, false)
+    @test String(take!(io)) == "view(::Vector{Float64}, $(repr(r)))"
 end
 
 @testset "Methods" begin
@@ -1646,7 +1774,7 @@ end
     # spurious binding resolutions
     show(IOContext(b, :module => TestShowType), Base.Pair)
     @test !Base.isbindingresolved(TestShowType, :Pair)
-    @test String(take!(b)) == "Base.Pair"
+    @test String(take!(b)) == "Core.Pair"
     show(IOContext(b, :module => TestShowType), Base.Complex)
     @test Base.isbindingresolved(TestShowType, :Complex)
     @test String(take!(b)) == "Complex"
@@ -1659,7 +1787,7 @@ end
     @test showstr([Float16(1)]) == "Float16[1.0]"
     @test showstr([[Float16(1)]]) == "Vector{Float16}[[1.0]]"
     @test replstr(Real[Float16(1)]) == "1-element Vector{Real}:\n Float16(1.0)"
-    @test replstr(Array{Real}[Real[1]]) == "1-element Vector{Array{Real, N} where N}:\n [1]"
+    @test replstr(Array{Real}[Real[1]]) == "1-element Vector{Array{Real}}:\n [1]"
     # printing tuples (Issue #25042)
     @test replstr(fill((Int64(1), zeros(Float16, 3)), 1)) ==
                  "1-element Vector{Tuple{Int64, Vector{Float16}}}:\n (1, [0.0, 0.0, 0.0])"
@@ -1702,13 +1830,13 @@ end
     @test showstr(Dict(true=>false)) == "Dict{Bool, Bool}(1 => 0)"
     @test showstr(Dict((1 => 2) => (3 => 4))) == "Dict((1 => 2) => (3 => 4))"
 
-    # issue #27979 (dislaying arrays of pairs containing arrays as first member)
+    # issue #27979 (displaying arrays of pairs containing arrays as first member)
     @test replstr([[1.0]=>1.0]) == "1-element Vector{Pair{Vector{Float64}, Float64}}:\n [1.0] => 1.0"
 
     # issue #28159
     @test replstr([(a=1, b=2), (a=3,c=4)]) == "2-element Vector{NamedTuple{names, Tuple{$Int, $Int}} where names}:\n (a = 1, b = 2)\n (a = 3, c = 4)"
 
-    @test replstr(Vector[Any[1]]) == "1-element Vector{Vector{T} where T}:\n Any[1]"
+    @test replstr(Vector[Any[1]]) == "1-element Vector{Vector}:\n Any[1]"
     @test replstr(AbstractDict{Integer,Integer}[Dict{Integer,Integer}(1=>2)]) ==
         "1-element Vector{AbstractDict{Integer, Integer}}:\n Dict(1 => 2)"
 
@@ -1839,7 +1967,7 @@ h_line() = f_line()
 @test sprint(Base.show_unquoted, Core.Compiler.Argument(-2)) == "_-2"
 
 
-eval(Meta.parse("""function my_fun28173(x)
+eval(Meta._parse_string("""function my_fun28173(x)
     y = if x == 1
             "HI"
         elseif x == 2
@@ -1856,7 +1984,7 @@ eval(Meta.parse("""function my_fun28173(x)
             "three"
         end
     return y
-end""")) # use parse to control the line numbers
+end""", "a"^80, 1, :statement)[1]) # use parse to control the line numbers
 let src = code_typed(my_fun28173, (Int,), debuginfo=:source)[1][1]
     ir = Core.Compiler.inflate_ir(src)
     fill!(src.codelocs, 0) # IRCode printing is only capable of printing partial line info
@@ -1866,13 +1994,13 @@ let src = code_typed(my_fun28173, (Int,), debuginfo=:source)[1][1]
         @test repr(src) == repr_ir
     end
     lines1 = split(repr(ir), '\n')
-    @test isempty(pop!(lines1))
+    @test all(isspace, pop!(lines1))
     Core.Compiler.insert_node!(ir, 1, Core.Compiler.NewInstruction(QuoteNode(1), Val{1}), false)
     Core.Compiler.insert_node!(ir, 1, Core.Compiler.NewInstruction(QuoteNode(2), Val{2}), true)
     Core.Compiler.insert_node!(ir, length(ir.stmts.inst), Core.Compiler.NewInstruction(QuoteNode(3), Val{3}), false)
     Core.Compiler.insert_node!(ir, length(ir.stmts.inst), Core.Compiler.NewInstruction(QuoteNode(4), Val{4}), true)
     lines2 = split(repr(ir), '\n')
-    @test isempty(pop!(lines2))
+    @test all(isspace, pop!(lines2))
     @test popfirst!(lines2) == "2  1 ──       $(QuoteNode(1))"
     @test popfirst!(lines2) == "   │          $(QuoteNode(2))" # TODO: this should print after the next statement
     let line1 = popfirst!(lines1)
@@ -1893,9 +2021,9 @@ let src = code_typed(my_fun28173, (Int,), debuginfo=:source)[1][1]
 
     # verbose linetable
     io = IOBuffer()
-    Base.IRShow.show_ir(io, ir; verbose_linetable=true)
+    Base.IRShow.show_ir(io, ir, Base.IRShow.default_config(ir; verbose_linetable=true))
     seekstart(io)
-    @test count(contains(r"my_fun28173 at none:\d+"), eachline(io)) == 9
+    @test count(contains(r"@ a{80}:\d+ within `my_fun28173"), eachline(io)) == 10
 end
 
 # Verify that extra instructions at the end of the IR
@@ -1905,8 +2033,8 @@ let src = code_typed(gcd, (Int, Int), debuginfo=:source)[1][1]
     ir = Core.Compiler.inflate_ir(src)
     push!(ir.stmts.inst, Core.Compiler.ReturnNode())
     lines = split(sprint(show, ir), '\n')
-    @test isempty(pop!(lines))
-    @test pop!(lines) == "   ! ──       unreachable::#UNDEF"
+    @test all(isspace, pop!(lines))
+    @test pop!(lines) == "   !!! ──       unreachable::#UNDEF"
 end
 
 @testset "printing and interpolating nothing" begin
@@ -1935,7 +2063,7 @@ end
 
 @testset """printing "Any" is not skipped with nested arrays""" begin
     @test replstr(Union{X28004,Vector}[X28004(Any[X28004(1)])], :compact => true) ==
-        "1-element Vector{Union{X28004, Vector{T} where T}}:\n X(Any[X(1)])"
+        "1-element Vector{Union{X28004, Vector}}:\n X(Any[X(1)])"
 end
 
 # Issue 25589 - Underlines in cmd printing
@@ -2102,15 +2230,17 @@ end
 @test Base.make_typealias(M37012.AStruct{1}) === nothing
 @test isempty(Base.make_typealiases(M37012.AStruct{1})[1])
 @test string(M37012.AStruct{1}) == "$(curmod_prefix)M37012.AStruct{1}"
-@test string(Union{Nothing, Number, Vector}) == "Union{Nothing, Number, Vector{T} where T}"
-@test string(Union{Nothing, AbstractVecOrMat}) == "Union{Nothing, AbstractVecOrMat{T} where T}"
+@test string(Union{Nothing, Number, Vector}) == "Union{Nothing, Number, Vector}"
+@test string(Union{Nothing, Number, Vector{<:Integer}}) == "Union{Nothing, Number, Vector{<:Integer}}"
+@test string(Union{Nothing, AbstractVecOrMat}) == "Union{Nothing, AbstractVecOrMat}"
+@test string(Union{Nothing, AbstractVecOrMat{<:Integer}}) == "Union{Nothing, AbstractVecOrMat{<:Integer}}"
 @test string(M37012.BStruct{T, T} where T) == "$(curmod_prefix)M37012.B2{T, T} where T"
 @test string(M37012.BStruct{T, S} where {T<:Unsigned, S<:Signed}) == "$(curmod_prefix)M37012.B2{S, T} where {T<:Unsigned, S<:Signed}"
 @test string(M37012.BStruct{T, S} where {T<:Signed, S<:T}) == "$(curmod_prefix)M37012.B2{S, T} where {T<:Signed, S<:T}"
 @test string(Union{M37012.SimpleU, Nothing}) == "Union{Nothing, $(curmod_prefix)M37012.SimpleU}"
 @test string(Union{M37012.SimpleU, Nothing, T} where T) == "Union{Nothing, $(curmod_prefix)M37012.SimpleU, T} where T"
 @test string(Union{AbstractVector{T}, T} where T) == "Union{AbstractVector{T}, T} where T"
-@test string(Union{AbstractVector, T} where T) == "Union{AbstractVector{T} where T, T} where T"
+@test string(Union{AbstractVector, T} where T) == "Union{AbstractVector, T} where T"
 
 @test sprint(show, :(./)) == ":((./))"
 @test sprint(show, :((.|).(.&, b))) == ":((.|).((.&), b))"
@@ -2168,3 +2298,30 @@ end
     s = sprint(show, MIME("text/plain"), Function)
     @test s == "Function"
 end
+
+@testset "printing inline n-dimensional arrays and one-column matrices" begin
+    @test replstr([Int[1 2 3 ;;; 4 5 6]]) == "1-element Vector{Array{$Int, 3}}:\n [1 2 3;;; 4 5 6]"
+    @test replstr([Int[1 2 3 ;;; 4 5 6;;;;]]) == "1-element Vector{Array{$Int, 4}}:\n [1 2 3;;; 4 5 6;;;;]"
+    @test replstr([fill(1, (20,20,20))]) == "1-element Vector{Array{$Int, 3}}:\n [1 1 … 1 1; 1 1 … 1 1; … ; 1 1 … 1 1; 1 1 … 1 1;;; 1 1 … 1 1; 1 1 … 1 1; … ; 1 1 … 1 1; 1 1 … 1 1;;; 1 1 … 1 1; 1 1 … 1 1; … ; 1 1 … 1 1; 1 1 … 1 1;;; … ;;; 1 1 … 1 1; 1 1 … 1 1; … ; 1 1 … 1 1; 1 1 … 1 1;;; 1 1 … 1 1; 1 1 … 1 1; … ; 1 1 … 1 1; 1 1 … 1 1;;; 1 1 … 1 1; 1 1 … 1 1; … ; 1 1 … 1 1; 1 1 … 1 1]"
+    @test replstr([fill(1, 5, 1)]) == "1-element Vector{Matrix{$Int}}:\n [1; 1; … ; 1; 1;;]"  # one-column matrix: trailing `;;` expected
+    @test replstr([fill(1, 5, 2)]) == "1-element Vector{Matrix{$Int}}:\n [1 1; 1 1; … ; 1 1; 1 1]"
+    @test replstr([[1;]]) == "1-element Vector{Vector{$Int}}:\n [1]"  # a vector is expected to print without trailing semicolons
+    @test replstr([[1;;]]) == "1-element Vector{Matrix{$Int}}:\n [1;;]"
+    @test replstr([[1;;;]]) == "1-element Vector{Array{$Int, 3}}:\n [1;;;]"
+end
+
+@testset "ncat and nrow" begin
+    @test_repr "[1;;]"  # untyped array literals
+    @test_repr "[1;;;]"
+    @test_repr "[1;; 2]"
+    @test_repr "[1;;; 2]"
+    @test_repr "[1;;; 2 3;;; 4]"
+    @test_repr "[1;;; 2;;;; 3;;; 4]"
+
+    @test_repr "T[1;;]"  # the same literals with a `T` prefix
+    @test_repr "T[1;;;]"
+    @test_repr "T[1;; 2]"
+    @test_repr "T[1;;; 2]"
+    @test_repr "T[1;;; 2 3;;; 4]"
+    @test_repr "T[1;;; 2;;;; 3;;; 4]"
+end
diff --git a/test/simdloop.jl b/test/simdloop.jl
index 1920cfa6140b1d..88e41364ef2221 100644
--- a/test/simdloop.jl
+++ b/test/simdloop.jl
@@ -92,19 +92,6 @@ import Base.SimdLoop.SimdError
 
 # Test that @simd rejects inner loop body with invalid control flow statements
 # issue #8613
-macro test_throws(ty, ex)
-    return quote
-        Test.@test_throws $(esc(ty)) try
-            $(esc(ex))
-        catch err
-            @test err isa LoadError
-            @test err.file === $(string(__source__.file))
-            @test err.line === $(__source__.line + 1)
-            rethrow(err.error)
-        end
-    end
-end
-
 @test_throws SimdError("break is not allowed inside a @simd loop body") @macroexpand begin
     @simd for x = 1:10
         x == 1 && break
diff --git a/test/some.jl b/test/some.jl
index 224eb8600814ca..27d50ca354a494 100644
--- a/test/some.jl
+++ b/test/some.jl
@@ -79,6 +79,21 @@
     @test something(missing, nothing, missing) === missing
 end
 
+@testset "@something" begin
+    @test_throws ArgumentError @something()  # no arguments at all
+    @test_throws ArgumentError @something(nothing)  # `nothing` is the only argument
+    @test @something(1) === 1
+    @test @something(Some(nothing)) === nothing  # the test expects `Some` to be unwrapped
+
+    @test @something(1, error("failed")) === 1  # passes only if the `error` call is never evaluated
+    @test_throws ErrorException @something(nothing, error("failed"))  # here the `error` call is expected to run
+
+    # Ensure that the internal variable doesn't conflict with a user defined variable
+    @test let val = 1
+        @something(val)
+    end == 1
+end
+
 # issue #26927
 a = [missing, nothing, Some(nothing), Some(missing)]
 @test a isa Vector{Union{Missing, Nothing, Some}}
diff --git a/test/sorting.jl b/test/sorting.jl
index 6bf7d60bd859db..8946051c73a180 100644
--- a/test/sorting.jl
+++ b/test/sorting.jl
@@ -105,7 +105,7 @@ end
         @test searchsorted(fill(R(1), 15), T(1), 6, 10, Forward) == 6:10
     end
 
-    for (rg,I) in [(49:57,47:59), (1:2:17,-1:19), (-3:0.5:2,-5:.5:4)]
+    for (rg,I) in Any[(49:57,47:59), (1:2:17,-1:19), (-3:0.5:2,-5:.5:4)]
         rg_r = reverse(rg)
         rgv, rgv_r = [rg;], [rg_r;]
         for i = I
@@ -142,9 +142,29 @@ end
         @test searchsortedlast(500:1.0:600, 1.0e20) == 101
     end
 
+    @testset "issue 10966" begin
+        for R in numTypes, T in numTypes  # every pairing of range element type R and needle type T
+            @test searchsortedfirst(R(2):R(2), T(0)) == 1
+            @test searchsortedfirst(R(2):R(2), T(2)) == 1
+            @test searchsortedfirst(R(2):R(2), T(3)) == 2  # needle above the single element: one past the end
+            @test searchsortedfirst(R(1):1//2:R(5), T(0)) == 1
+            @test searchsortedfirst(R(1):1//2:R(5), T(2)) == 3
+            @test searchsortedfirst(R(1):1//2:R(5), T(6)) == 10
+            @test searchsortedlast(R(2):R(2), T(0)) == 0  # needle below the single element: one before the start
+            @test searchsortedlast(R(2):R(2), T(2)) == 1
+            @test searchsortedlast(R(2):R(2), T(3)) == 1
+            @test searchsortedlast(R(1):1//2:R(5), T(0)) == 0
+            @test searchsortedlast(R(1):1//2:R(5), T(2)) == 3
+            @test searchsortedlast(R(1):1//2:R(5), T(6)) == 9  # the last index of this range is expected to be 9
+            @test searchsorted(R(2):R(2), T(0)) === 1:0  # `===` also pins the type of the returned range
+            @test searchsorted(R(2):R(2), T(2)) == 1:1
+            @test searchsorted(R(2):R(2), T(3)) === 2:1
+        end
+    end
+
     @testset "issue 32568" begin
         for R in numTypes, T in numTypes
-            for arr in [R[1:5;], R(1):R(5), R(1):2:R(5)]
+            for arr in Any[R[1:5;], R(1):R(5), R(1):2:R(5)]
                 @test eltype(searchsorted(arr, T(2))) == keytype(arr)
                 @test eltype(searchsorted(arr, T(2), big(1), big(4), Forward)) == keytype(arr)
                 @test searchsortedfirst(arr, T(2)) isa keytype(arr)
@@ -164,35 +184,46 @@ end
         @test searchsorted([1,2], Inf) === 3:2
         @test searchsorted(1:2,   Inf) === 3:2
 
-        for coll in [
+        for coll in Any[
                 Base.OneTo(10),
                 1:2,
+                0x01:0x02,
                 -4:6,
                 5:2:10,
                 [1,2],
                 1.0:4,
                 [10.0,20.0],
             ]
-            for huge in [Inf, 1e300]
+            for huge in Any[Inf, 1e300, typemax(Int64), typemax(UInt64)]
                 @test searchsortedfirst(coll, huge) === lastindex(coll) + 1
-                @test searchsortedfirst(coll, -huge)=== firstindex(coll)
                 @test searchsortedlast(coll, huge)  === lastindex(coll)
-                @test searchsortedlast(coll, -huge) === firstindex(coll) - 1
                 @test searchsorted(coll, huge)      === lastindex(coll)+1 : lastindex(coll)
-                @test searchsorted(coll, -huge)     === firstindex(coll) : firstindex(coll) - 1
-
-                @test searchsortedfirst(reverse(coll), huge, rev=true) === firstindex(coll)
-                @test searchsortedfirst(reverse(coll), -huge, rev=true) === lastindex(coll) + 1
-                @test searchsortedlast(reverse(coll), huge, rev=true) === firstindex(coll) - 1
-                @test searchsortedlast(reverse(coll), -huge, rev=true) === lastindex(coll)
-                @test searchsorted(reverse(coll), huge, rev=true) === firstindex(coll):firstindex(coll) - 1
-                @test searchsorted(reverse(coll), -huge, rev=true) === lastindex(coll)+1:lastindex(coll)
+                if !(eltype(coll) <: Unsigned)
+                    @test searchsortedfirst(reverse(coll), huge, rev=true) === firstindex(coll)
+                    @test searchsortedlast(reverse(coll), huge, rev=true) === firstindex(coll) - 1
+                    @test searchsorted(reverse(coll), huge, rev=true) === firstindex(coll):firstindex(coll) - 1
+                end
+
+                if !(huge isa Unsigned)
+                    @test searchsortedfirst(coll, -huge)=== firstindex(coll)
+                    @test searchsortedlast(coll, -huge) === firstindex(coll) - 1
+                    @test searchsorted(coll, -huge)     === firstindex(coll) : firstindex(coll) - 1
+                    if !(eltype(coll) <: Unsigned)
+                        @test searchsortedfirst(reverse(coll), -huge, rev=true) === lastindex(coll) + 1
+                        @test searchsortedlast(reverse(coll), -huge, rev=true) === lastindex(coll)
+                        @test searchsorted(reverse(coll), -huge, rev=true) === lastindex(coll)+1:lastindex(coll)
+                    end
+                end
             end
         end
-        @testset "issue ##34408" begin
+
+        @test_broken length(reverse(0x1:0x2)) == 2
+        @testset "issue #34408" begin
             r = 1f8-10:1f8
             # collect(r) = Float32[9.999999e7, 9.999999e7, 9.999999e7, 9.999999e7, 1.0e8, 1.0e8, 1.0e8, 1.0e8, 1.0e8]
-            @test_broken searchsorted(collect(r)) == searchsorted(r)
+            for i in r
+                @test_broken searchsorted(collect(r), i) == searchsorted(r, i)
+            end
         end
     end
     @testset "issue #35272" begin
@@ -329,7 +360,7 @@ end
         @test insorted(T(10), R.(collect(1:10)), by=(>=(5)))
     end
 
-    for (rg,I) in [(49:57,47:59), (1:2:17,-1:19), (-3:0.5:2,-5:.5:4)]
+    for (rg,I) in Any[(49:57,47:59), (1:2:17,-1:19), (-3:0.5:2,-5:.5:4)]
         rg_r = reverse(rg)
         rgv, rgv_r = collect(rg), collect(rg_r)
         for i = I
diff --git a/test/spawn.jl b/test/spawn.jl
index dcc1bae2341186..ab8accf65e64a0 100644
--- a/test/spawn.jl
+++ b/test/spawn.jl
@@ -55,8 +55,8 @@ out = read(`$echocmd hello` & `$echocmd world`, String)
 
 @test (run(`$printfcmd "       \033[34m[stdio passthrough ok]\033[0m\n"`); true)
 
-# Test for SIGPIPE being treated as normal termination (throws an error if broken)
-Sys.isunix() && run(pipeline(yescmd, `head`, devnull))
+# Test for SIGPIPE being a failure condition
+@test_throws ProcessFailedException run(pipeline(yescmd, `head`, devnull))
 
 let p = run(pipeline(yescmd, devnull), wait=false)
     t = @async kill(p)
@@ -261,6 +261,74 @@ end
     end
 end
 
+@testset "redirect_stdio" begin
+    # Helper that prints one line to stderr, then one line to stdout.
+    function hello_err_out()
+        println(stderr, "hello from stderr")
+        println(stdout, "hello from stdout")
+    end
+    @testset "same path for multiple streams" begin
+        @test_throws ArgumentError redirect_stdio(hello_err_out,
+                                            stdin="samepath.txt", stdout="samepath.txt")
+        @test_throws ArgumentError redirect_stdio(hello_err_out,
+                                            stdin="samepath.txt", stderr="samepath.txt")
+        # a different spelling of the same path is expected to be rejected as well
+        @test_throws ArgumentError redirect_stdio(hello_err_out,
+                                            stdin=joinpath("tricky", "..", "samepath.txt"),
+                                            stderr="samepath.txt")
+        mktempdir() do dir
+            path = joinpath(dir, "stdouterr.txt")
+            redirect_stdio(hello_err_out, stdout=path, stderr=path)  # stdout and stderr may share a file
+            @test read(path, String) == """
+            hello from stderr
+            hello from stdout
+            """
+        end
+    end
+
+    mktempdir() do dir  # one stream goes to a path or an open IO, the other to devnull
+        path_stdout = joinpath(dir, "stdout.txt")
+        path_stderr = joinpath(dir, "stderr.txt")
+        redirect_stdio(hello_err_out, stderr=devnull, stdout=path_stdout)
+        @test read(path_stdout, String) == "hello from stdout\n"
+
+        open(path_stderr, "w") do ioerr
+            redirect_stdio(hello_err_out, stderr=ioerr, stdout=devnull)
+        end
+        @test read(path_stderr, String) == "hello from stderr\n"
+    end
+
+    mktempdir() do dir  # do-block form with stdout and stderr redirected to separate files
+        path_stderr = joinpath(dir, "stderr.txt")
+        path_stdin  = joinpath(dir, "stdin.txt")
+        path_stdout = joinpath(dir, "stdout.txt")
+
+        content_stderr = randstring()
+        content_stdout = randstring()
+
+        redirect_stdio(stdout=path_stdout, stderr=path_stderr) do
+            print(content_stdout)
+            print(stderr, content_stderr)
+        end
+
+        @test read(path_stderr, String) == content_stderr
+        @test read(path_stdout, String) == content_stdout
+    end
+
+    # stdin is unavailable on the workers, so evaluate this test on the master process (id 1).
+    ret = Core.eval(Main,
+            quote
+                remotecall_fetch(1) do
+                    mktempdir() do dir
+                        path = joinpath(dir, "stdin.txt")
+                        write(path, "hello from stdin\n")
+                        redirect_stdio(readline, stdin=path)
+                    end
+                end
+            end)
+    @test ret == "hello from stdin"
+end
+
 # issue #36136
 @testset "redirect to devnull" begin
     @test redirect_stdout(devnull) do; println("Hello") end === nothing
@@ -511,8 +579,8 @@ end
 @test_throws ArgumentError run(Base.AndCmds(`$truecmd`, ``))
 
 # tests for reducing over collection of Cmd
-@test_throws ArgumentError reduce(&, Base.AbstractCmd[])
-@test_throws ArgumentError reduce(&, Base.Cmd[])
+@test_throws "reducing over an empty collection is not allowed" reduce(&, Base.AbstractCmd[])
+@test_throws "reducing over an empty collection is not allowed" reduce(&, Base.Cmd[])
 @test reduce(&, [`$echocmd abc`, `$echocmd def`, `$echocmd hij`]) == `$echocmd abc` & `$echocmd def` & `$echocmd hij`
 
 # readlines(::Cmd), accidentally broken in #20203
@@ -582,8 +650,8 @@ end
 psep = if Sys.iswindows() ";" else ":" end
 withenv("PATH" => "$(Sys.BINDIR)$(psep)$(ENV["PATH"])") do
     julia_exe = joinpath(Sys.BINDIR, Base.julia_exename())
-    @test Sys.which("julia") == realpath(julia_exe)
-    @test Sys.which(julia_exe) == realpath(julia_exe)
+    @test Sys.which(Base.julia_exename()) == abspath(julia_exe)
+    @test Sys.which(julia_exe) == abspath(julia_exe)
 end
 
 # Check that which behaves correctly when passed an empty string
@@ -598,8 +666,8 @@ mktempdir() do dir
         touch(foo_path)
         chmod(foo_path, 0o777)
         if !Sys.iswindows()
-            @test Sys.which("foo") == realpath(foo_path)
-            @test Sys.which(foo_path) == realpath(foo_path)
+            @test Sys.which("foo") == abspath(foo_path)
+            @test Sys.which(foo_path) == abspath(foo_path)
 
             chmod(foo_path, 0o666)
             @test Sys.which("foo") === nothing
@@ -636,20 +704,20 @@ mktempdir() do dir
         touch(foo2_path)
         chmod(foo1_path, 0o777)
         chmod(foo2_path, 0o777)
-        @test Sys.which("foo") == realpath(foo1_path)
+        @test Sys.which("foo") == abspath(foo1_path)
 
         # chmod() doesn't change which() on Windows, so don't bother to test that
         if !Sys.iswindows()
             chmod(foo1_path, 0o666)
-            @test Sys.which("foo") == realpath(foo2_path)
+            @test Sys.which("foo") == abspath(foo2_path)
             chmod(foo1_path, 0o777)
         end
 
         if Sys.iswindows()
             # On windows, check that pwd() takes precedence, except when we provide a path
             cd(joinpath(dir, "bin2")) do
-                @test Sys.which("foo") == realpath(foo2_path)
-                @test Sys.which(foo1_path) == realpath(foo1_path)
+                @test Sys.which("foo") == abspath(foo2_path)
+                @test Sys.which(foo1_path) == abspath(foo1_path)
             end
         end
 
@@ -662,7 +730,9 @@ mktempdir() do dir
         touch(bar_path)
         chmod(bar_path, 0o777)
         cd(dir) do
-            @test Sys.which(joinpath("bin1", "bar")) == realpath(bar_path)
+            p = Sys.which(joinpath("bin1", "bar"))
+            @test p == abspath("bin1", basename(bar_path))
+            @test Base.samefile(p, bar_path)
         end
     end
 end
@@ -695,7 +765,21 @@ let text = "input-test-text"
     @test read(proc, String) == string(length(text), '\n')
     @test success(proc)
     @test String(take!(b)) == text
+
+    out = Base.BufferStream()
+    proc = run(catcmd, IOBuffer(text), out, wait=false)
+    @test proc.out === out
+    @test read(out, String) == text
+    @test success(proc)
+
+    out = PipeBuffer()
+    proc = run(catcmd, IOBuffer(SubString(text)), out)
+    @test success(proc)
+    @test proc.out === proc.err === proc.in === devnull
+    @test String(take!(out)) == text
 end
+
+
 @test repr(Base.CmdRedirect(``, devnull, 0, false)) == "pipeline(``, stdin>Base.DevNull())"
 @test repr(Base.CmdRedirect(``, devnull, 1, true)) == "pipeline(``, stdout<Base.DevNull())"
 @test repr(Base.CmdRedirect(``, devnull, 11, true)) == "pipeline(``, 11<Base.DevNull())"
@@ -742,6 +826,20 @@ if Sys.iswindows()
 end
 
 
+# test (t)csh escaping; the whole block is skipped unless /bin/tcsh exists
+cshcmd = "/bin/tcsh"
+if isfile(cshcmd)
+    csh_echo(s) = chop(read(Cmd([cshcmd, "-c",
+                                 "echo " * Base.shell_escape_csh(s)]), String))  # chop drops the last character (presumably echo's newline)
+    csh_test(s) = csh_echo(s) == s  # round trip: echoing the escaped string must reproduce s
+    @testset "shell_escape_csh" begin
+        for s in ["", "-a/b", "'", "'£\"", join(' ':'~') ^ 2,
+                  "\t", "\n", "'\n", "\"\n", "'\n\n\""]
+            @test csh_test(s)
+        end
+    end
+end
+
 @testset "shell escaping on Windows" begin
     # Note  argument A can be parsed both as A or "A".
     # We do not test that the parsing satisfies either of these conditions.
diff --git a/test/strings/search.jl b/test/strings/search.jl
index 5f52f8024cdd12..d328168bfa4665 100644
--- a/test/strings/search.jl
+++ b/test/strings/search.jl
@@ -97,6 +97,8 @@ for str in [astr]
     @test findprev('l', str, 2) == nothing
     @test findlast(',', str) == 6
     @test findprev(',', str, 5) == nothing
+    @test findlast(str, "") == nothing
+    @test findlast(str^2, str) == nothing
     @test findlast('\n', str) == 14
 end
 
@@ -396,30 +398,36 @@ end
 # issue 37280
 @testset "UInt8, Int8 vector" begin
     for T in [Int8, UInt8], VT in [Int8, UInt8]
-        A = T[0x40, 0x52, 0x62, 0x52, 0x62]
-
-        @test findfirst(VT[0x30], A) === nothing
-        @test findfirst(VT[0x52], A) === 2:2
-        @test findlast(VT[0x30], A) === nothing
-        @test findlast(VT[0x52], A) === 4:4
-
-        pattern = VT[0x52, 0x62]
-
-        @test findfirst(pattern, A) === 2:3
-        @test findnext(pattern, A, 2) === 2:3
-        @test findnext(pattern, A, 3) === 4:5
-        # 1 idx too far is allowed
-        @test findnext(pattern, A, length(A)+1) === nothing
-        @test_throws BoundsError findnext(pattern, A, -3)
-        @test_throws BoundsError findnext(pattern, A, length(A)+2)
-
-        @test findlast(pattern, A) === 4:5
-        @test findprev(pattern, A, 3) === 2:3
-        @test findprev(pattern, A, 5) === 4:5
-        @test findprev(pattern, A, 2) === nothing
-        @test findprev(pattern, A, length(A)+1) == findlast(pattern, A)
-        @test findprev(pattern, A, length(A)+2) == findlast(pattern, A)
-        @test_throws BoundsError findprev(pattern, A, -3)
+        A = T[0x40, 0x52, 0x00, 0x52, 0x00]
+
+        for A in (A, @view(A[1:end]), codeunits(String(copyto!(Vector{UInt8}(undef,5), A))))  # array, view and code units
+            @test findfirst(VT[0x30], A) === findfirst(==(VT(0x30)), A) === nothing
+            @test findfirst(VT[0x52], A) === 2:2
+            @test findfirst(==(VT(0x52)), A) === 2
+            @test findlast(VT[0x30], A) === findlast(==(VT(0x30)), A) === nothing
+            @test findlast(VT[0x52], A) === 4:4
+            @test findlast(==(VT(0x52)), A) === 4
+            @test findfirst(iszero, A) === 3 === findprev(iszero, A, 4)
+            @test findlast(iszero, A) === 5 === findnext(iszero, A, 4)
+
+            pattern = VT[0x52, 0x00]
+
+            @test findfirst(pattern, A) === 2:3
+            @test findnext(pattern, A, 2) === 2:3
+            @test findnext(pattern, A, 3) === 4:5
+            # a start index one past the end is allowed and finds nothing
+            @test findnext(pattern, A, length(A)+1) === nothing
+            @test_throws BoundsError findnext(pattern, A, -3)
+            @test_throws BoundsError findnext(pattern, A, length(A)+2)
+
+            @test findlast(pattern, A) === 4:5
+            @test findprev(pattern, A, 3) === 2:3
+            @test findprev(pattern, A, 5) === 4:5
+            @test findprev(pattern, A, 2) === nothing
+            @test findprev(pattern, A, length(A)+1) == findlast(pattern, A)
+            @test findprev(pattern, A, length(A)+2) == findlast(pattern, A)
+            @test_throws BoundsError findprev(pattern, A, -3)
+        end
     end
 end
 
diff --git a/test/strings/util.jl b/test/strings/util.jl
index 617ff31106634f..2b08e2819e33bd 100644
--- a/test/strings/util.jl
+++ b/test/strings/util.jl
@@ -44,6 +44,11 @@
     # Issue #32160 (unsigned underflow in lpad/rpad)
     @test lpad("xx", UInt(1), " ") == "xx"
     @test rpad("xx", UInt(1), " ") == "xx"
+    # Issue #38256 (lpad/rpad defined in terms of textwidth)
+    @test lpad("⟨k|H₁|k̃⟩", 12) |> textwidth == 12
+    @test rpad("⟨k|H₁|k̃⟩", 12) |> textwidth == 12
+    @test lpad("⟨k|H₁|k⟩", 12) |> textwidth == 12
+    @test rpad("⟨k|H₁|k⟩", 12) |> textwidth == 12
 end
 
 # string manipulation
@@ -158,6 +163,7 @@ end
     @test split("", "") == rsplit("", "") == [""]
     @test split("abc", "") == rsplit("abc", "") == ["a","b","c"]
     @test rsplit("abc", "", limit=2) == ["ab","c"]
+    @test rsplit("", "//") == [""]
     @test split("abc", "", limit=2) == ["a","bc"]
 
     @test split("", r"") == [""]
@@ -275,6 +281,11 @@ end
     # Issue 13332
     @test replace("abc", 'b' => 2.1) == "a2.1c"
 
+    # Issue 31456
+    @test replace("The fox.", r"fox(es)?" => s"bus\1") == "The bus."
+    @test replace("The foxes.", r"fox(es)?" => s"bus\1") == "The buses."
+    @test replace("The quick fox quickly.", r"(quick)?\sfox(es)?\s(run)?" => s"\1 bus\2 \3") == "The quick bus quickly."
+
     # test replace with a count for String and GenericString
     # check that replace is a no-op if count==0
     for s in ["aaa", Test.GenericString("aaa")]
@@ -302,6 +313,178 @@ end
 
 end
 
+@testset "replace many" begin
+    # PR 35414 Francesco Alemanno <francescoalemanno710@gmail.com>
+    @test replace("foobarbaz", "oo" => "zz", "ar" => "zz", "z" => "m") == "fzzbzzbam"
+    substmp=["z" => "m", "oo" => "zz", "ar" => "zz"]
+    for perm in [[1, 2, 3], [2, 1, 3], [3, 2, 1], [2, 3, 1], [1, 3, 2], [3, 1, 2]]
+        @test replace("foobarbaz", substmp[perm]...) == "fzzbzzbam"
+        @test replace("foobarbaz", substmp[perm]..., count=2) == "fzzbzzbaz"
+        @test replace("foobarbaz", substmp[perm]..., count=1) == "fzzbarbaz"
+    end
+    @test replace("foobarbaz", "z" => "m", r"a.*a" => uppercase) == "foobARBAm"
+    @test replace("foobarbaz", 'o' => 'z', 'a' => 'q', 'z' => 'm') == "fzzbqrbqm"
+
+
+    # PR #25732 Klaus Crusius <klaus.crusius@web.de>
+    @test replace("\u2202", '*' => '\0', "" => "") == "\u2202"
+
+    @test replace("foobar", 'o' => '0', "" => "") == "f00bar"
+    @test replace("foobar", 'o' => '0', count=1, "" => "") == "foobar"
+    @test replace("foobar", 'o' => '0', count=2, "" => "") == "f0obar"
+    @test replace("foobar", 'o' => "", "" => "") == "fbar"
+    @test replace("foobar", 'o' => "", count=1, "" => "") == "foobar"
+    @test replace("foobar", 'o' => "", count=2, "" => "") == "fobar"
+    @test replace("foobar", 'f' => 'F', "" => "") == "Foobar"
+    @test replace("foobar", 'r' => 'R', "" => "") == "foobaR"
+
+    @test replace("foofoofoo", "foo" => "bar", "" => "") == "barbarbar"
+    @test replace("foobarfoo", "foo" => "baz", "" => "") == "bazbarbaz"
+    @test replace("barfoofoo", "foo" => "baz", "" => "") == "barbazbaz"
+
+    @test replace("", "" => "", "" => "") == ""
+    @test replace("", "" => "x", "" => "") == "x"
+    @test replace("", "x" => "y", "" => "") == ""
+
+    @test replace("abcd", "" => "^", "" => "") == "^a^b^c^d^"
+    @test replace("abcd", "b" => "^", "" => "") == "a^cd"
+    @test replace("abcd", r"b?" => "^", "" => "") == "^a^c^d^"
+    @test replace("abcd", r"b+" => "^", "" => "") == "a^cd"
+    @test replace("abcd", r"b?c?" => "^", "" => "") == "^a^d^"
+    @test replace("abcd", r"[bc]?" => "^", "" => "") == "^a^^d^"
+
+    @test replace("foobarfoo", r"(fo|ba)" => "xx", "" => "") == "xxoxxrxxo"
+    @test replace("foobarfoo", r"(foo|ba)" => "bar", "" => "") == "barbarrbar"
+
+    @test replace("foobar", 'o' => 'ø', "" => "") == "føøbar"
+    @test replace("foobar", 'o' => 'ø', count=2, "" => "") == "føobar"
+    @test replace("føøbar", 'ø' => 'o', "" => "") == "foobar"
+    @test replace("føøbar", 'ø' => 'o', count=2, "" => "") == "foøbar"
+    @test replace("føøbar", 'ø' => 'ö', "" => "") == "fööbar"
+    @test replace("føøbar", 'ø' => 'ö', count=2, "" => "") == "föøbar"
+    @test replace("føøbar", 'ø' => "", "" => "") == "fbar"
+    @test replace("føøbar", 'ø' => "", count=2, "" => "") == "føbar"
+    @test replace("føøbar", 'f' => 'F', "" => "") == "Føøbar"
+    @test replace("ḟøøbar", 'ḟ' => 'F', "" => "") == "Føøbar"
+    @test replace("føøbar", 'f' => 'Ḟ', "" => "") == "Ḟøøbar"
+    @test replace("ḟøøbar", 'ḟ' => 'Ḟ', "" => "") == "Ḟøøbar"
+    @test replace("føøbar", 'r' => 'R', "" => "") == "føøbaR"
+    @test replace("føøbaṙ", 'ṙ' => 'R', "" => "") == "føøbaR"
+    @test replace("føøbar", 'r' => 'Ṙ', "" => "") == "føøbaṘ"
+    @test replace("føøbaṙ", 'ṙ' => 'Ṙ', "" => "") == "føøbaṘ"
+
+    @test replace("ḟøøḟøøḟøø", "ḟøø" => "bar", "" => "") == "barbarbar"
+    @test replace("ḟøøbarḟøø", "ḟøø" => "baz", "" => "") == "bazbarbaz"
+    @test replace("barḟøøḟøø", "ḟøø" => "baz", "" => "") == "barbazbaz"
+
+    @test replace("foofoofoo", "foo" => "ƀäṙ", "" => "") == "ƀäṙƀäṙƀäṙ"
+    @test replace("fooƀäṙfoo", "foo" => "baz", "" => "") == "bazƀäṙbaz"
+    @test replace("ƀäṙfoofoo", "foo" => "baz", "" => "") == "ƀäṙbazbaz"
+
+    @test replace("foofoofoo", "foo" => "bar", "" => "") == "barbarbar"
+    @test replace("foobarfoo", "foo" => "ƀäż", "" => "") == "ƀäżbarƀäż"
+    @test replace("barfoofoo", "foo" => "ƀäż", "" => "") == "barƀäżƀäż"
+
+    @test replace("ḟøøḟøøḟøø", "ḟøø" => "ƀäṙ", "" => "") == "ƀäṙƀäṙƀäṙ"
+    @test replace("ḟøøƀäṙḟøø", "ḟøø" => "baz", "" => "") == "bazƀäṙbaz"
+    @test replace("ƀäṙḟøøḟøø", "ḟøø" => "baz", "" => "") == "ƀäṙbazbaz"
+
+    @test replace("ḟøøḟøøḟøø", "ḟøø" => "bar", "" => "") == "barbarbar"
+    @test replace("ḟøøbarḟøø", "ḟøø" => "ƀäż", "" => "") == "ƀäżbarƀäż"
+    @test replace("barḟøøḟøø", "ḟøø" => "ƀäż", "" => "") == "barƀäżƀäż"
+
+    @test replace("ḟøøḟøøḟøø", "ḟøø" => "ƀäṙ", "" => "") == "ƀäṙƀäṙƀäṙ"
+    @test replace("ḟøøƀäṙḟøø", "ḟøø" => "ƀäż", "" => "") == "ƀäżƀäṙƀäż"
+    @test replace("ƀäṙḟøøḟøø", "ḟøø" => "ƀäż", "" => "") == "ƀäṙƀäżƀäż"
+
+    @test replace("", "" => "ẍ", "" => "") == "ẍ"
+    @test replace("", "ẍ" => "ÿ", "" => "") == ""
+
+    @test replace("äƀçđ", "" => "π", "" => "") == "πäπƀπçπđπ"
+    @test replace("äƀçđ", "ƀ" => "π", "" => "") == "äπçđ"
+    @test replace("äƀçđ", r"ƀ?" => "π", "" => "") == "πäπçπđπ"
+    @test replace("äƀçđ", r"ƀ+" => "π", "" => "") == "äπçđ"
+    @test replace("äƀçđ", r"ƀ?ç?" => "π", "" => "") == "πäπđπ"
+    @test replace("äƀçđ", r"[ƀç]?" => "π", "" => "") == "πäππđπ"
+
+    @test replace("foobarfoo", r"(fo|ba)" => "ẍẍ", "" => "") == "ẍẍoẍẍrẍẍo"
+
+    @test replace("ḟøøbarḟøø", r"(ḟø|ba)" => "xx", "" => "") == "xxøxxrxxø"
+    @test replace("ḟøøbarḟøø", r"(ḟøø|ba)" => "bar", "" => "") == "barbarrbar"
+
+    @test replace("fooƀäṙfoo", r"(fo|ƀä)" => "xx", "" => "") == "xxoxxṙxxo"
+    @test replace("fooƀäṙfoo", r"(foo|ƀä)" => "ƀäṙ", "" => "") == "ƀäṙƀäṙṙƀäṙ"
+
+    @test replace("ḟøøƀäṙḟøø", r"(ḟø|ƀä)" => "xx", "" => "") == "xxøxxṙxxø"
+    @test replace("ḟøøƀäṙḟøø", r"(ḟøø|ƀä)" => "ƀäṙ", "" => "") == "ƀäṙƀäṙṙƀäṙ"
+
+    @test replace("foo", "oo" => uppercase, "" => "") == "fOO"
+
+    # Issue 13332
+    @test replace("abc", 'b' => 2.1, "" => "") == "a2.1c"
+
+    # test replace with a count for String and GenericString
+    # check that replace is a no-op if count==0
+    for s in ["aaa", Test.GenericString("aaa")]
+        @test_throws DomainError replace(s, 'a' => "", count = -1, "" => "")
+        @test replace(s, 'a' => 'z', count=0, "" => "")::String == s
+        @test replace(s, 'a' => 'z', count=1, "" => "") == "zaa"
+        @test replace(s, 'a' => 'z', count=2, "" => "") == "zza"
+        @test replace(s, 'a' => 'z', count=3, "" => "") == "zzz"
+        @test replace(s, 'a' => 'z', count=4, "" => "") == "zzz"
+        @test replace(s, 'a' => 'z', count=typemax(Int), "" => "") == "zzz"
+        @test replace(s, 'a' => 'z', "" => "") == "zzz"
+    end
+
+    let s = "abc"
+        @test replace(s) === s
+        @test replace(s, 'a' => 'z', "" => "") === "zbc"
+        @test replace(s, 'a' => 'z', 'b' => 'y') == "zyc"
+        @test replace(s, 'a' => 'z', 'c' => 'x', "b" => 'y') == "zyx"
+        @test replace(s, '1' => 'z', "" => "") == s
+        @test replace(s, 'b' => "BbB", "" => "", count=2) == "aBbBc"
+    end
+
+    let s = "quick quicker quickest"  # when several patterns match at one position, the first listed wins
+        @test replace(s) === s
+        @test replace(s, "quickest" => 'z', "quicker" => uppercase, "quick" => 'a') == "a QUICKER z"
+        @test replace(s, "quick" => 'a', "quicker" => uppercase, "quickest" => 'z') == "a aer aest"
+        @test replace(s, "quickest" => "lame", "quicker" => "is", "quick" => "Duck", count=2) == "Duck is quickest"
+        @test "1q1u1i1c1k1 1q1u1i1c1k1e1r1 1q1u1i1c1k1e1s1t1" ==
+              replace(s, "" => '1', "" => "") ==
+              replace(s, "" => '1', "" => '2')
+        @test replace(s, "qu" => "QU", "qu" => "never happens", "ick" => "") == "QU QUer QUest"
+        @test replace(s, " " => '_', "r " => "r-") == "quick_quicker-quickest"
+        @test replace(s, r"[aeiou]" => "ä", "ui" => "ki", "i" => "I") == "qääck qääckär qääckäst"
+        @test replace(s, "i" => "I", "ui" => "ki", r"[aeiou]" => "ä") == "qkick qkickär qkickäst"
+        @test replace(s, r"[^ ]+" => "word", "quicker " => "X", count=big"99") == "word word word"
+        @test replace(s, "quicker " => "X", r"[^ ]+" => "word", count=big"99") == "word Xword"
+
+        @test replace(s, r"(quick)(e)" => s"\2-\1", "x" => "X") == "quick e-quickr e-quickst"
+
+        @test replace(s, 'q' => 'Q', 'u' => 'U') == "QUick QUicker QUickest"
+        @test replace(s, 'q' => 'Q', r"u" => 'U') == "QUick QUicker QUickest"
+        @test replace(s, 'q' => 'Q', ==('u') => uppercase) == "QUick QUicker QUickest"
+        @test replace(s, 'q' => 'Q', islowercase => '-') == "Q---- Q------ Q-------"
+        @test replace(s, ['q', 'u'] => 'K') == "KKick KKicker KKickest"
+        @test replace(s, occursin("uq") => 'K') == "KKick KKicker KKickest"
+        @test replace(s, ==('q') => "B") == "Buick Buicker Buickest"
+
+        @test replace(s, "qui" => "A", 'r' => 'R') == "Ack AckeR Ackest"
+        @test replace(s, 'r' => 'x', islowercase => uppercase) == "QUICK QUICKEx QUICKEST"
+        @test replace(s, islowercase => uppercase, 'r' => 'x') == "QUICK QUICKER QUICKEST"
+        @test replace(s, "q" => "z", islowercase => uppercase, 'r' => 'x') == "zUICK zUICKER zUICKEST"
+        @test replace(s, "qui" => "A", 'r' => 'x', islowercase => uppercase) == "ACK ACKEx ACKEST"
+        @test replace(s, r"qui" => "A", 'r' => 'x', islowercase => uppercase) == "ACK ACKEx ACKEST"  # Regex form of the line above
+        @test replace(s, r"q" => "z", islowercase => uppercase, 'r' => 'x') == "zUICK zUICKER zUICKEST"
+
+        @test replace(s, "q" => s"a\0b") == "aqbuick aqbuicker aqbuickest"
+        @test replace(s, "q" => s"a\0b\n\\\g<0>") == "aqb\n\\quick aqb\n\\quicker aqb\n\\quickest"
+        @test_throws ErrorException("PCRE error: unknown substring") replace(s, r"q" => s"a\1b")
+        @test_throws ErrorException("Bad replacement string: pattern is not a Regex") replace(s, "q" => s"a\1b")
+    end
+end
+
 @testset "chomp/chop" begin
     @test chomp("foo\n") == "foo"
     @test chomp("fo∀\n") == "fo∀"
@@ -376,6 +559,11 @@ end
         #non-hex characters
         @test_throws ArgumentError hex2bytes(b"0123456789abcdefABCDEFGH")
     end
+
+    @testset "Issue 39284" begin  # hex conversions must accept reversed (non-array) iterators
+        @test bytes2hex(Iterators.reverse(hex2bytes("0123456789abcdefABCDEF"))) == "efcdabefcdab8967452301"
+        @test hex2bytes(Iterators.reverse(b"CE1A85EECc")) == UInt8[0xcc, 0xee, 0x58, 0xa1, 0xec]
+    end
 end
 
 # b"" should be immutable
diff --git a/test/subarray.jl b/test/subarray.jl
index 76f00ab7948cb5..cc8aab94e4c424 100644
--- a/test/subarray.jl
+++ b/test/subarray.jl
@@ -134,8 +134,8 @@ end
 function test_bounds(@nospecialize(A))
     @test_throws BoundsError A[0]
     @test_throws BoundsError A[end+1]
-    trailing2 = ntuple(x->1, max(ndims(A)-2, 0))
-    trailing3 = ntuple(x->1, max(ndims(A)-3, 0))
+    trailing2 = ntuple(Returns(1), max(ndims(A)-2, 0))
+    trailing3 = ntuple(Returns(1), max(ndims(A)-3, 0))
     @test_throws BoundsError A[1, 0, trailing2...]
     @test_throws BoundsError A[1, end+1, trailing2...]
     @test_throws BoundsError A[1, 1, 0, trailing3...]
@@ -214,10 +214,10 @@ end
 function runviews(SB::AbstractArray, indexN, indexNN, indexNNN)
     @assert ndims(SB) > 2
     for i3 in indexN, i2 in indexN, i1 in indexN
-        runsubarraytests(SB, i1, i2, i3, ntuple(x->1, max(ndims(SB)-3, 0))...)
+        runsubarraytests(SB, i1, i2, i3, ntuple(Returns(1), max(ndims(SB)-3, 0))...)
     end
     for i2 in indexN, i1 in indexN
-        runsubarraytests(SB, i1, i2, ntuple(x->1, max(ndims(SB)-2, 0))...)
+        runsubarraytests(SB, i1, i2, ntuple(Returns(1), max(ndims(SB)-2, 0))...)
     end
     for i1 in indexNNN
         runsubarraytests(SB, i1)
@@ -718,3 +718,22 @@ end
     s = @view v[1]
     @test copy(s) == fill([1])
 end
+
+@testset "issue 40314: views of CartesianIndices" begin
+    c = CartesianIndices((1:2, 1:4))
+    @test (@view c[c]) === c  # viewing c by itself must give back the identical object
+    for inds in Any[(1:1, 1:2), (1:1:1, 1:2)]  # unit-range and step-range first index
+        c2 = @view c[inds...]
+        @test c2 isa CartesianIndices{2}
+        for i2 in inds[2], i1 in inds[1]
+            @test c2[i1, i2] == c[i1, i2]
+        end
+    end
+    for inds in Any[(Colon(), 1:2), (Colon(), 1:1:2)]  # Colon first index, unit/step range second
+        c2 = @view c[inds...]
+        @test c2 isa CartesianIndices{2}
+        for i2 in inds[2], i1 in axes(c, 1)
+            @test c2[i1, i2] == c[i1, i2]
+        end
+    end
+end
diff --git a/test/subtype.jl b/test/subtype.jl
index 3c720dc4bf0322..d403716646de07 100644
--- a/test/subtype.jl
+++ b/test/subtype.jl
@@ -140,7 +140,7 @@ function test_diagonal()
     @test !issub(Type{Tuple{T,Any} where T},   Type{Tuple{T,T}} where T)
     @test !issub(Type{Tuple{T,Any,T} where T}, Type{Tuple{T,T,T}} where T)
     @test_broken issub(Type{Tuple{T} where T},       Type{Tuple{T}} where T)
-    @test_broken issub(Ref{Tuple{T} where T},        Ref{Tuple{T}} where T)
+    @test  issub(Ref{Tuple{T} where T},        Ref{Tuple{T}} where T)
     @test !issub(Type{Tuple{T,T} where T},     Type{Tuple{T,T}} where T)
     @test !issub(Type{Tuple{T,T,T} where T},   Type{Tuple{T,T,T}} where T)
     @test  isequal_type(Ref{Tuple{T, T} where Int<:T<:Int},
@@ -587,7 +587,7 @@ function test_old()
     @test !(Type{Tuple{Nothing}} <: Tuple{Type{Nothing}})
 end
 
-const menagerie =
+const easy_menagerie =
     Any[Bottom, Any, Int, Int8, Integer, Real,
         Array{Int,1}, AbstractArray{Int,1},
         Tuple{Int,Vararg{Integer}}, Tuple{Integer,Vararg{Int}}, Tuple{},
@@ -607,12 +607,14 @@ const menagerie =
         Array{(@UnionAll T<:Int T), 1},
         (@UnionAll T<:Real @UnionAll S<:AbstractArray{T,1} Tuple{T,S}),
         Union{Int,Ref{Union{Int,Int8}}},
-        (@UnionAll T Union{Tuple{T,Array{T,1}}, Tuple{T,Array{Int,1}}}),
         ]
 
-let new = Any[]
-    # add variants of each type
-    for T in menagerie
+const hard_menagerie =
+    Any[(@UnionAll T Union{Tuple{T,Array{T,1}}, Tuple{T,Array{Int,1}}})]
+
+function add_variants!(types)
+    new = Any[]
+    for T in types
         push!(new, Ref{T})
         push!(new, Tuple{T})
         push!(new, Tuple{T,T})
@@ -620,9 +622,14 @@ let new = Any[]
         push!(new, @UnionAll S<:T S)
         push!(new, @UnionAll S<:T Ref{S})
     end
-    append!(menagerie, new)
+    append!(types, new)
 end
 
+add_variants!(easy_menagerie)
+add_variants!(hard_menagerie)
+
+const menagerie = [easy_menagerie; hard_menagerie]
+
 function test_properties()
     x→y = !x || y
     ¬T = @UnionAll X>:T Ref{X}
@@ -1057,14 +1064,15 @@ function test_intersection()
 end
 
 function test_intersection_properties()
-    approx = Tuple{Vector{Vector{T}} where T, Vector{Vector{T}} where T}
-    for T in menagerie
-        for S in menagerie
+    for i in eachindex(menagerie)
+        T = menagerie[i]
+        for j in eachindex(menagerie)
+            S = menagerie[j]
             I = _type_intersect(T,S)
             I2 = _type_intersect(S,T)
             @test isequal_type(I, I2)
-            if I == approx
-                # TODO: some of these cases give a conservative answer
+            if i > length(easy_menagerie) || j > length(easy_menagerie)
+                # TODO: these cases give a conservative answer
                 @test issub(I, T) || issub(I, S)
             else
                 @test issub(I, T) && issub(I, S)
@@ -1569,7 +1577,7 @@ f31082(::Pair{B, C}, ::C, ::C) where {B, C} = 1
                Tuple{Type{Val{T}},Int,T} where T)
 @testintersect(Tuple{Type{Val{T}},Integer,T} where T,
                Tuple{Type,Int,Integer},
-               Tuple{Type{Val{T}},Int,T} where T<:Integer)
+               Tuple{Type{Val{T}},Int,Integer} where T)
 @testintersect(Tuple{Type{Val{T}},Integer,T} where T>:Integer,
                Tuple{Type,Int,Integer},
                Tuple{Type{Val{T}},Int,Integer} where T>:Integer)
@@ -1796,7 +1804,7 @@ let X1 = Tuple{AlmostLU, Vector{T}} where T,
     # TODO: the quality of this intersection is not great; for now just test that it
     # doesn't stack overflow
     @test I<:X1 || I<:X2
-    actual = Tuple{AlmostLU{S, X} where X<:Matrix{S}, Vector{S}} where S<:Union{Float32, Float64}
+    actual = Tuple{Union{AlmostLU{S, X} where X<:Matrix{S}, AlmostLU{S, <:Matrix}}, Vector{S}} where S<:Union{Float32, Float64}
     @test I == actual
 end
 
@@ -1858,7 +1866,7 @@ let A = Tuple{Type{T} where T<:Ref, Ref, Union{T, Union{Ref{T}, T}} where T<:Ref
     I = typeintersect(A,B)
     # this was a case where <: disagreed with === (due to a badly-normalized type)
     @test I == typeintersect(A,B)
-    @test I == Tuple{Type{T}, Ref{T}, Union{Ref{T}, T}} where T<:Ref
+    @test I == Tuple{Type{T}, Ref{T}, Ref} where T<:Ref
 end
 
 # issue #39218
@@ -1898,6 +1906,68 @@ end
 # issue #39948
 let A = Tuple{Array{Pair{T, JT} where JT<:Ref{T}, 1} where T, Vector},
     I = typeintersect(A, Tuple{Vararg{Vector{T}}} where T)
-    @test_broken I <: A
-    @test_broken !Base.has_free_typevars(I)
+    @test I <: A
+    @test !Base.has_free_typevars(I)
+end
+
+# issue #8915
+struct D8915{T<:Union{Float32,Float64}}
+    D8915{T}(a) where {T} = 1
+    D8915{T}(a::Int) where {T} = 2
+end
+@test D8915{Float64}(1) == 2
+@test D8915{Float64}(1.0) == 1
+
+# issue #18985
+f18985(x::T, y...) where {T<:Union{Int32,Int64}} = (length(y), f18985(y[1], y[2:end]...)...)
+f18985(x::T) where {T<:Union{Int32,Int64}} = 100
+@test f18985(1, 2, 3) == (2, 1, 100)
+
+# issue #40048
+let A = Tuple{Ref{T}, Vararg{T}} where T,
+    B = Tuple{Ref{U}, Union{Ref{S}, Ref{U}, Int}, Union{Ref{S}, S}} where S where U,
+    C = Tuple{Ref{U}, Union{Ref{S}, Ref{U}, Ref{W}}, Union{Ref{S}, W, V}} where V<:AbstractArray where W where S where U
+    I = typeintersect(A, B)
+    @test I != Union{}
+    @test I <: A
+    @test I <: B
+    # avoid stack overflow
+    J = typeintersect(A, C)
+    @test_broken J != Union{}
 end
+
+let A = Tuple{Dict{I,T}, I, T} where T where I,
+    B = Tuple{AbstractDict{I,T}, T, I} where T where I
+    # TODO: we should probably have I == T here
+    @test typeintersect(A, B) == Tuple{Dict{I,T}, I, T} where {I, T}
+end
+
+let A = Tuple{UnionAll, Vector{Any}},
+    B = Tuple{Type{T}, T} where T<:AbstractArray,
+    I = typeintersect(A, B)
+    @test !isconcretetype(I)
+    @test I == Tuple{Type{T}, Vector{Any}} where T<:AbstractArray
+end
+
+@testintersect(Tuple{Type{Vector{<:T}}, T} where {T<:Integer},
+               Tuple{Type{T}, AbstractArray} where T<:Array,
+               Bottom)
+
+struct S40{_A, _B, _C, _D, _E, _F, _G, _H, _I, _J, _K, _L, _M, _N, _O, _P, _Q, _R, _S, _T, _U, _V, _W, _X, _Y, _Z, _Z1, _Z2, _Z3, _Z4, _Z5, _Z6, _Z7, _Z8, _Z9, _Z10, _Z11, _Z12, _Z13, _Z14}
+end
+
+@testintersect(Tuple{Type{S40{_A, _B, _C, _D, _E, _F, _G, _H, _I, _J, _K, _L, _M, _N, _O, _P, _Q, _R, _S, _T, _U, _V, _W, _X, _Y, _Z, _Z1, _Z2, _Z3, _Z4, _Z5, _Z6, _Z7, _Z8, _Z9, _Z10, _Z11, _Z12, _Z13, _Z14}} where _Z14 where _Z13 where _Z12 where _Z11 where _Z10 where _Z9 where _Z8 where _Z7 where _Z6 where _Z5 where _Z4 where _Z3 where _Z2 where _Z1 where _Z where _Y where _X where _W where _V where _U where _T where _S where _R where _Q where _P where _O where _N where _M where _L where _K where _J where _I where _H where _G where _F where _E where _D where _C where _B where _A, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any},
+               Tuple{Type{S40{A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, A21, A22, A23, A24, A25, A26, A27, A28, A29, A30, A31, A32, A33, A34, A35, A36, A37, A38, A39, A40} where A40 where A39 where A38 where A37 where A36 where A35 where A34 where A33 where A32 where A31 where A30 where A29 where A28 where A27 where A26 where A25 where A24 where A23 where A22 where A21 where A20 where A19 where A18 where A17 where A16 where A15 where A14 where A13 where A12 where A11 where A10 where A9 where A8 where A7 where A6 where A5 where A4 where A3 where A2 where A1}, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, A21, A22, A23, A24, A25, A26, A27, A28, A29, A30, A31, A32, A33, A34, A35, A36, A37, A38, A39, A40} where A40 where A39 where A38 where A37 where A36 where A35 where A34 where A33 where A32 where A31 where A30 where A29 where A28 where A27 where A26 where A25 where A24 where A23 where A22 where A21 where A20 where A19 where A18 where A17 where A16 where A15 where A14 where A13 where A12 where A11 where A10 where A9 where A8 where A7 where A6 where A5 where A4 where A3 where A2 where A1,
+               Bottom)
+
+let A = Tuple{Any, Type{Ref{_A}} where _A},
+    B = Tuple{Type{T}, Type{<:Union{Ref{T}, T}}} where T,
+    I = typeintersect(A, B)
+    @test I != Union{}
+    # TODO: this intersection result is still too narrow
+    @test_broken Tuple{Type{Ref{Integer}}, Type{Ref{Integer}}} <: I
+end
+
+@testintersect(Tuple{Type{T}, T} where T<:(Tuple{Vararg{_A, _B}} where _B where _A),
+               Tuple{Type{Tuple{Vararg{_A, N}} where _A<:F}, Pair{N, F}} where F where N,
+               Bottom)
diff --git a/test/syntax.jl b/test/syntax.jl
index d97f5aeb251c8c..09a91a31305f31 100644
--- a/test/syntax.jl
+++ b/test/syntax.jl
@@ -618,15 +618,12 @@ end
 @test A15838.@f() === nothing
 @test A15838.@f(1) === :b
 let ex = :(A15838.@f(1, 2)), __source__ = LineNumberNode(@__LINE__, Symbol(@__FILE__))
-    nometh = try
+    e = try
         macroexpand(@__MODULE__, ex)
         false
     catch ex
         ex
-    end::LoadError
-    @test nometh.file === string(__source__.file)
-    @test nometh.line === __source__.line
-    e = nometh.error::MethodError
+    end::MethodError
     @test e.f === getfield(A15838, Symbol("@f"))
     @test e.args === (__source__, @__MODULE__, 1, 2)
 end
@@ -822,7 +819,7 @@ let f = function (x; kw...)
 end
 
 # normalization of Unicode symbols (#19464)
-let ε=1, μ=2, x=3, î=4
+let ε=1, μ=2, x=3, î=4, ⋅=5, (-)=6
     # issue #5434 (mu vs micro):
     @test Meta.parse("\u00b5") === Meta.parse("\u03bc")
     @test µ == μ == 2
@@ -832,6 +829,20 @@ let ε=1, μ=2, x=3, î=4
     # latin vs greek ε (#14751)
     @test Meta.parse("\u025B") === Meta.parse("\u03B5")
     @test ɛ == ε == 1
+    # middot char · (U+00B7) or · (U+0387) vs math dot operator ⋅ (U+22C5) (#25098)
+    @test Meta.parse("\u00b7") === Meta.parse("\u0387") === Meta.parse("\u22c5")
+    @test (·) == (·) == (⋅) == 5
+    # minus − (U+2212) vs hyphen-minus - (U+002D) (#26193)
+    @test Meta.parse("\u2212") === Meta.parse("-")
+    @test Meta.parse("\u221242") === Meta.parse("-42")
+    @test Meta.parse("\u2212 42") == Meta.parse("- 42")
+    @test Meta.parse("\u2212x") == Meta.parse("-x")
+    @test Meta.parse("x \u2212 42") == Meta.parse("x - 42")
+    @test Meta.parse("x \u2212= 42") == Meta.parse("x -= 42")
+    @test Meta.parse("100.0e\u22122") === Meta.parse("100.0E\u22122") === Meta.parse("100.0e-2")
+    @test Meta.parse("100.0f\u22122") === Meta.parse("100.0f-2")
+    @test Meta.parse("0x100p\u22128") === Meta.parse("0x100P\u22128") === Meta.parse("0x100p-8")
+    @test (−) == (-) == 6
 end
 
 # issue #8925
@@ -1357,7 +1368,6 @@ end
 @test Meta.parse("√3x^2") == Expr(:call, :*, Expr(:call, :√, 3), Expr(:call, :^, :x, 2))
 @test Meta.parse("-3x^2") == Expr(:call, :*, -3, Expr(:call, :^, :x, 2))
 @test_throws ParseError Meta.parse("2!3")
-@test_throws ParseError Meta.parse("2√3")
 
 # issue #27914
 @test Meta.parse("2f(x)")        == Expr(:call, :*, 2, Expr(:call, :f, :x))
@@ -1500,7 +1510,7 @@ let ex = Meta.parse("@test27521(2) do y; y; end")
 end
 
 # issue #27129
-f27129(x = 1) = (@Base._inline_meta; x)
+f27129(x = 1) = (@inline; x)
 for meth in methods(f27129)
     @test ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), meth, C_NULL, meth.source).inlineable
 end
@@ -1864,7 +1874,7 @@ end
 @test_throws UndefVarError eval(:(1+$(Symbol(""))))
 
 # issue #31404
-f31404(a, b; kws...) = (a, b, kws.data)
+f31404(a, b; kws...) = (a, b, values(kws))
 @test f31404(+, (Type{T} where T,); optimize=false) === (+, (Type,), (optimize=false,))
 
 # issue #28992
@@ -2158,6 +2168,12 @@ end
 @test Meta.parse("a ⟂ b ⟂ c") == Expr(:comparison, :a, :⟂, :b, :⟂, :c)
 @test Meta.parse("a ⟂ b ∥ c") == Expr(:comparison, :a, :⟂, :b, :∥, :c)
 
+# issue #39350
+@testset "binary ⫪ and ⫫" begin
+    # each symbol must parse as an ordinary infix call
+    for op in (:⫪, :⫫); @test Meta.parse("a $op b") == Expr(:call, op, :a, :b); end
+end
+
 # only allow certain characters after interpolated vars (#25231)
 @test Meta.parse("\"\$x෴  \"",raise=false) == Expr(:error, "interpolated variable \$x ends with invalid character \"෴\"; use \"\$(x)\" instead.")
 @test Base.incomplete_tag(Meta.parse("\"\$foo", raise=false)) == :string
@@ -2694,6 +2710,27 @@ end
     @test Meta.isexpr(Meta.@lower(f((; a, b::Int)) = a + b), :error)
 end
 
+# #33697
+@testset "N-dimensional concatenation" begin # pins the Expr trees produced by multi-semicolon array literals
+    @test :([1 2 5; 3 4 6;;; 0 9 3; 4 5 4]) ==
+        Expr(:ncat, 3, Expr(:nrow, 1, Expr(:row, 1, 2, 5), Expr(:row, 3, 4, 6)),
+                        Expr(:nrow, 1, Expr(:row, 0, 9, 3), Expr(:row, 4, 5, 4))) # ;;; is expected to give :ncat with dimension 3
+    @test :([1 ; 2 ;; 3 ; 4]) == Expr(:ncat, 2, Expr(:nrow, 1, 1, 2), Expr(:nrow, 1, 3, 4)) # ;; is expected to give :ncat with dimension 2
+
+    @test_throws ParseError Meta.parse("[1 2 ;; 3 4]") # cannot mix spaces and ;; except as line break
+    @test :([1 2 ;;
+            3 4]) == :([1 2 3 4]) # ;; directly before a newline continues the same row
+    @test :([1 2 ;;
+            3 4 ; 2 3 4 5]) == :([1 2 3 4 ; 2 3 4 5]) # the continued row can still be followed by a ; row break
+
+    @test Meta.parse("[1;\n]") == :([1;]) # ensure line breaks following semicolons are treated correctly
+    @test Meta.parse("[1;\n\n]") == :([1;]) # same, with a blank line before the closing bracket
+    @test Meta.parse("[1\n;]") == :([1;]) # semicolons following a linebreak are fine
+    @test Meta.parse("[1\n;;; 2]") == :([1;;; 2]) # a ;;; run may also start the next line
+    @test_throws ParseError Meta.parse("[1;\n;2]") # semicolons cannot straddle a line break
+    @test_throws ParseError Meta.parse("[1; ;2]") # semicolons cannot be separated by a space
+end
+
 # issue #25652
 x25652 = 1
 x25652_2 = let (x25652, _) = (x25652, nothing)
@@ -2751,3 +2788,181 @@ end
 @test eval(:(x = $(QuoteNode(Core.SlotNumber(1))))) == Core.SlotNumber(1)
 @test_throws ErrorException("syntax: SSAValue objects should not occur in an AST") eval(:(x = $(Core.SSAValue(1))))
 @test_throws ErrorException("syntax: Slot objects should not occur in an AST") eval(:(x = $(Core.SlotNumber(1))))
+
+# juxtaposition of radical symbols (#40094)
+@test Meta.parse("2√3") == Expr(:call, :*, 2, Expr(:call, :√, 3))
+@test Meta.parse("2∛3") == Expr(:call, :*, 2, Expr(:call, :∛, 3))
+@test Meta.parse("2∜3") == Expr(:call, :*, 2, Expr(:call, :∜, 3))
+
+macro m_underscore_hygiene()
+    return Expr(:(=), :_, 1)  # same tree as the quoted assignment `_ = 1`
+end
+
+@test @macroexpand(@m_underscore_hygiene()) == :(_ = 1)
+
+macro m_begin_hygiene(a)
+    return Expr(:ref, esc(a), :begin)  # index the caller's expression with `begin`
+end
+
+@test @m_begin_hygiene([1, 2, 3]) == 1
+
+# issue #40258
+@test "a $("b $("c")")" == "a b c"
+
+@test "$(([[:a, :b], [:c, :d]]...)...)" == "abcd"
+
+@test eval(Expr(:string, "a", Expr(:string, "b", "c"))) == "abc"
+@test eval(Expr(:string, "a", Expr(:string, "b", Expr(:string, "c")))) == "abc"
+
+macro m_nospecialize_unnamed_hygiene()
+    return :(f(@nospecialize(::Any)) = Any)  # expands to a method `f` with one unnamed @nospecialize argument, returning `Any`
+end
+
+@test @m_nospecialize_unnamed_hygiene()(1) === Any
+
+# https://github.com/JuliaLang/julia/issues/40574
+@testset "no mutation while destructuring" begin
+    pair = [1, 2]
+    pair[2], pair[1] = pair  # expected: both values are read before either slot is written
+    @test pair == [2, 1]
+
+    triple = [1, 2, 3]
+    triple[3], triple[1:2]... = triple  # same expectation with a slurped slice target
+    @test triple == [2, 3, 1]
+end
+
+@testset "escaping newlines inside strings" begin
+    c = "c"
+
+    @test "a\
+b" == "ab"
+    @test "a\
+    b" == "ab"
+    @test raw"a\
+b" == "a\\\nb"
+    @test "a$c\
+b" == "acb"
+    @test "\\
+" == "\\\n"
+
+
+    @test """
+          a\
+          b""" == "ab"
+    @test """
+          a\
+            b""" == "ab"
+    @test """
+            a\
+          b""" == "ab"
+    @test raw"""
+          a\
+          b""" == "a\\\nb"
+    @test """
+          a$c\
+          b""" == "acb"
+
+    @test """
+          \
+          """ == ""
+    @test """
+          \\
+          """ == "\\\n"
+    @test """
+          \\\
+          """ == "\\"
+    @test """
+          \\\\
+          """ == "\\\\\n"
+    @test """
+          \\\\\
+          """ == "\\\\"
+    @test """
+          \
+          \
+          """ == ""
+    @test """
+          \\
+          \
+          """ == "\\\n"
+    @test """
+          \\\
+          \
+          """ == "\\"
+
+
+    @test `a\
+b` == `ab`
+    @test `a\
+    b` == `ab`
+    @test `a$c\
+b` == `acb`
+    @test `"a\
+b"` == `ab`
+    @test `'a\
+b'` == `$("a\\\nb")`
+    @test `\\
+` == `'\'`
+
+
+    @test ```
+          a\
+          b``` == `ab`
+    @test ```
+          a\
+            b``` == `ab`
+    @test ```
+            a\
+          b``` == `  ab`
+    @test ```
+          a$c\
+          b``` == `acb`
+    @test ```
+          "a\
+          b"``` == `ab`
+    @test ```
+          'a\
+          b'``` == `$("a\\\nb")`
+    @test ```
+          \\
+          ``` == `'\'`
+end
+
+# issue #41253
+@test (function (::Dict{}); end)(Dict()) === nothing
+
+@testset "issue #41330" begin
+    for eol in ("\r\n", "\r")  # a backslash before CRLF or a bare CR is expected to join the lines
+        @test Meta.parse("\"a\\$(eol)b\"") == "ab"
+        @test eval(Meta.parse("`a\\$(eol)b`")) == `ab`
+    end
+end
+
+@testset "slurping into function def" begin
+    lead, remainder()... = [1, 2, 3]
+    @test lead == 1
+    @test remainder() == [2, 3]
+    # the `Base.rest` call must sit outside `remainder`, so every call returns the same object
+    @test remainder() === remainder()
+
+    lead, remainder()... = 1, 2, 3
+    @test lead == 1
+    @test remainder() == (2, 3)
+end
+
+@testset "long function bodies" begin
+    body = Expr(:block)
+    body.args = Any[1 for _ in 1:700000]  # a 700000-statement body is expected to evaluate
+    f = eval(Expr(:function, :(), body))
+    @test f() == 1
+    oversized = Expr(:vcat)
+    oversized.args = Any[1 for _ in 1:600000]  # a 600000-element vcat is expected to be rejected
+    @test_throws ErrorException("syntax: expression too large") eval(oversized)
+end
+
+# issue #25678
+@generated f25678(x::T) where {T} = code_lowered(sin, Tuple{x})[]
+@test f25678(pi/6) === sin(pi/6)
+
+@generated g25678(x) = return :x
+@test g25678(7) === 7
diff --git a/test/testhelpers/Furlongs.jl b/test/testhelpers/Furlongs.jl
index 8abe583925d933..67c2023a0bc843 100644
--- a/test/testhelpers/Furlongs.jl
+++ b/test/testhelpers/Furlongs.jl
@@ -14,13 +14,13 @@ struct Furlong{p,T<:Number} <: Number
 end
 Furlong(x::T) where {T<:Number} = Furlong{1,T}(x)
 Furlong(x::Furlong) = x
-(::Type{T})(x::Furlong) where {T<:Number} = T(x.val)::T
+(::Type{T})(x::Furlong{0}) where {T<:Number} = T(x.val)::T
 Furlong{p}(v::Number) where {p} = Furlong{p,typeof(v)}(v)
 Furlong{p}(x::Furlong{q}) where {p,q} = (@assert(p==q); Furlong{p,typeof(x.val)}(x.val))
 Furlong{p,T}(x::Furlong{q}) where {T,p,q} = (@assert(p==q); Furlong{p,T}(T(x.val)))
 
 Base.promote_type(::Type{Furlong{p,T}}, ::Type{Furlong{p,S}}) where {p,T,S} =
-    (Base.@_pure_meta; Furlong{p,promote_type(T,S)})
+    Furlong{p,promote_type(T,S)}
 
 Base.one(x::Furlong{p,T}) where {p,T} = one(T)
 Base.one(::Type{Furlong{p,T}}) where {p,T} = one(T)
@@ -36,15 +36,14 @@ Base.floatmin(::Type{Furlong{p,T}}) where {p,T<:AbstractFloat} = Furlong{p}(floa
 Base.floatmin(::Furlong{p,T}) where {p,T<:AbstractFloat} = floatmin(Furlong{p,T})
 Base.floatmax(::Type{Furlong{p,T}}) where {p,T<:AbstractFloat} = Furlong{p}(floatmax(T))
 Base.floatmax(::Furlong{p,T}) where {p,T<:AbstractFloat} = floatmax(Furlong{p,T})
+Base.conj(x::Furlong{p,T}) where {p,T} = Furlong{p,T}(conj(x.val))
 
-# convert Furlong exponent p to a canonical form.  This
-# is not type stable, but it doesn't matter since it is used
-# at compile time (in generated functions), not runtime
+# convert Furlong exponent p to a canonical form
 canonical_p(p) = isinteger(p) ? Int(p) : Rational{Int}(p)
 
 Base.abs(x::Furlong{p}) where {p} = Furlong{p}(abs(x.val))
-@generated Base.abs2(x::Furlong{p}) where {p} = :(Furlong{$(canonical_p(2p))}(abs2(x.val)))
-@generated Base.inv(x::Furlong{p}) where {p} = :(Furlong{$(canonical_p(-p))}(inv(x.val)))
+Base.abs2(x::Furlong{p}) where {p} = Furlong{canonical_p(2p)}(abs2(x.val))
+Base.inv(x::Furlong{p}) where {p} = Furlong{canonical_p(-p)}(inv(x.val))
 
 for f in (:isfinite, :isnan, :isreal, :isinf)
     @eval Base.$f(x::Furlong) = $f(x.val)
@@ -63,11 +62,10 @@ end
 for op in (:(==), :(!=), :<, :<=, :isless, :isequal)
     @eval $op(x::Furlong{p}, y::Furlong{p}) where {p} = $op(x.val, y.val)
 end
-# generated functions to allow type inference of the value of the exponent:
 for (f,op) in ((:_plus,:+),(:_minus,:-),(:_times,:*),(:_div,://))
-    @eval @generated function $f(v::T, ::Furlong{p}, ::Union{Furlong{q},Val{q}}) where {T,p,q}
+    @eval function $f(v::T, ::Furlong{p}, ::Union{Furlong{q},Val{q}}) where {T,p,q}
         s = $op(p, q)
-        :(Furlong{$(canonical_p(s)),$T}(v))
+        Furlong{canonical_p(s),T}(v)
     end
 end
 for (op,eop) in ((:*, :_plus), (:/, :_minus), (://, :_minus), (:div, :_minus))
diff --git a/test/testhelpers/ImmutableArrays.jl b/test/testhelpers/ImmutableArrays.jl
new file mode 100644
index 00000000000000..df2a78387e07bf
--- /dev/null
+++ b/test/testhelpers/ImmutableArrays.jl
@@ -0,0 +1,28 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# ImmutableArrays (arrays that implement getindex but not setindex!)
+
+# This test file defines an array wrapper that is immutable. It can be used to
+# test the action of methods on immutable arrays.
+
+module ImmutableArrays
+
+export ImmutableArray
+
+"Read-only array wrapper: `size` and `getindex` forward to `data`; no `setindex!` is defined."
+struct ImmutableArray{T,N,A<:AbstractArray} <: AbstractArray{T,N}
+    data::A
+end
+
+ImmutableArray(data::AbstractArray{T,N}) where {T,N} = ImmutableArray{T,N,typeof(data)}(data)
+
+# Read-only part of the AbstractArray interface, forwarded to the wrapped array
+Base.size(arr::ImmutableArray) = size(arr.data)
+Base.size(arr::ImmutableArray, dim) = size(arr.data, dim)
+Base.getindex(arr::ImmutableArray, inds...) = arr.data[inds...]
+
+# Converting the element type re-wraps the converted data, so the result is still an ImmutableArray
+AbstractArray{T}(arr::ImmutableArray) where {T} = ImmutableArray(AbstractArray{T}(arr.data))
+AbstractArray{T,N}(arr::ImmutableArray{S,N}) where {S,T,N} = ImmutableArray(AbstractArray{T,N}(arr.data))
+
+end
diff --git a/test/testhelpers/InfiniteArrays.jl b/test/testhelpers/InfiniteArrays.jl
index bc6de1afc5503d..d69130f4d726aa 100644
--- a/test/testhelpers/InfiniteArrays.jl
+++ b/test/testhelpers/InfiniteArrays.jl
@@ -39,13 +39,11 @@ struct OneToInf{T<:Integer} <: AbstractUnitRange{T} end
 OneToInf() = OneToInf{Int}()
 
 Base.axes(r::OneToInf) = (r,)
-Base.unsafe_indices(r::OneToInf) = (r,)
-Base.unsafe_length(r::OneToInf) = Infinity()
 Base.size(r::OneToInf) = (Infinity(),)
 Base.first(r::OneToInf{T}) where {T} = oneunit(T)
-Base.length(r::OneToInf{T}) where {T} = Infinity()
-Base.last(r::OneToInf{T}) where {T} = Infinity()
+Base.length(r::OneToInf) = Infinity()
+Base.last(r::OneToInf) = Infinity()
 Base.unitrange(r::OneToInf) = r
 Base.oneto(::Infinity) = OneToInf()
 
-end
\ No newline at end of file
+end
diff --git a/test/testhelpers/OffsetArrays.jl b/test/testhelpers/OffsetArrays.jl
index 67de3ef4766522..27c666c9dacbd8 100644
--- a/test/testhelpers/OffsetArrays.jl
+++ b/test/testhelpers/OffsetArrays.jl
@@ -68,7 +68,6 @@ offset_coerce(::Type{I}, r::AbstractUnitRange) where I<:AbstractUnitRange{T} whe
 @inline Base.parent(r::IdOffsetRange) = r.parent
 @inline Base.axes(r::IdOffsetRange) = (Base.axes1(r),)
 @inline Base.axes1(r::IdOffsetRange) = IdOffsetRange(Base.axes1(r.parent), r.offset)
-@inline Base.unsafe_indices(r::IdOffsetRange) = (r,)
 @inline Base.length(r::IdOffsetRange) = length(r.parent)
 Base.reduced_index(i::IdOffsetRange) = typeof(i)(first(i):first(i))
 # Workaround for #92 on Julia < 1.4
@@ -298,6 +297,7 @@ Base.reshape(A::OffsetArray, inds::Tuple{Union{Integer,Base.OneTo},Vararg{Union{
 Base.reshape(A::OffsetArray, inds::Dims) = reshape(parent(A), inds)
 Base.reshape(A::OffsetArray, ::Colon) = reshape(parent(A), Colon())
 Base.reshape(A::OffsetVector, ::Colon) = A
+Base.reshape(A::OffsetVector, ::Tuple{Colon}) = A
 Base.reshape(A::OffsetArray, inds::Union{Int,Colon}...) = reshape(parent(A), inds)
 Base.reshape(A::OffsetArray, inds::Tuple{Vararg{Union{Int,Colon}}}) = reshape(parent(A), inds)
 
diff --git a/test/testhelpers/coverage_file.info b/test/testhelpers/coverage_file.info
index 9b4b1c1f2f96e7..1dea941fb2441d 100644
--- a/test/testhelpers/coverage_file.info
+++ b/test/testhelpers/coverage_file.info
@@ -4,7 +4,8 @@ DA:4,1
 DA:5,0
 DA:7,1
 DA:8,1
-DA:9,5
+DA:9,3
+DA:10,5
 DA:11,1
 DA:12,1
 DA:14,0
diff --git a/test/testhelpers/coverage_file.info.bad b/test/testhelpers/coverage_file.info.bad
index 44e33a9df68c79..d4d81eda267873 100644
--- a/test/testhelpers/coverage_file.info.bad
+++ b/test/testhelpers/coverage_file.info.bad
@@ -4,7 +4,8 @@ DA:4,1
 DA:5,0
 DA:7,1
 DA:8,1
-DA:9,5
+DA:9,3
+DA:10,5
 DA:11,1
 DA:12,1
 DA:14,0
@@ -14,6 +15,6 @@ DA:19,1
 DA:20,1
 DA:22,1
 DA:1234,0
-LH:11
-LF:15
+LH:12
+LF:16
 end_of_record
diff --git a/test/threads_exec.jl b/test/threads_exec.jl
index 87b3edc7c80257..f3d2dc9577c64d 100644
--- a/test/threads_exec.jl
+++ b/test/threads_exec.jl
@@ -738,8 +738,7 @@ end
 try
     @macroexpand @threads(for i = 1:10, j = 1:10; end)
 catch ex
-    @test ex isa LoadError
-    @test ex.error isa ArgumentError
+    @test ex isa ArgumentError
 end
 
 @testset "@spawn interpolation" begin
@@ -841,6 +840,19 @@ fib34666(x) =
     end
 @test fib34666(25) == 75025
 
+# issue #41324
+@testset "Co-schedule" begin
+    parent = Threads.@spawn begin
+        @test current_task().sticky == false  # checked before any @async child exists
+        child = @async begin end
+        @test current_task().sticky == true  # checked again right after creating the @async child
+        @test Threads.threadid() == Threads.threadid(child)  # parent and child must report the same thread id
+        wait(child)
+    end
+    wait(parent)
+    @test parent.sticky == true  # the flag is still set once the parent has finished
+end
+
 function jitter_channel(f, k, delay, ntasks, schedule)
     x = Channel(ch -> foreach(i -> put!(ch, i), 1:k), 1)
     y = Channel(k) do ch
@@ -875,3 +887,28 @@ end
     end
     @test sort!(collect(ys)) == 1:3
 end
+
+# reproducible multi-threaded rand()
+
+using Random
+
+# Sum two UInt64 draws from `r` (the first scaled by `hash(i)`) with the results
+# of two spawned recursive calls at depth `i-1`; depth 0 contributes zero.
+function reproducible_rand(r, i)
+    i == 0 && return zero(UInt64)
+    scaled_draw = rand(r, UInt64)*hash(i)
+    left = Threads.@spawn reproducible_rand(r, i-1)
+    right = Threads.@spawn reproducible_rand(r, i-1)
+    plain_draw = rand(r, UInt64)
+    return scaled_draw + plain_draw + fetch(left) + fetch(right)
+end
+
+@testset "Task-local random" begin
+    rng = Random.TaskLocalRNG()
+    Random.seed!(rng, 23)
+    baseline = reproducible_rand(rng, 10)
+    for _ in 1:4  # reseeding with the same seed must reproduce the baseline each time
+        Random.seed!(rng, 23)
+        @test reproducible_rand(rng, 10) == baseline
+    end
+end
diff --git a/test/tuple.jl b/test/tuple.jl
index 80a4323f6d2f27..913f024240e7ae 100644
--- a/test/tuple.jl
+++ b/test/tuple.jl
@@ -99,11 +99,13 @@ end
         @test BitPerm_19352(0,2,4,6,1,3,5,7).p[2] == 0x02
     end
 
-    @testset "ninitialized" begin
-        @test Tuple{Int,Any}.ninitialized == 2
-        @test Tuple.ninitialized == 0
-        @test Tuple{Int,Vararg{Any}}.ninitialized == 1
-        @test Tuple{Any,Any,Vararg{Any}}.ninitialized == 2
+    @testset "n_uninitialized" begin
+        @test Tuple.name.n_uninitialized == 0
+        @test Core.Compiler.datatype_min_ninitialized(Tuple{Int,Any}) == 2
+        @test Core.Compiler.datatype_min_ninitialized(Tuple) == 0
+        @test Core.Compiler.datatype_min_ninitialized(Tuple{Int,Vararg{Any}}) == 1
+        @test Core.Compiler.datatype_min_ninitialized(Tuple{Any,Any,Vararg{Any}}) == 2
+        @test Core.Compiler.datatype_min_ninitialized(Tuple{Any,Any,Vararg{Any,3}}) == 5
     end
 
     @test empty((1, 2.0, "c")) === ()
@@ -179,6 +181,15 @@ end
         @test_throws MethodError (1,)[]
         @test_throws MethodError (1,1,1)[1,1]
     end
+
+    @testset "get() method for Tuple (Issue #40809)" begin
+        @test get((5, 6, 7), 1, 0) == 5  # in-range index: the stored element is expected
+        @test get((), 5, 0) == 0  # empty tuple: the default is expected
+        @test get((1,), 3, 0) == 0  # index past the end: the default is expected
+        @test get(()->0, (5, 6, 7), 1) == 5  # function-first form, in-range index
+        @test get(()->0, (), 4) == 0  # function-first form: the callback's value is expected
+        @test get(()->0, (1,), 3) == 0  # function-first form, index past the end
+    end
 end
 
 @testset "fill to length" begin
@@ -272,13 +283,13 @@ end
     @test mapfoldl(abs, =>, (-1,-2,-3,-4), init=-10) == ((((-10=>1)=>2)=>3)=>4)
     @test mapfoldl(abs, =>, (), init=-10) == -10
     @test mapfoldl(abs, Pair{Any,Any}, (-30:-1...,)) == mapfoldl(abs, Pair{Any,Any}, [-30:-1...,])
-    @test_throws ArgumentError mapfoldl(abs, =>, ())
+    @test_throws "reducing over an empty collection" mapfoldl(abs, =>, ())
 end
 
 @testset "filter" begin
     @test filter(isodd, (1,2,3)) == (1, 3)
     @test filter(isequal(2), (true, 2.0, 3)) === (2.0,)
-    @test filter(i -> true, ()) == ()
+    @test filter(Returns(true), ()) == ()
     @test filter(identity, (true,)) === (true,)
     longtuple = ntuple(identity, 20)
     @test filter(iseven, longtuple) == ntuple(i->2i, 10)
@@ -350,6 +361,24 @@ end
     @test prod(()) === 1
     @test prod((1,2,3)) === 6
 
+    # issue 39182
+    @test sum((0xe1, 0x1f)) === sum([0xe1, 0x1f])
+    @test sum((Int8(3),)) === Int(3)
+    @test sum((UInt8(3),)) === UInt(3)
+    @test sum((3,)) === Int(3)
+    @test sum((3.0,)) === 3.0
+    @test sum(("a",)) == sum(["a"])
+    @test sum((0xe1, 0x1f), init=0x0) == sum([0xe1, 0x1f], init=0x0)
+
+    # issue 39183
+    @test prod((Int8(100), Int8(100))) === 10000
+    @test prod((Int8(3),)) === Int(3)
+    @test prod((UInt8(3),)) === UInt(3)
+    @test prod((3,)) === Int(3)
+    @test prod((3.0,)) === 3.0
+    @test prod(("a",)) == prod(["a"])
+    @test prod((0xe1, 0x1f), init=0x1) == prod([0xe1, 0x1f], init=0x1)
+
     @testset "all" begin
         @test all(()) === true
         @test all((false,)) === false
@@ -522,6 +551,9 @@ end
 
     @test Base.setindex((1, 2, 4), 4, true) === (4, 2, 4)
     @test_throws BoundsError Base.setindex((1, 2), 2, false)
+
+    f() = Base.setindex((1:1, 2:2, 3:3), 9, 1)
+    @test @inferred(f()) == (9, 2:2, 3:3)
 end
 
 @testset "inferrable range indexing with constant values" begin
@@ -622,3 +654,6 @@ f38837(xs) = map((F,x)->F(x), (Float32, Float64), xs)
     @test_throws BoundsError (1, 2)[0:2]
     @test_throws ArgumentError (1, 2)[OffsetArrays.IdOffsetRange(1:2, -1)]
 end
+
+# https://github.com/JuliaLang/julia/issues/40814
+@test Base.return_types(NTuple{3,Int}, (Vector{Int},)) == Any[NTuple{3,Int}]
diff --git a/test/vecelement.jl b/test/vecelement.jl
index 5652ea10d3aa6a..6638f06f4f3582 100644
--- a/test/vecelement.jl
+++ b/test/vecelement.jl
@@ -96,7 +96,7 @@ const _llvmtypes = Dict{DataType, String}(
     ret <$(N) x $(llvmT)> %3
     """
     return quote
-        Base.@_inline_meta
+        Base.@inline
         Core.getfield(Base, :llvmcall)($exp, Vec{$N, $T}, Tuple{Vec{$N, $T}, Vec{$N, $T}}, x, y)
     end
 end
diff --git a/test/worlds.jl b/test/worlds.jl
index 692f4febd91cdc..2b4f575e1905ae 100644
--- a/test/worlds.jl
+++ b/test/worlds.jl
@@ -219,8 +219,8 @@ function instance(f, types)
     if isa(specs, Nothing)
     elseif isa(specs, Core.SimpleVector)
         for i = 1:length(specs)
-            if isassigned(specs, i)
-                mi = specs[i]::Core.MethodInstance
+            mi = specs[i]
+            if mi isa Core.MethodInstance
                 if mi.specTypes <: tt && tt <: mi.specTypes
                     inst = mi
                     break