From e904a85d98773f24bb572f6b187b53fe0e85d474 Mon Sep 17 00:00:00 2001 From: Jithun Nair <37884920+jithunnair-amd@users.noreply.github.com> Date: Tue, 8 Aug 2023 09:34:41 -0500 Subject: [PATCH 001/212] Set FORCE_RPATH for ROCm (#1468) --- manywheel/build_rocm.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/manywheel/build_rocm.sh b/manywheel/build_rocm.sh index 0c1650f9b..fbbf7d3a6 100755 --- a/manywheel/build_rocm.sh +++ b/manywheel/build_rocm.sh @@ -14,6 +14,8 @@ export USE_STATIC_NCCL=1 export ATEN_STATIC_CUDA=1 export USE_CUDA_STATIC_LINK=1 export INSTALL_TEST=0 # dont install test binaries into site-packages +# Set RPATH instead of RUNPATH when using patchelf to avoid LD_LIBRARY_PATH override +export FORCE_RPATH="--force-rpath" # Keep an array of cmake variables to add to if [[ -z "$CMAKE_ARGS" ]]; then From 3c467094834426cfcd8320a5264f19a539e190c7 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Tue, 8 Aug 2023 13:46:29 -0400 Subject: [PATCH 002/212] Decouple aarch64 ci setup and build (#1470) --- aarch64_linux/aarch64_ci_build.sh | 43 ++---------------------------- aarch64_linux/aarch64_ci_setup.sh | 44 +++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 41 deletions(-) mode change 100755 => 100644 aarch64_linux/aarch64_ci_build.sh create mode 100644 aarch64_linux/aarch64_ci_setup.sh diff --git a/aarch64_linux/aarch64_ci_build.sh b/aarch64_linux/aarch64_ci_build.sh old mode 100755 new mode 100644 index 41843cead..c374359c2 --- a/aarch64_linux/aarch64_ci_build.sh +++ b/aarch64_linux/aarch64_ci_build.sh @@ -1,47 +1,8 @@ #!/bin/bash set -eux -o pipefail -# This script is used to prepare the Docker container for aarch64_ci_wheel_build.py python script -# as we need to install conda and setup the python version for the build. 
- -CONDA_PYTHON_EXE=/opt/conda/bin/python -CONDA_EXE=/opt/conda/bin/conda -PATH=/opt/conda/bin:$PATH -LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH - -############################################################################### -# Install OS dependent packages -############################################################################### -yum -y install epel-release -yum -y install less zstd libgomp - -############################################################################### -# Install conda -# disable SSL_verify due to getting "Could not find a suitable TLS CA certificate bundle, invalid path" -# when using Python version, less than the conda latest -############################################################################### -echo 'Installing conda-forge' -curl -L -o /mambaforge.sh https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-aarch64.sh -chmod +x /mambaforge.sh -/mambaforge.sh -b -p /opt/conda -rm /mambaforge.sh -/opt/conda/bin/conda config --set ssl_verify False -/opt/conda/bin/conda install -y -c conda-forge python=${DESIRED_PYTHON} numpy pyyaml setuptools patchelf pygit2 openblas -python --version -conda --version - -############################################################################### -# Exec libglfortran.a hack -# -# libgfortran.a from quay.io/pypa/manylinux2014_aarch64 is not compiled with -fPIC. -# This causes __stack_chk_guard@@GLIBC_2.17 on pytorch build. 
To solve, get -# ubuntu's libgfortran.a which is compiled with -fPIC -############################################################################### -cd ~/ -curl -L -o ~/libgfortran-10-dev.deb http://ports.ubuntu.com/ubuntu-ports/pool/universe/g/gcc-10/libgfortran-10-dev_10.5.0-1ubuntu1_arm64.deb -ar x ~/libgfortran-10-dev.deb -tar --use-compress-program=unzstd -xvf data.tar.zst -C ~/ -cp -f ~/usr/lib/gcc/aarch64-linux-gnu/10/libgfortran.a /opt/rh/devtoolset-10/root/usr/lib/gcc/aarch64-redhat-linux/10/ +SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +source $SCRIPTPATH/aarch64_ci_setup.sh ############################################################################### # Run aarch64 builder python diff --git a/aarch64_linux/aarch64_ci_setup.sh b/aarch64_linux/aarch64_ci_setup.sh new file mode 100644 index 000000000..c7065056a --- /dev/null +++ b/aarch64_linux/aarch64_ci_setup.sh @@ -0,0 +1,44 @@ +#!/bin/bash +set -eux -o pipefail + +# This script is used to prepare the Docker container for aarch64_ci_wheel_build.py python script +# as we need to install conda and setup the python version for the build. 
+ +CONDA_PYTHON_EXE=/opt/conda/bin/python +CONDA_EXE=/opt/conda/bin/conda +PATH=/opt/conda/bin:$PATH +LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH + +############################################################################### +# Install OS dependent packages +############################################################################### +yum -y install epel-release +yum -y install less zstd libgomp + +############################################################################### +# Install conda +# disable SSL_verify due to getting "Could not find a suitable TLS CA certificate bundle, invalid path" +# when using Python version, less than the conda latest +############################################################################### +echo 'Installing conda-forge' +curl -L -o /mambaforge.sh https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-aarch64.sh +chmod +x /mambaforge.sh +/mambaforge.sh -b -p /opt/conda +rm /mambaforge.sh +/opt/conda/bin/conda config --set ssl_verify False +/opt/conda/bin/conda install -y -c conda-forge python=${DESIRED_PYTHON} numpy pyyaml setuptools patchelf pygit2 openblas +python --version +conda --version + +############################################################################### +# Exec libglfortran.a hack +# +# libgfortran.a from quay.io/pypa/manylinux2014_aarch64 is not compiled with -fPIC. +# This causes __stack_chk_guard@@GLIBC_2.17 on pytorch build. 
To solve, get +# ubuntu's libgfortran.a which is compiled with -fPIC +############################################################################### +cd ~/ +curl -L -o ~/libgfortran-10-dev.deb http://ports.ubuntu.com/ubuntu-ports/pool/universe/g/gcc-10/libgfortran-10-dev_10.5.0-1ubuntu1_arm64.deb +ar x ~/libgfortran-10-dev.deb +tar --use-compress-program=unzstd -xvf data.tar.zst -C ~/ +cp -f ~/usr/lib/gcc/aarch64-linux-gnu/10/libgfortran.a /opt/rh/devtoolset-10/root/usr/lib/gcc/aarch64-redhat-linux/10/ From 14851d94f784d04534433288cb6dbc448a8f8193 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Tue, 8 Aug 2023 14:06:58 -0400 Subject: [PATCH 003/212] Run git update-index --chmod=+x aarch64_ci_setup.sh (#1471) --- aarch64_linux/aarch64_ci_setup.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 aarch64_linux/aarch64_ci_setup.sh diff --git a/aarch64_linux/aarch64_ci_setup.sh b/aarch64_linux/aarch64_ci_setup.sh old mode 100644 new mode 100755 From bb821d4afed1b7ce9a2fd2ee043d173cde2dfd5c Mon Sep 17 00:00:00 2001 From: Mike Schneider <104035434+xncqr@users.noreply.github.com> Date: Wed, 9 Aug 2023 11:02:34 -0700 Subject: [PATCH 004/212] [aarch64][CICD]Add aarch64 docker image build. 
(#1472) * Add aarch64 docker image build * removing ulimit for PT workflow * set aarch64 worker for docker build --- .github/workflows/build-manywheel-images.yml | 17 ++++ aarch64_linux/aarch64_ci_setup.sh | 21 +---- aarch64_linux/aarch64_wheel_ci_build.py | 67 ++++++++------- manywheel/Dockerfile_aarch64 | 86 ++++++++++++++++++++ manywheel/build_all_docker.sh | 2 + manywheel/build_docker.sh | 8 ++ 6 files changed, 147 insertions(+), 54 deletions(-) create mode 100644 manywheel/Dockerfile_aarch64 diff --git a/.github/workflows/build-manywheel-images.yml b/.github/workflows/build-manywheel-images.yml index 69af67803..df890f038 100644 --- a/.github/workflows/build-manywheel-images.yml +++ b/.github/workflows/build-manywheel-images.yml @@ -12,6 +12,7 @@ on: paths: - .github/workflows/build-manywheel-images.yml - manywheel/Dockerfile + - manywheel/Dockerfile_aarch64 - manywheel/Dockerfile_cxx11-abi - manywheel/build_docker.sh - 'common/*' @@ -19,6 +20,7 @@ on: paths: - .github/workflows/build-manywheel-images.yml - manywheel/Dockerfile + - manywheel/Dockerfile_aarch64 - manywheel/Dockerfile_cxx11-abi - 'common/*' - manywheel/build_docker.sh @@ -82,6 +84,21 @@ jobs: - name: Build Docker Image run: | manywheel/build_docker.sh + build-docker-cpu-aarch64: + runs-on: linux.t4g.2xlarge + env: + GPU_ARCH_TYPE: cpu-aarch64 + steps: + - name: Checkout PyTorch + uses: actions/checkout@v3 + - name: Authenticate if WITH_PUSH + run: | + if [[ "${WITH_PUSH}" == true ]]; then + echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin + fi + - name: Build Docker Image + run: | + manywheel/build_docker.sh build-docker-cpu-cxx11-abi: runs-on: ubuntu-22.04 env: diff --git a/aarch64_linux/aarch64_ci_setup.sh b/aarch64_linux/aarch64_ci_setup.sh index c7065056a..6d2d780fe 100755 --- a/aarch64_linux/aarch64_ci_setup.sh +++ b/aarch64_linux/aarch64_ci_setup.sh @@ -9,12 +9,6 @@ CONDA_EXE=/opt/conda/bin/conda PATH=/opt/conda/bin:$PATH 
LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH -############################################################################### -# Install OS dependent packages -############################################################################### -yum -y install epel-release -yum -y install less zstd libgomp - ############################################################################### # Install conda # disable SSL_verify due to getting "Could not find a suitable TLS CA certificate bundle, invalid path" @@ -26,19 +20,6 @@ chmod +x /mambaforge.sh /mambaforge.sh -b -p /opt/conda rm /mambaforge.sh /opt/conda/bin/conda config --set ssl_verify False -/opt/conda/bin/conda install -y -c conda-forge python=${DESIRED_PYTHON} numpy pyyaml setuptools patchelf pygit2 openblas +/opt/conda/bin/conda install -y -c conda-forge python=${DESIRED_PYTHON} numpy pyyaml setuptools patchelf pygit2 openblas ninja scons python --version conda --version - -############################################################################### -# Exec libglfortran.a hack -# -# libgfortran.a from quay.io/pypa/manylinux2014_aarch64 is not compiled with -fPIC. -# This causes __stack_chk_guard@@GLIBC_2.17 on pytorch build. 
To solve, get -# ubuntu's libgfortran.a which is compiled with -fPIC -############################################################################### -cd ~/ -curl -L -o ~/libgfortran-10-dev.deb http://ports.ubuntu.com/ubuntu-ports/pool/universe/g/gcc-10/libgfortran-10-dev_10.5.0-1ubuntu1_arm64.deb -ar x ~/libgfortran-10-dev.deb -tar --use-compress-program=unzstd -xvf data.tar.zst -C ~/ -cp -f ~/usr/lib/gcc/aarch64-linux-gnu/10/libgfortran.a /opt/rh/devtoolset-10/root/usr/lib/gcc/aarch64-redhat-linux/10/ diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py index 125cfe9fd..5d80a95e4 100755 --- a/aarch64_linux/aarch64_wheel_ci_build.py +++ b/aarch64_linux/aarch64_wheel_ci_build.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +# encoding: UTF-8 import os import subprocess @@ -6,36 +7,36 @@ from typing import List -'''' -Helper for getting paths for Python -''' def list_dir(path: str) -> List[str]: - return subprocess.check_output(["ls", "-1", path]).decode().split("\n") + '''' + Helper for getting paths for Python + ''' + return subprocess.check_output(["ls", "-1", path]).decode().split("\n") -''' -Using ArmComputeLibrary for aarch64 PyTorch -''' def build_ArmComputeLibrary(git_clone_flags: str = "") -> None: + ''' + Using ArmComputeLibrary for aarch64 PyTorch + ''' print('Building Arm Compute Library') os.system("cd / && mkdir /acl") os.system(f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v23.05.1 {git_clone_flags}") os.system('sed -i -e \'s/"armv8.2-a"/"armv8-a"/g\' ComputeLibrary/SConscript; ' 'sed -i -e \'s/-march=armv8.2-a+fp16/-march=armv8-a/g\' ComputeLibrary/SConstruct; ' 'sed -i -e \'s/"-march=armv8.2-a"/"-march=armv8-a"/g\' ComputeLibrary/filedefs.json') - os.system(f"cd ComputeLibrary; export acl_install_dir=/acl; " \ - f"scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=linux openmp=1 cppthreads=0 arch=armv8.2-a multi_isa=1 build=native build_dir=$acl_install_dir/build; " \ - f"cp -r arm_compute 
$acl_install_dir; " \ - f"cp -r include $acl_install_dir; " \ - f"cp -r utils $acl_install_dir; " \ - f"cp -r support $acl_install_dir; " \ - f"cp -r src $acl_install_dir; cd /") - - -''' -Complete wheel build and put in artifact location -''' + os.system("cd ComputeLibrary; export acl_install_dir=/acl; " + "scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=linux openmp=1 cppthreads=0 arch=armv8.2-a multi_isa=1 build=native build_dir=$acl_install_dir/build; " + "cp -r arm_compute $acl_install_dir; " + "cp -r include $acl_install_dir; " + "cp -r utils $acl_install_dir; " + "cp -r support $acl_install_dir; " + "cp -r src $acl_install_dir; cd /") + + def complete_wheel(folder: str): + ''' + Complete wheel build and put in artifact location + ''' wheel_name = list_dir(f"/{folder}/dist")[0] if "pytorch" in folder: @@ -54,10 +55,10 @@ def complete_wheel(folder: str): return repaired_wheel_name -''' -Parse inline arguments -''' def parse_arguments(): + ''' + Parse inline arguments + ''' from argparse import ArgumentParser parser = ArgumentParser("AARCH64 wheels python CD") parser.add_argument("--debug", action="store_true") @@ -67,11 +68,10 @@ def parse_arguments(): return parser.parse_args() -''' -Entry Point -''' if __name__ == '__main__': - + ''' + Entry Point + ''' args = parse_arguments() enable_mkldnn = args.enable_mkldnn repo = Repository('/pytorch') @@ -80,15 +80,14 @@ def parse_arguments(): branch = 'master' git_clone_flags = " --depth 1 --shallow-submodules" - os.system(f"conda install -y ninja scons") print('Building PyTorch wheel') build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 " - os.system(f"python setup.py clean") + os.system("python setup.py clean") if branch == 'nightly' or branch == 'master': - build_date = subprocess.check_output(['git','log','--pretty=format:%cs','-1'], cwd='/pytorch').decode().replace('-','') - version = subprocess.check_output(['cat','version.txt'], cwd='/pytorch').decode().strip()[:-2] + build_date = 
subprocess.check_output(['git', 'log', '--pretty=format:%cs', '-1'], cwd='/pytorch').decode().replace('-', '') + version = subprocess.check_output(['cat', 'version.txt'], cwd='/pytorch').decode().strip()[:-2] build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1 " if branch.startswith("v1.") or branch.startswith("v2."): build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1 " @@ -96,10 +95,10 @@ def parse_arguments(): build_ArmComputeLibrary(git_clone_flags) print("build pytorch with mkldnn+acl backend") build_vars += "USE_MKLDNN=ON USE_MKLDNN_ACL=ON " \ - "ACL_ROOT_DIR=/acl " \ - "LD_LIBRARY_PATH=/pytorch/build/lib:/acl/build:$LD_LIBRARY_PATH " \ - "ACL_INCLUDE_DIR=/acl/build " \ - "ACL_LIBRARY=/acl/build " + "ACL_ROOT_DIR=/acl " \ + "LD_LIBRARY_PATH=/pytorch/build/lib:/acl/build:$LD_LIBRARY_PATH " \ + "ACL_INCLUDE_DIR=/acl/build " \ + "ACL_LIBRARY=/acl/build " else: print("build pytorch without mkldnn backend") diff --git a/manywheel/Dockerfile_aarch64 b/manywheel/Dockerfile_aarch64 new file mode 100644 index 000000000..abfc2fd84 --- /dev/null +++ b/manywheel/Dockerfile_aarch64 @@ -0,0 +1,86 @@ +FROM quay.io/pypa/manylinux2014_aarch64 as base + + +# Graviton needs GCC 10 for the build +ARG DEVTOOLSET_VERSION=10 + +# Language variabes +ENV LC_ALL=en_US.UTF-8 +ENV LANG=en_US.UTF-8 +ENV LANGUAGE=en_US.UTF-8 + +# Installed needed OS packages. 
This is to support all +# the binary builds (torch, vision, audio, text, data) +RUN yum -y install epel-release +RUN yum -y update +RUN yum install -y \ + autoconf \ + automake \ + bison \ + bzip2 \ + curl \ + diffutils \ + file \ + git \ + make \ + patch \ + perl \ + unzip \ + util-linux \ + wget \ + which \ + xz \ + yasm \ + less \ + zstd \ + libgomp \ + devtoolset-${DEVTOOLSET_VERSION}-gcc \ + devtoolset-${DEVTOOLSET_VERSION}-gcc-c++ \ + devtoolset-${DEVTOOLSET_VERSION}-gcc-gfortran \ + devtoolset-${DEVTOOLSET_VERSION}-binutils + +# Ensure the expected devtoolset is used +ENV PATH=/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH +ENV LD_LIBRARY_PATH=/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH + + +# git236+ would refuse to run git commands in repos owned by other users +# Which causes version check to fail, as pytorch repo is bind-mounted into the image +# Override this behaviour by treating every folder as safe +# For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327 +RUN git config --global --add safe.directory "*" + + +############################################################################### +# libglfortran.a hack +# +# libgfortran.a from quay.io/pypa/manylinux2014_aarch64 is not compiled with -fPIC. +# This causes __stack_chk_guard@@GLIBC_2.17 on pytorch build. 
To solve, get +# ubuntu's libgfortran.a which is compiled with -fPIC +# NOTE: Need a better way to get this library as Ubuntu's package can be removed by the vender, or changed +############################################################################### +RUN cd ~/ \ + && curl -L -o ~/libgfortran-10-dev.deb http://ports.ubuntu.com/ubuntu-ports/pool/universe/g/gcc-10/libgfortran-10-dev_10.5.0-1ubuntu1_arm64.deb \ + && ar x ~/libgfortran-10-dev.deb \ + && tar --use-compress-program=unzstd -xvf data.tar.zst -C ~/ \ + && cp -f ~/usr/lib/gcc/aarch64-linux-gnu/10/libgfortran.a /opt/rh/devtoolset-10/root/usr/lib/gcc/aarch64-redhat-linux/10/ + +# install cmake +RUN yum install -y cmake3 && \ + ln -s /usr/bin/cmake3 /usr/bin/cmake + +FROM base as openssl +# Install openssl (this must precede `build python` step) +# (In order to have a proper SSL module, Python is compiled +# against a recent openssl [see env vars above], which is linked +# statically. We delete openssl afterwards.) +ADD ./common/install_openssl.sh install_openssl.sh +RUN bash ./install_openssl.sh && rm install_openssl.sh +ENV SSL_CERT_FILE=/opt/_internal/certs.pem + +FROM openssl as final +# remove unncessary python versions +RUN rm -rf /opt/python/cp26-cp26m /opt/_internal/cpython-2.6.9-ucs2 +RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4 +RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6 +RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6 diff --git a/manywheel/build_all_docker.sh b/manywheel/build_all_docker.sh index 97c1f89ab..2bd720f2f 100644 --- a/manywheel/build_all_docker.sh +++ b/manywheel/build_all_docker.sh @@ -7,6 +7,8 @@ TOPDIR=$(git rev-parse --show-toplevel) GPU_ARCH_TYPE=cpu "${TOPDIR}/manywheel/build_docker.sh" MANYLINUX_VERSION=2014 GPU_ARCH_TYPE=cpu "${TOPDIR}/manywheel/build_docker.sh" +GPU_ARCH_TYPE=cpu-aarch64 "${TOPDIR}/manywheel/build_docker.sh" + GPU_ARCH_TYPE=cpu-cxx11-abi "${TOPDIR}/manywheel/build_docker.sh" for cuda_version in 
12.1 11.8; do diff --git a/manywheel/build_docker.sh b/manywheel/build_docker.sh index 38d043ff4..e547b4275 100755 --- a/manywheel/build_docker.sh +++ b/manywheel/build_docker.sh @@ -20,6 +20,14 @@ case ${GPU_ARCH_TYPE} in GPU_IMAGE=centos:7 DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=9" ;; + cpu-aarch64) + TARGET=final + DOCKER_TAG=cpu-aarch64 + LEGACY_DOCKER_IMAGE=${DOCKER_REGISTRY}/pytorch/manylinux-cpu-aarch64 + GPU_IMAGE=arm64v8/centos:7 + DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=10" + MANY_LINUX_VERSION="aarch64" + ;; cpu-cxx11-abi) TARGET=final DOCKER_TAG=cpu-cxx11-abi From 8715349b3ffbb67a9cf7054491da6f3296d62e75 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Sat, 12 Aug 2023 00:37:59 +0000 Subject: [PATCH 005/212] Fix `install_conda.sh` By pinning conda version to 23.5.2 as latest(23.7.2 at this time) does not have a compatible version of `git` packages Fixes https://github.com/pytorch/builder/issues/1473 --- common/install_conda.sh | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/common/install_conda.sh b/common/install_conda.sh index bd0607525..6ae978f05 100644 --- a/common/install_conda.sh +++ b/common/install_conda.sh @@ -3,13 +3,18 @@ set -ex # Anaconda -wget -q https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -chmod +x Miniconda3-latest-Linux-x86_64.sh +# Latest anaconda is using openssl-3 which is incompatible with all currently published versions of git +# Which are using openssl-1.1.1, see https://anaconda.org/anaconda/git/files?version=2.40.1 for example +MINICONDA_URL=https://repo.anaconda.com/miniconda/Miniconda3-py311_23.5.2-0-Linux-x86_64.sh +wget -q $MINICONDA_URL # NB: Manually invoke bash per https://github.com/conda/conda/issues/10431 -bash ./Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda -rm Miniconda3-latest-Linux-x86_64.sh +bash $(basename "$MINICONDA_URL") -b -p /opt/conda +rm $(basename "$MINICONDA_URL") export PATH=/opt/conda/bin:$PATH +# See 
https://github.com/pytorch/builder/issues/1473 +# Pin conda to 23.5.2 as it's the last one compatible with openssl-1.1.1 +conda install -y conda=23.5.2 conda-build anaconda-client git ninja # The cmake version here needs to match with the minimum version of cmake # supported by PyTorch (3.18). There is only 3.18.2 on anaconda -conda install -y conda-build anaconda-client git ninja cmake=3.18.2 +/opt/conda/bin/pip3 install cmake==3.18.2 conda remove -y --force patchelf From 912957ae6690d7dd5af7261b5c4ec8e237a35e63 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Sat, 12 Aug 2023 01:26:09 +0000 Subject: [PATCH 006/212] Remove explicit `conda install cmake` As it's already done as part of `common/install_conda.sh` script --- libtorch/Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/libtorch/Dockerfile b/libtorch/Dockerfile index a71d78594..c01c6416e 100644 --- a/libtorch/Dockerfile +++ b/libtorch/Dockerfile @@ -44,7 +44,6 @@ ENV CUDA_HOME /usr/local/cuda FROM base as conda ADD ./common/install_conda.sh install_conda.sh RUN bash ./install_conda.sh && rm install_conda.sh -RUN /opt/conda/bin/conda install -y cmake=3.18 FROM cuda as cuda11.8 RUN bash ./install_cuda.sh 11.8 From 941be28cb5c686dc41b7ea8681701e64192c3002 Mon Sep 17 00:00:00 2001 From: ptrblck Date: Fri, 11 Aug 2023 21:20:54 -0700 Subject: [PATCH 007/212] update to CUDA 12.1U1 (#1476) Should fix pytorch/pytorch#94772 in wheel builds --- common/install_cuda.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/common/install_cuda.sh b/common/install_cuda.sh index 6972c61b1..e087a44c1 100644 --- a/common/install_cuda.sh +++ b/common/install_cuda.sh @@ -37,10 +37,10 @@ function install_121 { echo "Installing CUDA 12.1 and cuDNN 8.9 and NCCL 2.18.1" rm -rf /usr/local/cuda-12.1 /usr/local/cuda # install CUDA 12.1.0 in the same container - wget -q https://developer.download.nvidia.com/compute/cuda/12.1.0/local_installers/cuda_12.1.0_530.30.02_linux.run - chmod +x 
cuda_12.1.0_530.30.02_linux.run - ./cuda_12.1.0_530.30.02_linux.run --toolkit --silent - rm -f cuda_12.1.0_530.30.02_linux.run + wget -q https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda_12.1.1_530.30.02_linux.run + chmod +x cuda_12.1.1_530.30.02_linux.run + ./cuda_12.1.1_530.30.02_linux.run --toolkit --silent + rm -f cuda_12.1.1_530.30.02_linux.run rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.1 /usr/local/cuda # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement From 19007fecd4f1990f7392fb50d66f5fb9faadcf33 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Mon, 14 Aug 2023 11:28:26 -0400 Subject: [PATCH 008/212] Use conda version 23.5.2 for conda pytorch build (#1477) --- conda/build_pytorch.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/build_pytorch.sh b/conda/build_pytorch.sh index 19c7c5b63..748d2604e 100755 --- a/conda/build_pytorch.sh +++ b/conda/build_pytorch.sh @@ -337,7 +337,7 @@ for py_ver in "${DESIRED_PYTHON[@]}"; do # Build the package echo "Build $build_folder for Python version $py_ver" conda config --set anaconda_upload no - conda install -y conda-package-handling conda==22.9.0 + conda install -y conda-package-handling conda==23.5.2 if [[ "$OSTYPE" == "msys" ]]; then # Don't run tests on windows (they were ignored mostly anyways) From 5585c052357c87026e75160cfa2c35911e51d4f0 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Mon, 14 Aug 2023 13:48:14 -0400 Subject: [PATCH 009/212] Use py311 miniconda install (#1479) --- conda/build_pytorch.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/build_pytorch.sh b/conda/build_pytorch.sh index 748d2604e..c25cd4f6a 100755 --- a/conda/build_pytorch.sh +++ b/conda/build_pytorch.sh @@ -201,7 +201,7 @@ if [[ "$(uname)" == 'Darwin' ]]; then miniconda_sh="${MAC_PACKAGE_WORK_DIR}/miniconda.sh" rm -rf "$tmp_conda" rm -f "$miniconda_sh" - retry curl -sS 
https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-MacOSX-x86_64.sh -o "$miniconda_sh" + retry curl -sS https://repo.anaconda.com/miniconda/Miniconda3-py311_23.5.2-0-MacOSX-x86_64.sh -o "$miniconda_sh" chmod +x "$miniconda_sh" && \ "$miniconda_sh" -b -p "$tmp_conda" && \ rm "$miniconda_sh" @@ -212,7 +212,7 @@ elif [[ "$OSTYPE" == "msys" ]]; then export miniconda_exe="${WIN_PACKAGE_WORK_DIR}\\miniconda.exe" rm -rf "$tmp_conda" rm -f "$miniconda_exe" - curl -sSk https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-Windows-x86_64.exe -o "$miniconda_exe" + curl -sSk https://repo.anaconda.com/miniconda/Miniconda3-py311_23.5.2-0-Windows-x86_64.exe -o "$miniconda_exe" "$SOURCE_DIR/install_conda.bat" && rm "$miniconda_exe" pushd $tmp_conda export PATH="$(pwd):$(pwd)/Library/usr/bin:$(pwd)/Library/bin:$(pwd)/Scripts:$(pwd)/bin:$PATH" From 963129206eeef51062aa2c881f0c57e39282e72c Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Mon, 14 Aug 2023 14:09:42 -0400 Subject: [PATCH 010/212] Windows conda build fix (#1480) --- conda/build_pytorch.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/conda/build_pytorch.sh b/conda/build_pytorch.sh index c25cd4f6a..5501a25a2 100755 --- a/conda/build_pytorch.sh +++ b/conda/build_pytorch.sh @@ -337,12 +337,14 @@ for py_ver in "${DESIRED_PYTHON[@]}"; do # Build the package echo "Build $build_folder for Python version $py_ver" conda config --set anaconda_upload no - conda install -y conda-package-handling conda==23.5.2 if [[ "$OSTYPE" == "msys" ]]; then # Don't run tests on windows (they were ignored mostly anyways) NO_TEST="--no-test" + # Fow windows need to keep older conda version + conda install -y conda-package-handling conda==22.9.0 else + conda install -y conda-package-handling conda==23.5.2 # NS: To be removed after conda docker images are updated conda update -y conda-build fi From 9f0c8ebdc77d5a1e0445839379cb3e69a6de29f0 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Mon, 14 Aug 2023 
14:18:03 -0400 Subject: [PATCH 011/212] Revert "Use py311 miniconda install (#1479)" (#1481) This reverts commit 5585c052357c87026e75160cfa2c35911e51d4f0. --- conda/build_pytorch.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/build_pytorch.sh b/conda/build_pytorch.sh index 5501a25a2..ef630e155 100755 --- a/conda/build_pytorch.sh +++ b/conda/build_pytorch.sh @@ -201,7 +201,7 @@ if [[ "$(uname)" == 'Darwin' ]]; then miniconda_sh="${MAC_PACKAGE_WORK_DIR}/miniconda.sh" rm -rf "$tmp_conda" rm -f "$miniconda_sh" - retry curl -sS https://repo.anaconda.com/miniconda/Miniconda3-py311_23.5.2-0-MacOSX-x86_64.sh -o "$miniconda_sh" + retry curl -sS https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-MacOSX-x86_64.sh -o "$miniconda_sh" chmod +x "$miniconda_sh" && \ "$miniconda_sh" -b -p "$tmp_conda" && \ rm "$miniconda_sh" @@ -212,7 +212,7 @@ elif [[ "$OSTYPE" == "msys" ]]; then export miniconda_exe="${WIN_PACKAGE_WORK_DIR}\\miniconda.exe" rm -rf "$tmp_conda" rm -f "$miniconda_exe" - curl -sSk https://repo.anaconda.com/miniconda/Miniconda3-py311_23.5.2-0-Windows-x86_64.exe -o "$miniconda_exe" + curl -sSk https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-Windows-x86_64.exe -o "$miniconda_exe" "$SOURCE_DIR/install_conda.bat" && rm "$miniconda_exe" pushd $tmp_conda export PATH="$(pwd):$(pwd)/Library/usr/bin:$(pwd)/Library/bin:$(pwd)/Scripts:$(pwd)/bin:$PATH" From 82aef70b417124d92783b3376abc1f6519d181ce Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Mon, 14 Aug 2023 17:17:22 -0400 Subject: [PATCH 012/212] Remove c/cb folder on windows (#1482) --- conda/build_pytorch.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/conda/build_pytorch.sh b/conda/build_pytorch.sh index ef630e155..88626a34a 100755 --- a/conda/build_pytorch.sh +++ b/conda/build_pytorch.sh @@ -286,9 +286,7 @@ fi # Some tricks for sccache with conda builds on Windows if [[ "$OSTYPE" == "msys" && "$USE_SCCACHE" == "1" ]]; then - if [[ ! 
-d "/c/cb" ]]; then - rm -rf /c/cb - fi + rm -rf /c/cb mkdir -p /c/cb/pytorch_1000000000000 export CONDA_BLD_PATH="C:\\cb" export CONDA_BUILD_EXTRA_ARGS="--dirty" From 64187393523109aefd1c3ec2f2193bc40d50d410 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Tue, 15 Aug 2023 09:36:33 -0400 Subject: [PATCH 013/212] Add numpy install - fix windows smoke tests (#1483) * Add numpy install * Add numpy install --- windows/internal/smoke_test.bat | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/windows/internal/smoke_test.bat b/windows/internal/smoke_test.bat index decb2cfb1..ad276b992 100644 --- a/windows/internal/smoke_test.bat +++ b/windows/internal/smoke_test.bat @@ -91,7 +91,9 @@ call %CONDA_HOME%\condabin\activate.bat testenv if errorlevel 1 exit /b 1 :: do conda install to make sure all the dependencies are installed -call conda install -yq pytorch %CONDA_EXTRA_ARGS% +:: Install numpy see: https://github.com/pytorch/pytorch/issues/107228 +:: todo: Remove numpy install once the issue above is resolved +call conda install -yq numpy pytorch %CONDA_EXTRA_ARGS% if ERRORLEVEL 1 exit /b 1 set /a CUDA_VER=%CUDA_VERSION% From 3f44ffe2a901fe78ed0b5161d5509a80d9a7fe83 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Tue, 15 Aug 2023 10:20:24 -0400 Subject: [PATCH 014/212] Add hostedtoolcache purge step (#1484) * Add hostedtoolcache purge step * Change step name --- .github/workflows/build-manywheel-images.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build-manywheel-images.yml b/.github/workflows/build-manywheel-images.yml index df890f038..3bf2dbe70 100644 --- a/.github/workflows/build-manywheel-images.yml +++ b/.github/workflows/build-manywheel-images.yml @@ -42,6 +42,8 @@ jobs: GPU_ARCH_TYPE: cuda GPU_ARCH_VERSION: ${{ matrix.cuda_version }} steps: + - name: Purge tools folder (free space for build) + run: rm -rf /opt/hostedtoolcache - name: Checkout PyTorch builder uses: actions/checkout@v3 - name: Authenticate if 
WITH_PUSH From 2db96763ce9f9eae959982cb1272cfc56a72eefc Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Tue, 15 Aug 2023 10:33:32 -0400 Subject: [PATCH 015/212] Update CUDA_UPGRADE_GUIDE.MD --- CUDA_UPGRADE_GUIDE.MD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CUDA_UPGRADE_GUIDE.MD b/CUDA_UPGRADE_GUIDE.MD index bee03ecd1..ca8687cbd 100644 --- a/CUDA_UPGRADE_GUIDE.MD +++ b/CUDA_UPGRADE_GUIDE.MD @@ -52,7 +52,7 @@ There are three types of Docker containers we maintain in order to build Linux b ## 3. Update Magma for Linux Build Magma for Linux. Our Linux CUDA jobs use conda, so we need to build magma-cuda116 and push it to anaconda: -1. Follow this [PR 997](https://github.com/pytorch/builder/pull/997) for all steps in this section +1. Follow this [PR 1368](https://github.com/pytorch/builder/pull/1368) for all steps in this section 2. Currently, this is mainly copy-paste in [`magma/Makefile`](magma/Makefile) if there are no major code API changes/deprecations to the CUDA version. Previously, we've needed to add patches to MAGMA, so this may be something to check with NVIDIA about. 3. To push the package, please update build-magma-linux workflow [PR 897](https://github.com/pytorch/builder/pull/897). 4. NOTE: This step relies on the conda-builder image (changes to `.github/workflows/build-conda-images.yml`), so make sure you have pushed the new conda-builder prior. 
Validate this step by logging into anaconda.org and seeing your package deployed for example [here](https://anaconda.org/pytorch/magma-cuda115) From 294487f161d49c7128dae12188a9b70f93303c5f Mon Sep 17 00:00:00 2001 From: ptrblck Date: Tue, 15 Aug 2023 08:40:18 -0700 Subject: [PATCH 016/212] update CUDA to 12.1U1 for Windows (#1485) --- windows/internal/cuda_install.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/windows/internal/cuda_install.bat b/windows/internal/cuda_install.bat index fd20541ee..acd457a17 100644 --- a/windows/internal/cuda_install.bat +++ b/windows/internal/cuda_install.bat @@ -54,7 +54,7 @@ goto cuda_common :cuda121 -set CUDA_INSTALL_EXE=cuda_12.1.0_531.14_windows.exe +set CUDA_INSTALL_EXE=cuda_12.1.1_531.14_windows.exe if not exist "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" ( curl -k -L "https://ossci-windows.s3.amazonaws.com/%CUDA_INSTALL_EXE%" --output "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" if errorlevel 1 exit /b 1 From c8a03796b91d6e436ba3ed3c2c0c69af7785462d Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Wed, 16 Aug 2023 09:27:32 -0400 Subject: [PATCH 017/212] Small improvements in build pytorch script (#1486) --- conda/build_pytorch.sh | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/conda/build_pytorch.sh b/conda/build_pytorch.sh index 88626a34a..12c0fb827 100755 --- a/conda/build_pytorch.sh +++ b/conda/build_pytorch.sh @@ -98,13 +98,11 @@ if [[ -z "$EXTRA_CAFFE2_CMAKE_FLAGS" ]]; then # These are passed to tools/build_pytorch_libs.sh::build_caffe2() EXTRA_CAFFE2_CMAKE_FLAGS=() fi + if [[ -z "$DESIRED_PYTHON" ]]; then - if [[ "$OSTYPE" == "msys" ]]; then - DESIRED_PYTHON=('3.5' '3.6' '3.7') - else - DESIRED_PYTHON=('2.7' '3.5' '3.6' '3.7' '3.8') - fi + DESIRED_PYTHON=('3.8') fi + if [[ "$OSTYPE" == "darwin"* ]]; then DEVELOPER_DIR=/Applications/Xcode_13.3.1.app/Contents/Developer fi @@ -366,7 +364,7 @@ for py_ver in "${DESIRED_PYTHON[@]}"; do # TODO these reqs are hardcoded 
for pytorch-nightly test_env="env_$folder_tag" retry conda create -yn "$test_env" python="$py_ver" - source activate "$test_env" + conda activate "$test_env" # Extract the package for testing ls -lah "$output_folder" @@ -410,14 +408,19 @@ for py_ver in "${DESIRED_PYTHON[@]}"; do fi # Clean up test folder - source deactivate + conda deactivate conda env remove -yn "$test_env" rm -rf "$output_folder" done # Cleanup the tricks for sccache with conda builds on Windows if [[ "$OSTYPE" == "msys" ]]; then + # Please note sometimes we get Device or resource busy during + # this cleanup step. We don't want to fail the build because of this + # hence adding +e, -e around the cleanup step + set +e rm -rf /c/cb/pytorch_1000000000000 + set -e unset CONDA_BLD_PATH fi unset CONDA_BUILD_EXTRA_ARGS From 3e7a8560d3979e079d21b2700fb5776eb1c6bbf8 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Wed, 16 Aug 2023 13:34:10 -0400 Subject: [PATCH 018/212] Undo using conda activate (#1487) --- conda/build_pytorch.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/build_pytorch.sh b/conda/build_pytorch.sh index 12c0fb827..6f8eaf502 100755 --- a/conda/build_pytorch.sh +++ b/conda/build_pytorch.sh @@ -364,7 +364,7 @@ for py_ver in "${DESIRED_PYTHON[@]}"; do # TODO these reqs are hardcoded for pytorch-nightly test_env="env_$folder_tag" retry conda create -yn "$test_env" python="$py_ver" - conda activate "$test_env" + source activate "$test_env" # Extract the package for testing ls -lah "$output_folder" @@ -408,7 +408,7 @@ for py_ver in "${DESIRED_PYTHON[@]}"; do fi # Clean up test folder - conda deactivate + source deactivate conda env remove -yn "$test_env" rm -rf "$output_folder" done From a5aa27fd0254b4f389293ae1a721b82e6fa24030 Mon Sep 17 00:00:00 2001 From: JYX Date: Sat, 19 Aug 2023 04:30:50 +0800 Subject: [PATCH 019/212] Update meta.yaml (#1389) --- conda/pytorch-nightly/meta.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/conda/pytorch-nightly/meta.yaml b/conda/pytorch-nightly/meta.yaml index 52db9ac0e..5cab9f53d 100644 --- a/conda/pytorch-nightly/meta.yaml +++ b/conda/pytorch-nightly/meta.yaml @@ -42,6 +42,9 @@ requirements: {% endif %} - libuv # [win] - intel-openmp # [win] + # llvm-openmp 16 leads to wrong processor affinity for fork child, see #99625. + # Before a decent fix, force llvm-openmp version <16. + - llvm-openmp <16 # [linux] - typing_extensions - sympy - filelock From eba456f07dca6de6fee488c163a40d97f9e4db9c Mon Sep 17 00:00:00 2001 From: Jithun Nair <37884920+jithunnair-amd@users.noreply.github.com> Date: Tue, 22 Aug 2023 18:37:16 -0500 Subject: [PATCH 020/212] Add pytorch-triton-rocm as an install dependency for ROCm (#1463) * Add pytorch-triton-rocm as an install dependency for ROCm * Update build_rocm.sh --- manywheel/build_rocm.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/manywheel/build_rocm.sh b/manywheel/build_rocm.sh index fbbf7d3a6..80ebde6f4 100755 --- a/manywheel/build_rocm.sh +++ b/manywheel/build_rocm.sh @@ -214,6 +214,18 @@ elif [[ $ROCM_INT -ge 50600 ]]; then DEPS_AUX_DSTLIST+=(${RCCL_SHARE_FILES[@]/#/$RCCL_SHARE_DST/}) fi +# Add triton install dependency +if [[ $(uname) == "Linux" ]]; then + TRITON_SHORTHASH=$(cut -c1-10 $PYTORCH_ROOT/.ci/docker/ci_commit_pins/triton-rocm.txt) + TRITON_VERSION=$(cat $PYTORCH_ROOT/.ci/docker/triton_version.txt) + + if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then + export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="pytorch-triton-rocm==${TRITON_VERSION}+${TRITON_SHORTHASH}" + else + export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${PYTORCH_EXTRA_INSTALL_REQUIREMENTS} | pytorch-triton-rocm==${TRITON_VERSION}+${TRITON_SHORTHASH}" + fi +fi + echo "PYTORCH_ROCM_ARCH: ${PYTORCH_ROCM_ARCH}" From 331b031b85e8a90cdb06e3db2b73e6b4e08c742a Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Wed, 23 Aug 2023 11:23:28 -0400 Subject: [PATCH 021/212] Add aarch64 to validation framework (#1474) --- 
.github/scripts/validate_binaries.sh | 2 + .../validate-aarch64-linux-binaries.yml | 61 +++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 .github/workflows/validate-aarch64-linux-binaries.yml diff --git a/.github/scripts/validate_binaries.sh b/.github/scripts/validate_binaries.sh index 56668ab77..cdcbea30b 100755 --- a/.github/scripts/validate_binaries.sh +++ b/.github/scripts/validate_binaries.sh @@ -27,6 +27,8 @@ else if [[ ${TARGET_OS} == 'windows' ]]; then python ./test/smoke_test/smoke_test.py + elif [[ ${TARGET_OS} == 'aarch64-linux' ]]; then + python3 ./test/smoke_test/smoke_test.py --package=torchonly else python3 ./test/smoke_test/smoke_test.py fi diff --git a/.github/workflows/validate-aarch64-linux-binaries.yml b/.github/workflows/validate-aarch64-linux-binaries.yml new file mode 100644 index 000000000..3ffefa52a --- /dev/null +++ b/.github/workflows/validate-aarch64-linux-binaries.yml @@ -0,0 +1,61 @@ +name: Validate Aarch64 linux binaries + +on: + workflow_call: + inputs: + channel: + description: "Channel to use (nightly, test, release, all)" + required: true + type: string + ref: + description: 'Reference to checkout, defaults to empty' + default: "" + required: false + type: string + workflow_dispatch: + inputs: + channel: + description: "Channel to use (nightly, test, release, all)" + required: true + type: choice + options: + - release + - nightly + - test + - all + ref: + description: 'Reference to checkout, defaults to empty' + default: "" + required: false + type: string + +jobs: + generate-aarch64-linux-matrix: + uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main + with: + package-type: wheel + os: linux-aarch64 + channel: ${{ inputs.channel }} + with-cuda: disable + + linux: + needs: generate-aarch64-linux-matrix + strategy: + matrix: ${{ fromJson(needs.generate-linux-matrix.outputs.matrix) }} + fail-fast: false + uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + name: ${{ 
matrix.build_name }} + with: + runner: ${{ matrix.validation_runner }} + repository: "pytorch/builder" + ref: ${{ inputs.ref || github.ref }} + job-name: ${{ matrix.build_name }} + binary-matrix: ${{ toJSON(matrix) }} + script: | + set -ex + export ENV_NAME="conda-env-${{ github.run_id }}" + export TARGET_OS="aarch64-linux" + eval "$(conda shell.bash hook)" + + # Standart case: Validate binaries + source ./.github/scripts/validate_binaries.sh From 3f1e42f065de54b3044fe39f273aeaa5452162e2 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Wed, 23 Aug 2023 11:40:14 -0400 Subject: [PATCH 022/212] Add aarch64 to validation framework (#1489) --- .github/scripts/validate_binaries.sh | 2 +- .github/workflows/validate-aarch64-linux-binaries.yml | 2 +- .github/workflows/validate-binaries.yml | 8 ++++++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/scripts/validate_binaries.sh b/.github/scripts/validate_binaries.sh index cdcbea30b..e1d0ec75b 100755 --- a/.github/scripts/validate_binaries.sh +++ b/.github/scripts/validate_binaries.sh @@ -27,7 +27,7 @@ else if [[ ${TARGET_OS} == 'windows' ]]; then python ./test/smoke_test/smoke_test.py - elif [[ ${TARGET_OS} == 'aarch64-linux' ]]; then + elif [[ ${TARGET_OS} == 'linux-aarch64' ]]; then python3 ./test/smoke_test/smoke_test.py --package=torchonly else python3 ./test/smoke_test/smoke_test.py diff --git a/.github/workflows/validate-aarch64-linux-binaries.yml b/.github/workflows/validate-aarch64-linux-binaries.yml index 3ffefa52a..a174e57be 100644 --- a/.github/workflows/validate-aarch64-linux-binaries.yml +++ b/.github/workflows/validate-aarch64-linux-binaries.yml @@ -54,7 +54,7 @@ jobs: script: | set -ex export ENV_NAME="conda-env-${{ github.run_id }}" - export TARGET_OS="aarch64-linux" + export TARGET_OS="linux-aarch64" eval "$(conda shell.bash hook)" # Standart case: Validate binaries diff --git a/.github/workflows/validate-binaries.yml b/.github/workflows/validate-binaries.yml index 
2a6106a20..080352f90 100644 --- a/.github/workflows/validate-binaries.yml +++ b/.github/workflows/validate-binaries.yml @@ -32,6 +32,7 @@ on: options: - windows - linux + - linux-aarch64 - macos - all channel: @@ -65,6 +66,13 @@ jobs: channel: ${{ inputs.channel }} ref: ${{ inputs.ref || github.ref }} + linux: + if: inputs.os == 'linux-aarch64' + uses: ./.github/workflows/validate-aarch64-linux-binaries.yml + with: + channel: ${{ inputs.channel }} + ref: ${{ inputs.ref || github.ref }} + mac: if: inputs.os == 'macos' || inputs.os == 'all' uses: ./.github/workflows/validate-macos-binaries.yml From 1656d22c7f4ca6a230cf4656fd62406dc80772b4 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Wed, 23 Aug 2023 11:47:08 -0400 Subject: [PATCH 023/212] Add aarch64 to validation framework (#1490) * Add aarch64 to validation framework * Add aarch64 to validation framework --- .github/workflows/validate-binaries.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/validate-binaries.yml b/.github/workflows/validate-binaries.yml index 080352f90..7a679a267 100644 --- a/.github/workflows/validate-binaries.yml +++ b/.github/workflows/validate-binaries.yml @@ -66,7 +66,7 @@ jobs: channel: ${{ inputs.channel }} ref: ${{ inputs.ref || github.ref }} - linux: + linux-aarch64: if: inputs.os == 'linux-aarch64' uses: ./.github/workflows/validate-aarch64-linux-binaries.yml with: From 0ceb5a98f8f1fe762167b1d16de7028c83de3066 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Wed, 23 Aug 2023 11:58:29 -0400 Subject: [PATCH 024/212] Add aarch64 to validation framework (#1491) * Add aarch64 to validation framework * Add aarch64 to validation framework * Add aarch64 to validation framework --- .github/workflows/validate-aarch64-linux-binaries.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/validate-aarch64-linux-binaries.yml b/.github/workflows/validate-aarch64-linux-binaries.yml index a174e57be..4b1f06720 100644 --- 
a/.github/workflows/validate-aarch64-linux-binaries.yml +++ b/.github/workflows/validate-aarch64-linux-binaries.yml @@ -38,10 +38,10 @@ jobs: channel: ${{ inputs.channel }} with-cuda: disable - linux: + linux-aarch64: needs: generate-aarch64-linux-matrix strategy: - matrix: ${{ fromJson(needs.generate-linux-matrix.outputs.matrix) }} + matrix: ${{ fromJson(needs.generate-aarch64-linux-matrix.outputs.matrix) }} fail-fast: false uses: pytorch/test-infra/.github/workflows/linux_job.yml@main name: ${{ matrix.build_name }} From 897b1dfd70acbe85df155c0ba3e7eb731b2dca47 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Wed, 23 Aug 2023 12:45:03 -0400 Subject: [PATCH 025/212] Temporary disable poetry test (#1492) --- .github/workflows/validate-linux-binaries.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/validate-linux-binaries.yml b/.github/workflows/validate-linux-binaries.yml index e33d89fb6..13d980488 100644 --- a/.github/workflows/validate-linux-binaries.yml +++ b/.github/workflows/validate-linux-binaries.yml @@ -61,7 +61,8 @@ jobs: ([[ ${MATRIX_GPU_ARCH_VERSION} == "12.1" && ${MATRIX_CHANNEL} != "release" ]] || \ [[ ${MATRIX_GPU_ARCH_VERSION} == "11.7" && ${MATRIX_CHANNEL} == "release" ]]); then source ./.github/scripts/validate_pipy.sh --runtime-error-check disabled - source ./.github/scripts/validate_poetry.sh --runtime-error-check disabled + # temporary disable poetry check + # source ./.github/scripts/validate_poetry.sh --runtime-error-check disabled fi # Standart case: Validate binaries From 63e0dab0723b6f733b551d2747326630450335e0 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Wed, 23 Aug 2023 17:00:34 -0400 Subject: [PATCH 026/212] Add torchonly option to validation workflows (#1494) * Add torchonly option to validation workflows * fix typo --- .github/scripts/validate_binaries.sh | 9 +++++++-- .../workflows/validate-aarch64-linux-binaries.yml | 11 +++++++++++ .github/workflows/validate-binaries.yml | 15 
+++++++++++++++ .github/workflows/validate-linux-binaries.yml | 11 +++++++++++ .../workflows/validate-macos-arm64-binaries.yml | 11 +++++++++++ .github/workflows/validate-macos-binaries.yml | 11 +++++++++++ .github/workflows/validate-windows-binaries.yml | 11 +++++++++++ 7 files changed, 77 insertions(+), 2 deletions(-) diff --git a/.github/scripts/validate_binaries.sh b/.github/scripts/validate_binaries.sh index e1d0ec75b..6b4bccd6b 100755 --- a/.github/scripts/validate_binaries.sh +++ b/.github/scripts/validate_binaries.sh @@ -6,6 +6,11 @@ else conda create -y -n ${ENV_NAME} python=${MATRIX_PYTHON_VERSION} numpy ffmpeg conda activate ${ENV_NAME} INSTALLATION=${MATRIX_INSTALLATION/"conda install"/"conda install -y"} + TEST_SUFFIX="" + if [[ ${TORCH_ONLY} == 'true' ]]; then + INSTALLATION=${INSTALLATION/"torchvision torchaudio"/""} + TEST_SUFFIX=" --package torchonly" + fi export OLD_PATH=${PATH} # Workaround macos-arm64 runners. Issue: https://github.com/pytorch/test-infra/issues/4342 @@ -26,11 +31,11 @@ else fi if [[ ${TARGET_OS} == 'windows' ]]; then - python ./test/smoke_test/smoke_test.py + python ./test/smoke_test/smoke_test.py ${TEST_SUFFIX} elif [[ ${TARGET_OS} == 'linux-aarch64' ]]; then python3 ./test/smoke_test/smoke_test.py --package=torchonly else - python3 ./test/smoke_test/smoke_test.py + python3 ./test/smoke_test/smoke_test.py ${TEST_SUFFIX} fi if [[ ${TARGET_OS} == 'macos-arm64' ]]; then diff --git a/.github/workflows/validate-aarch64-linux-binaries.yml b/.github/workflows/validate-aarch64-linux-binaries.yml index 4b1f06720..92b5e48ea 100644 --- a/.github/workflows/validate-aarch64-linux-binaries.yml +++ b/.github/workflows/validate-aarch64-linux-binaries.yml @@ -12,6 +12,11 @@ on: default: "" required: false type: string + torchonly: + description: 'Validate torchonly' + default: false + required: false + type: boolean workflow_dispatch: inputs: channel: @@ -28,6 +33,11 @@ on: default: "" required: false type: string + torchonly: + description: 
'Validate torchonly' + default: false + required: false + type: boolean jobs: generate-aarch64-linux-matrix: @@ -55,6 +65,7 @@ jobs: set -ex export ENV_NAME="conda-env-${{ github.run_id }}" export TARGET_OS="linux-aarch64" + export TORCH_ONLY=${{ inputs.torchonly }} eval "$(conda shell.bash hook)" # Standart case: Validate binaries diff --git a/.github/workflows/validate-binaries.yml b/.github/workflows/validate-binaries.yml index 7a679a267..9c877f4ed 100644 --- a/.github/workflows/validate-binaries.yml +++ b/.github/workflows/validate-binaries.yml @@ -22,6 +22,11 @@ on: default: "" required: false type: string + torchonly: + description: 'Validate torchonly' + default: false + required: false + type: boolean workflow_dispatch: inputs: os: @@ -50,6 +55,11 @@ on: default: "" required: false type: string + torchonly: + description: 'Validate torchonly' + default: false + required: false + type: boolean jobs: win: @@ -58,6 +68,7 @@ jobs: with: channel: ${{ inputs.channel }} ref: ${{ inputs.ref || github.ref }} + torchonly: ${{ inputs.torchonly }} linux: if: inputs.os == 'linux' || inputs.os == 'all' @@ -65,6 +76,7 @@ jobs: with: channel: ${{ inputs.channel }} ref: ${{ inputs.ref || github.ref }} + torchonly: ${{ inputs.torchonly }} linux-aarch64: if: inputs.os == 'linux-aarch64' @@ -72,6 +84,7 @@ jobs: with: channel: ${{ inputs.channel }} ref: ${{ inputs.ref || github.ref }} + torchonly: ${{ inputs.torchonly }} mac: if: inputs.os == 'macos' || inputs.os == 'all' @@ -79,6 +92,7 @@ jobs: with: channel: ${{ inputs.channel }} ref: ${{ inputs.ref || github.ref }} + torchonly: ${{ inputs.torchonly }} mac-arm64: if: inputs.os == 'macos' || inputs.os == 'all' @@ -86,3 +100,4 @@ jobs: with: channel: ${{ inputs.channel }} ref: ${{ inputs.ref || github.ref }} + torchonly: ${{ inputs.torchonly }} diff --git a/.github/workflows/validate-linux-binaries.yml b/.github/workflows/validate-linux-binaries.yml index 13d980488..937f0e95f 100644 --- 
a/.github/workflows/validate-linux-binaries.yml +++ b/.github/workflows/validate-linux-binaries.yml @@ -12,6 +12,11 @@ on: default: "" required: false type: string + torchonly: + description: 'Validate torchonly' + default: false + required: false + type: boolean workflow_dispatch: inputs: channel: @@ -28,6 +33,11 @@ on: default: "" required: false type: string + torchonly: + description: 'Validate torchonly' + default: false + required: false + type: boolean jobs: generate-linux-matrix: @@ -53,6 +63,7 @@ jobs: script: | set -ex export ENV_NAME="conda-env-${{ github.run_id }}" + export TORCH_ONLY=${{ inputs.torchonly }} export TARGET_OS="linux" eval "$(conda shell.bash hook)" diff --git a/.github/workflows/validate-macos-arm64-binaries.yml b/.github/workflows/validate-macos-arm64-binaries.yml index f321022d4..f23dec3f6 100644 --- a/.github/workflows/validate-macos-arm64-binaries.yml +++ b/.github/workflows/validate-macos-arm64-binaries.yml @@ -12,6 +12,11 @@ on: default: "" required: false type: string + torchonly: + description: 'Validate torchonly' + default: false + required: false + type: boolean workflow_dispatch: inputs: channel: @@ -28,6 +33,11 @@ on: default: "" required: false type: string + torchonly: + description: 'Validate torchonly' + default: false + required: false + type: boolean jobs: generate-macos-arm64-matrix: @@ -53,4 +63,5 @@ jobs: set -ex export ENV_NAME="conda-env-${{ github.run_id }}" export TARGET_OS="macos-arm64" + export TORCH_ONLY=${{ inputs.torchonly }} source ./.github/scripts/validate_binaries.sh diff --git a/.github/workflows/validate-macos-binaries.yml b/.github/workflows/validate-macos-binaries.yml index 0e3f38ff8..0926dbe93 100644 --- a/.github/workflows/validate-macos-binaries.yml +++ b/.github/workflows/validate-macos-binaries.yml @@ -12,6 +12,11 @@ on: default: "" required: false type: string + torchonly: + description: 'Validate torchonly' + default: false + required: false + type: boolean workflow_dispatch: inputs: channel: 
@@ -28,6 +33,11 @@ on: default: "" required: false type: string + torchonly: + description: 'Validate torchonly' + default: false + required: false + type: boolean jobs: generate-macos-matrix: @@ -53,4 +63,5 @@ jobs: set -ex export ENV_NAME="conda-env-${{ github.run_id }}" export TARGET_OS="macos" + export TORCH_ONLY=${{ inputs.torchonly }} source ./.github/scripts/validate_binaries.sh diff --git a/.github/workflows/validate-windows-binaries.yml b/.github/workflows/validate-windows-binaries.yml index 463626c5a..96d2b281e 100644 --- a/.github/workflows/validate-windows-binaries.yml +++ b/.github/workflows/validate-windows-binaries.yml @@ -12,6 +12,11 @@ on: default: "" required: false type: string + torchonly: + description: 'Validate torchonly' + default: false + required: false + type: boolean workflow_dispatch: inputs: channel: @@ -28,6 +33,11 @@ on: default: "" required: false type: string + torchonly: + description: 'Validate torchonly' + default: false + required: false + type: boolean jobs: generate-windows-matrix: @@ -55,6 +65,7 @@ jobs: set -ex export ENV_NAME="conda-env-${{ github.run_id }}" export TARGET_OS="windows" + export TORCH_ONLY=${{ inputs.torchonly }} source /c/Jenkins/Miniconda3/etc/profile.d/conda.sh if [[ ${MATRIX_GPU_ARCH_VERSION} == "12.1" ]]; then ./windows/internal/driver_update.bat From dbc20b68b9f8432a90fa7961d437ace2b3386cf2 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Fri, 25 Aug 2023 14:04:05 -0400 Subject: [PATCH 027/212] Remove pipy validation temporarily (#1495) --- .github/workflows/validate-linux-binaries.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/validate-linux-binaries.yml b/.github/workflows/validate-linux-binaries.yml index 937f0e95f..088e4dec8 100644 --- a/.github/workflows/validate-linux-binaries.yml +++ b/.github/workflows/validate-linux-binaries.yml @@ -71,7 +71,7 @@ jobs: if [[ ${MATRIX_PACKAGE_TYPE} == "manywheel" ]] && \ ([[ ${MATRIX_GPU_ARCH_VERSION} == "12.1" && 
${MATRIX_CHANNEL} != "release" ]] || \ [[ ${MATRIX_GPU_ARCH_VERSION} == "11.7" && ${MATRIX_CHANNEL} == "release" ]]); then - source ./.github/scripts/validate_pipy.sh --runtime-error-check disabled + # source ./.github/scripts/validate_pipy.sh --runtime-error-check disabled # temporary disable poetry check # source ./.github/scripts/validate_poetry.sh --runtime-error-check disabled fi From 7ce4bc75ae1729bc696edc9359d9c7df8a45d4b7 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Fri, 25 Aug 2023 15:25:23 -0400 Subject: [PATCH 028/212] Remove pipy validation temporarily (#1496) --- .github/workflows/validate-linux-binaries.yml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/.github/workflows/validate-linux-binaries.yml b/.github/workflows/validate-linux-binaries.yml index 088e4dec8..6d135e3a3 100644 --- a/.github/workflows/validate-linux-binaries.yml +++ b/.github/workflows/validate-linux-binaries.yml @@ -67,14 +67,5 @@ jobs: export TARGET_OS="linux" eval "$(conda shell.bash hook)" - # Special case PyPi installation package. 
And Install of PyPi package via poetry - if [[ ${MATRIX_PACKAGE_TYPE} == "manywheel" ]] && \ - ([[ ${MATRIX_GPU_ARCH_VERSION} == "12.1" && ${MATRIX_CHANNEL} != "release" ]] || \ - [[ ${MATRIX_GPU_ARCH_VERSION} == "11.7" && ${MATRIX_CHANNEL} == "release" ]]); then - # source ./.github/scripts/validate_pipy.sh --runtime-error-check disabled - # temporary disable poetry check - # source ./.github/scripts/validate_poetry.sh --runtime-error-check disabled - fi - # Standart case: Validate binaries source ./.github/scripts/validate_binaries.sh From 99f34d63f2cd577f2619c38468fc16e77660c5fe Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Tue, 29 Aug 2023 12:14:23 -0400 Subject: [PATCH 029/212] Add no-sudo to linux-aarch64 tests (#1499) --- .github/workflows/validate-aarch64-linux-binaries.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/validate-aarch64-linux-binaries.yml b/.github/workflows/validate-aarch64-linux-binaries.yml index 92b5e48ea..f496498cf 100644 --- a/.github/workflows/validate-aarch64-linux-binaries.yml +++ b/.github/workflows/validate-aarch64-linux-binaries.yml @@ -61,6 +61,7 @@ jobs: ref: ${{ inputs.ref || github.ref }} job-name: ${{ matrix.build_name }} binary-matrix: ${{ toJSON(matrix) }} + no-sudo: true script: | set -ex export ENV_NAME="conda-env-${{ github.run_id }}" From 6cbaf70f2e4e71ebc8605de35326487344f23d2a Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Tue, 29 Aug 2023 12:32:01 -0400 Subject: [PATCH 030/212] Pass container image to aarch64 test jobs (#1500) --- .github/workflows/validate-aarch64-linux-binaries.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/validate-aarch64-linux-binaries.yml b/.github/workflows/validate-aarch64-linux-binaries.yml index f496498cf..57a37da05 100644 --- a/.github/workflows/validate-aarch64-linux-binaries.yml +++ b/.github/workflows/validate-aarch64-linux-binaries.yml @@ -60,6 +60,7 @@ jobs: repository: "pytorch/builder" ref: ${{ inputs.ref || github.ref }} 
job-name: ${{ matrix.build_name }} + docker-image: ${{ matrix.container_image }} binary-matrix: ${{ toJSON(matrix) }} no-sudo: true script: | From 912bb2aac5ae5f905f2636f84f84db4768f93a11 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Tue, 29 Aug 2023 12:43:01 -0400 Subject: [PATCH 031/212] Add setup aarch64 builds for aarch64 testing (#1501) --- .github/workflows/validate-aarch64-linux-binaries.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/validate-aarch64-linux-binaries.yml b/.github/workflows/validate-aarch64-linux-binaries.yml index 57a37da05..b067f4821 100644 --- a/.github/workflows/validate-aarch64-linux-binaries.yml +++ b/.github/workflows/validate-aarch64-linux-binaries.yml @@ -65,6 +65,9 @@ jobs: no-sudo: true script: | set -ex + source ./aarch64_linux/aarch64_ci_setup.sh + echo "/opt/conda/bin" >> $GITHUB_PATH + export ENV_NAME="conda-env-${{ github.run_id }}" export TARGET_OS="linux-aarch64" export TORCH_ONLY=${{ inputs.torchonly }} From 0c3634c34e036c7467c6c5d548b4057e46c2ed5c Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Tue, 29 Aug 2023 12:50:32 -0400 Subject: [PATCH 032/212] Fix DESIRED_PYTHON setting for aarch64 validations (#1502) --- .github/workflows/validate-aarch64-linux-binaries.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/validate-aarch64-linux-binaries.yml b/.github/workflows/validate-aarch64-linux-binaries.yml index b067f4821..d3e57fd5c 100644 --- a/.github/workflows/validate-aarch64-linux-binaries.yml +++ b/.github/workflows/validate-aarch64-linux-binaries.yml @@ -65,6 +65,7 @@ jobs: no-sudo: true script: | set -ex + export DESIRED_PYTHON=${{ matrix.python_version }} source ./aarch64_linux/aarch64_ci_setup.sh echo "/opt/conda/bin" >> $GITHUB_PATH From f50bd85685bbe6662cbb51658648f4fa35dd3a47 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Tue, 29 Aug 2023 15:29:19 -0400 Subject: [PATCH 033/212] Use extra-index-url for aarch64 builds (#1503) --- 
.github/workflows/validate-aarch64-linux-binaries.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/validate-aarch64-linux-binaries.yml b/.github/workflows/validate-aarch64-linux-binaries.yml index d3e57fd5c..b66f0f1a3 100644 --- a/.github/workflows/validate-aarch64-linux-binaries.yml +++ b/.github/workflows/validate-aarch64-linux-binaries.yml @@ -69,6 +69,8 @@ jobs: source ./aarch64_linux/aarch64_ci_setup.sh echo "/opt/conda/bin" >> $GITHUB_PATH + MATRIX_INSTALLATION=${MATRIX_INSTALLATION/"index-url"/"extra-index-url"} + export ENV_NAME="conda-env-${{ github.run_id }}" export TARGET_OS="linux-aarch64" export TORCH_ONLY=${{ inputs.torchonly }} From 8b39596d58c2232c2c3a6aebc850eaced1c5b1ed Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Tue, 29 Aug 2023 16:25:00 -0400 Subject: [PATCH 034/212] Pypi validation enable (#1504) --- .github/workflows/validate-linux-binaries.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/validate-linux-binaries.yml b/.github/workflows/validate-linux-binaries.yml index 6d135e3a3..937f0e95f 100644 --- a/.github/workflows/validate-linux-binaries.yml +++ b/.github/workflows/validate-linux-binaries.yml @@ -67,5 +67,14 @@ jobs: export TARGET_OS="linux" eval "$(conda shell.bash hook)" + # Special case PyPi installation package. 
And Install of PyPi package via poetry + if [[ ${MATRIX_PACKAGE_TYPE} == "manywheel" ]] && \ + ([[ ${MATRIX_GPU_ARCH_VERSION} == "12.1" && ${MATRIX_CHANNEL} != "release" ]] || \ + [[ ${MATRIX_GPU_ARCH_VERSION} == "11.7" && ${MATRIX_CHANNEL} == "release" ]]); then + source ./.github/scripts/validate_pipy.sh --runtime-error-check disabled + # temporary disable poetry check + # source ./.github/scripts/validate_poetry.sh --runtime-error-check disabled + fi + # Standart case: Validate binaries source ./.github/scripts/validate_binaries.sh From 3c2d43c0c7572019e53be34d6a0f0a38392c2c63 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Tue, 29 Aug 2023 16:46:43 -0400 Subject: [PATCH 035/212] Validation pypi torchonly (#1505) --- .github/scripts/validate_pipy.sh | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/.github/scripts/validate_pipy.sh b/.github/scripts/validate_pipy.sh index 85b788cf0..d840fe019 100644 --- a/.github/scripts/validate_pipy.sh +++ b/.github/scripts/validate_pipy.sh @@ -1,12 +1,18 @@ conda create -yp ${ENV_NAME}_pypi python=${MATRIX_PYTHON_VERSION} numpy ffmpeg -if [[ ${MATRIX_CHANNEL} != "release" ]]; then +TEST_SUFFIX="" +if [[ ${TORCH_ONLY} == 'true' ]]; then + TEST_SUFFIX=" --package torchonly" conda run -p ${ENV_NAME}_pypi pip3 install --pre torch --index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}_pypi_cudnn" - conda run -p ${ENV_NAME}_pypi pip3 install --pre torchvision torchaudio --index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}" else - conda run -p ${ENV_NAME}_pypi pip3 install torch torchvision torchaudio + if [[ ${MATRIX_CHANNEL} != "release" ]]; then + conda run -p ${ENV_NAME}_pypi pip3 install --pre torch --index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}_pypi_cudnn" + conda run -p ${ENV_NAME}_pypi pip3 install --pre torchvision torchaudio --index-url 
"https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}" + else + conda run -p ${ENV_NAME}_pypi pip3 install torch torchvision torchaudio + fi fi -conda run -p ${ENV_NAME}_pypi python ./test/smoke_test/smoke_test.py +conda run -p ${ENV_NAME}_pypi python ./test/smoke_test/smoke_test.py ${TEST_SUFFIX} --runtime-error-check disabled conda deactivate conda env remove -p ${ENV_NAME}_pypi From 3145dfade0628a82c02cc850962f7bd670f58525 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Tue, 29 Aug 2023 18:07:38 -0400 Subject: [PATCH 036/212] Pipy validation workflow (#1506) --- .github/scripts/validate_pipy.sh | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/scripts/validate_pipy.sh b/.github/scripts/validate_pipy.sh index d840fe019..c8c87b277 100644 --- a/.github/scripts/validate_pipy.sh +++ b/.github/scripts/validate_pipy.sh @@ -1,18 +1,20 @@ conda create -yp ${ENV_NAME}_pypi python=${MATRIX_PYTHON_VERSION} numpy ffmpeg +conda activate ${ENV_NAME}_pypi TEST_SUFFIX="" if [[ ${TORCH_ONLY} == 'true' ]]; then TEST_SUFFIX=" --package torchonly" - conda run -p ${ENV_NAME}_pypi pip3 install --pre torch --index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}_pypi_cudnn" + pip3 install --pre torch --index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}_pypi_cudnn" else if [[ ${MATRIX_CHANNEL} != "release" ]]; then - conda run -p ${ENV_NAME}_pypi pip3 install --pre torch --index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}_pypi_cudnn" - conda run -p ${ENV_NAME}_pypi pip3 install --pre torchvision torchaudio --index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}" + pip3 install --pre torch --index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}_pypi_cudnn" + pip3 install --pre torchvision torchaudio --index-url 
"https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}" else - conda run -p ${ENV_NAME}_pypi pip3 install torch torchvision torchaudio + pip3 install torch torchvision torchaudio fi fi -conda run -p ${ENV_NAME}_pypi python ./test/smoke_test/smoke_test.py ${TEST_SUFFIX} --runtime-error-check disabled +python ./test/smoke_test/smoke_test.py ${TEST_SUFFIX} --runtime-error-check disabled + conda deactivate conda env remove -p ${ENV_NAME}_pypi From c7f4331998edd1f6915dcaab39d25bb50bac679c Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Tue, 29 Aug 2023 18:39:51 -0400 Subject: [PATCH 037/212] Pipy validation workflow (#1507) --- .github/scripts/validate_pipy.sh | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/.github/scripts/validate_pipy.sh b/.github/scripts/validate_pipy.sh index c8c87b277..91f8d692f 100644 --- a/.github/scripts/validate_pipy.sh +++ b/.github/scripts/validate_pipy.sh @@ -1,20 +1,18 @@ conda create -yp ${ENV_NAME}_pypi python=${MATRIX_PYTHON_VERSION} numpy ffmpeg -conda activate ${ENV_NAME}_pypi TEST_SUFFIX="" if [[ ${TORCH_ONLY} == 'true' ]]; then TEST_SUFFIX=" --package torchonly" - pip3 install --pre torch --index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}_pypi_cudnn" + conda run -p ${ENV_NAME}_pypi pip install --pre torch --index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}_pypi_cudnn" else if [[ ${MATRIX_CHANNEL} != "release" ]]; then - pip3 install --pre torch --index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}_pypi_cudnn" - pip3 install --pre torchvision torchaudio --index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}" + conda run -p ${ENV_NAME}_pypi pip install --pre torch --index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}_pypi_cudnn" + conda run -p ${ENV_NAME}_pypi pip install --pre torchvision torchaudio --index-url 
"https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}" else - pip3 install torch torchvision torchaudio + conda run -p ${ENV_NAME}_pypi pip install torch torchvision torchaudio fi fi -python ./test/smoke_test/smoke_test.py ${TEST_SUFFIX} --runtime-error-check disabled - +conda run -p ${ENV_NAME}_pypi python ./test/smoke_test/smoke_test.py ${TEST_SUFFIX} --runtime-error-check disabled conda deactivate conda env remove -p ${ENV_NAME}_pypi From ccbfd40227ce8c6789bc50b3df0676fd0a9a1028 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Tue, 29 Aug 2023 19:04:34 -0400 Subject: [PATCH 038/212] Pipy validation workflow (#1508) --- .github/scripts/validate_pipy.sh | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/scripts/validate_pipy.sh b/.github/scripts/validate_pipy.sh index 91f8d692f..578e2c8d4 100644 --- a/.github/scripts/validate_pipy.sh +++ b/.github/scripts/validate_pipy.sh @@ -1,18 +1,19 @@ -conda create -yp ${ENV_NAME}_pypi python=${MATRIX_PYTHON_VERSION} numpy ffmpeg +conda create -yn ${ENV_NAME}_pypi python=${MATRIX_PYTHON_VERSION} numpy ffmpeg +conda activate ${ENV_NAME}_pypi TEST_SUFFIX="" if [[ ${TORCH_ONLY} == 'true' ]]; then TEST_SUFFIX=" --package torchonly" - conda run -p ${ENV_NAME}_pypi pip install --pre torch --index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}_pypi_cudnn" + pip3 install --pre torch --index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}_pypi_cudnn" else if [[ ${MATRIX_CHANNEL} != "release" ]]; then - conda run -p ${ENV_NAME}_pypi pip install --pre torch --index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}_pypi_cudnn" - conda run -p ${ENV_NAME}_pypi pip install --pre torchvision torchaudio --index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}" + pip3 install --pre torch --index-url 
"https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}_pypi_cudnn" + pip3 install --pre torchvision torchaudio --index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}" else - conda run -p ${ENV_NAME}_pypi pip install torch torchvision torchaudio + pip3 install torch torchvision torchaudio fi fi -conda run -p ${ENV_NAME}_pypi python ./test/smoke_test/smoke_test.py ${TEST_SUFFIX} --runtime-error-check disabled +python ./test/smoke_test/smoke_test.py ${TEST_SUFFIX} --runtime-error-check disabled conda deactivate conda env remove -p ${ENV_NAME}_pypi From fe02df78f44c6c61b396410766a3333dc87b3c31 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Tue, 29 Aug 2023 19:22:14 -0400 Subject: [PATCH 039/212] Pipy validation workflow (#1509) --- .github/scripts/validate_pipy.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/scripts/validate_pipy.sh b/.github/scripts/validate_pipy.sh index 578e2c8d4..ed7915079 100644 --- a/.github/scripts/validate_pipy.sh +++ b/.github/scripts/validate_pipy.sh @@ -4,11 +4,11 @@ conda activate ${ENV_NAME}_pypi TEST_SUFFIX="" if [[ ${TORCH_ONLY} == 'true' ]]; then TEST_SUFFIX=" --package torchonly" - pip3 install --pre torch --index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}_pypi_cudnn" + pip3 install --pre torch --extra-index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}_pypi_cudnn" else if [[ ${MATRIX_CHANNEL} != "release" ]]; then - pip3 install --pre torch --index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}_pypi_cudnn" - pip3 install --pre torchvision torchaudio --index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}" + pip3 install --pre torch --extra-index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}_pypi_cudnn" + pip3 install --pre torchvision torchaudio --extra-index-url 
"https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}" else pip3 install torch torchvision torchaudio fi From 10f5379c4e2744b9800248c95f248c61ce1e7e06 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Wed, 30 Aug 2023 07:54:24 -0400 Subject: [PATCH 040/212] Validate poetry workflow (#1511) --- .github/scripts/validate_poetry.sh | 20 +++++++++++++++---- .github/workflows/validate-linux-binaries.yml | 4 ++-- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/.github/scripts/validate_poetry.sh b/.github/scripts/validate_poetry.sh index 65540bb35..85101a409 100644 --- a/.github/scripts/validate_poetry.sh +++ b/.github/scripts/validate_poetry.sh @@ -8,20 +8,32 @@ poetry --version poetry new test_poetry cd test_poetry +TEST_SUFFIX="" +if [[ ${TORCH_ONLY} == 'true' ]]; then + TEST_SUFFIX=" --package torchonly" +else + if [[ ${MATRIX_CHANNEL} != "release" ]]; then # Installing poetry from our custom repo. We need to configure it before use and disable authentication export PYTHON_KEYRING_BACKEND=keyring.backends.null.Keyring poetry source add --priority=explicit domains "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}" - poetry source add --priority=supplemental pytorch-nightly "https://download.pytorch.org/whl/${MATRIX_CHANNEL}" + poetry source add --priority=supplemental pytorch-channel "https://download.pytorch.org/whl/${MATRIX_CHANNEL}" poetry source add --priority=supplemental pytorch "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}_pypi_cudnn" poetry --quiet add --source pytorch torch - poetry --quiet add --source domains torchvision torchaudio + + if [[ ${TORCH_ONLY} != 'true' ]]; then + poetry --quiet add --source domains torchvision torchaudio + fi else export PYTHON_KEYRING_BACKEND=keyring.backends.null.Keyring - poetry --quiet add torch torchaudio torchvision + if [[ ${TORCH_ONLY} == 'true' ]]; then + poetry --quiet add torch + else + poetry --quiet add torch torchaudio 
torchvision + fi fi -python ../test/smoke_test/smoke_test.py +python ../test/smoke_test/smoke_test.py ${TEST_SUFFIX} --runtime-error-check disabled conda deactivate conda env remove -p ${ENV_NAME}_poetry cd .. diff --git a/.github/workflows/validate-linux-binaries.yml b/.github/workflows/validate-linux-binaries.yml index 937f0e95f..12335e843 100644 --- a/.github/workflows/validate-linux-binaries.yml +++ b/.github/workflows/validate-linux-binaries.yml @@ -71,9 +71,9 @@ jobs: if [[ ${MATRIX_PACKAGE_TYPE} == "manywheel" ]] && \ ([[ ${MATRIX_GPU_ARCH_VERSION} == "12.1" && ${MATRIX_CHANNEL} != "release" ]] || \ [[ ${MATRIX_GPU_ARCH_VERSION} == "11.7" && ${MATRIX_CHANNEL} == "release" ]]); then - source ./.github/scripts/validate_pipy.sh --runtime-error-check disabled + source ./.github/scripts/validate_pipy.sh # temporary disable poetry check - # source ./.github/scripts/validate_poetry.sh --runtime-error-check disabled + source ./.github/scripts/validate_poetry.sh fi # Standart case: Validate binaries From d172580f54e4ba2dd0f3ae64e51b413605a65c00 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Wed, 30 Aug 2023 09:14:30 -0400 Subject: [PATCH 041/212] Validate poetry workflow (#1512) --- .github/scripts/validate_poetry.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/validate_poetry.sh b/.github/scripts/validate_poetry.sh index 85101a409..c4e4fd154 100644 --- a/.github/scripts/validate_poetry.sh +++ b/.github/scripts/validate_poetry.sh @@ -11,7 +11,7 @@ cd test_poetry TEST_SUFFIX="" if [[ ${TORCH_ONLY} == 'true' ]]; then TEST_SUFFIX=" --package torchonly" -else +fi if [[ ${MATRIX_CHANNEL} != "release" ]]; then # Installing poetry from our custom repo. 
We need to configure it before use and disable authentication From 894baacd1214ae7c58456b00f6b4f90efdbb6fc5 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Wed, 30 Aug 2023 11:22:18 -0400 Subject: [PATCH 042/212] Remove linux-aarch64 installation workaround (#1513) --- .github/workflows/validate-aarch64-linux-binaries.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/validate-aarch64-linux-binaries.yml b/.github/workflows/validate-aarch64-linux-binaries.yml index b66f0f1a3..d3e57fd5c 100644 --- a/.github/workflows/validate-aarch64-linux-binaries.yml +++ b/.github/workflows/validate-aarch64-linux-binaries.yml @@ -69,8 +69,6 @@ jobs: source ./aarch64_linux/aarch64_ci_setup.sh echo "/opt/conda/bin" >> $GITHUB_PATH - MATRIX_INSTALLATION=${MATRIX_INSTALLATION/"index-url"/"extra-index-url"} - export ENV_NAME="conda-env-${{ github.run_id }}" export TARGET_OS="linux-aarch64" export TORCH_ONLY=${{ inputs.torchonly }} From 1e281befc6ca16e16d826965b3dd6b784be20f77 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Thu, 31 Aug 2023 15:13:17 -0400 Subject: [PATCH 043/212] Temporary change test aarch64 builds (#1514) --- .github/workflows/validate-aarch64-linux-binaries.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/validate-aarch64-linux-binaries.yml b/.github/workflows/validate-aarch64-linux-binaries.yml index d3e57fd5c..4d8b49334 100644 --- a/.github/workflows/validate-aarch64-linux-binaries.yml +++ b/.github/workflows/validate-aarch64-linux-binaries.yml @@ -69,6 +69,9 @@ jobs: source ./aarch64_linux/aarch64_ci_setup.sh echo "/opt/conda/bin" >> $GITHUB_PATH + # todo: Remove after aarch64 filename is fixed + export MATRIX_INSTALLATION=${MATRIX_INSTALLATION/"pip3 install"/"pip3 install --pre"} + export ENV_NAME="conda-env-${{ github.run_id }}" export TARGET_OS="linux-aarch64" export TORCH_ONLY=${{ inputs.torchonly }} From 39740d922a9c8dc0bca4ddb8d242ae9d43c67e0f Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Thu, 31 Aug 
2023 18:12:08 -0400 Subject: [PATCH 044/212] Remove torchonly restictions from aarch64 builds (#1517) --- .github/scripts/validate_binaries.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/scripts/validate_binaries.sh b/.github/scripts/validate_binaries.sh index 6b4bccd6b..0c01dbca2 100755 --- a/.github/scripts/validate_binaries.sh +++ b/.github/scripts/validate_binaries.sh @@ -32,8 +32,6 @@ else if [[ ${TARGET_OS} == 'windows' ]]; then python ./test/smoke_test/smoke_test.py ${TEST_SUFFIX} - elif [[ ${TARGET_OS} == 'linux-aarch64' ]]; then - python3 ./test/smoke_test/smoke_test.py --package=torchonly else python3 ./test/smoke_test/smoke_test.py ${TEST_SUFFIX} fi From 57ee59abdb99440428aa5821dd077620937f4b93 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Fri, 1 Sep 2023 11:09:57 -0400 Subject: [PATCH 045/212] Fix aarch64 nightly/release version override (#1518) * Aarch64 fix overrdie passing from CI to build * Aarch64 fix overrdie passing from CI to build * Aarch64 fix overrdie passing from CI to build --- aarch64_linux/aarch64_wheel_ci_build.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py index 5d80a95e4..4ad620ba2 100755 --- a/aarch64_linux/aarch64_wheel_ci_build.py +++ b/aarch64_linux/aarch64_wheel_ci_build.py @@ -48,7 +48,7 @@ def complete_wheel(folder: str): os.system(f"mv /{folder}/wheelhouse/{repaired_wheel_name} /{folder}/dist/") else: repaired_wheel_name = wheel_name - + print(f"Copying {repaired_wheel_name} to artfacts") os.system(f"mv /{folder}/dist/{repaired_wheel_name} /artifacts/") @@ -85,12 +85,18 @@ def parse_arguments(): build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 " os.system("python setup.py clean") - if branch == 'nightly' or branch == 'master': - build_date = subprocess.check_output(['git', 'log', '--pretty=format:%cs', '-1'], cwd='/pytorch').decode().replace('-', '') - version = 
subprocess.check_output(['cat', 'version.txt'], cwd='/pytorch').decode().strip()[:-2] - build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1 " - if branch.startswith("v1.") or branch.startswith("v2."): - build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1 " + override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION") + if override_package_version is not None: + version = override_package_version + build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version} PYTORCH_BUILD_NUMBER=1 " + else: + if branch == 'nightly' or branch == 'master': + build_date = subprocess.check_output(['git', 'log', '--pretty=format:%cs', '-1'], cwd='/pytorch').decode().replace('-', '') + version = subprocess.check_output(['cat', 'version.txt'], cwd='/pytorch').decode().strip()[:-2] + build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1 " + if branch.startswith("v1.") or branch.startswith("v2."): + build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1 " + if enable_mkldnn: build_ArmComputeLibrary(git_clone_flags) print("build pytorch with mkldnn+acl backend") From 20173e029e07767d5a4bbf5653548aa48c207a47 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Tue, 5 Sep 2023 10:44:59 -0400 Subject: [PATCH 046/212] Revert "Temporary change test aarch64 builds (#1514)" (#1521) This reverts commit 1e281befc6ca16e16d826965b3dd6b784be20f77. 
--- .github/workflows/validate-aarch64-linux-binaries.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/validate-aarch64-linux-binaries.yml b/.github/workflows/validate-aarch64-linux-binaries.yml index 4d8b49334..d3e57fd5c 100644 --- a/.github/workflows/validate-aarch64-linux-binaries.yml +++ b/.github/workflows/validate-aarch64-linux-binaries.yml @@ -69,9 +69,6 @@ jobs: source ./aarch64_linux/aarch64_ci_setup.sh echo "/opt/conda/bin" >> $GITHUB_PATH - # todo: Remove after aarch64 filename is fixed - export MATRIX_INSTALLATION=${MATRIX_INSTALLATION/"pip3 install"/"pip3 install --pre"} - export ENV_NAME="conda-env-${{ github.run_id }}" export TARGET_OS="linux-aarch64" export TORCH_ONLY=${{ inputs.torchonly }} From b4814f5322fc4d1f22a6a7568d5a169a65e616a6 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Wed, 6 Sep 2023 08:04:44 -0400 Subject: [PATCH 047/212] Changes related to OVERRIDE_PACKAGE_VERSION in aarch64 builds (#1520) (#1523) --- aarch64_linux/aarch64_ci_build.sh | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/aarch64_linux/aarch64_ci_build.sh b/aarch64_linux/aarch64_ci_build.sh index c374359c2..321287ff5 100644 --- a/aarch64_linux/aarch64_ci_build.sh +++ b/aarch64_linux/aarch64_ci_build.sh @@ -4,6 +4,19 @@ set -eux -o pipefail SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" source $SCRIPTPATH/aarch64_ci_setup.sh +tagged_version() { + GIT_DESCRIBE="git --git-dir /pytorch/.git describe --tags --match v[0-9]*.[0-9]*.[0-9]*" + if ${GIT_DESCRIBE} --exact >/dev/null; then + ${GIT_DESCRIBE} + else + return 1 + fi +} + +if tagged_version >/dev/null; then + export OVERRIDE_PACKAGE_VERSION="$(tagged_version | sed -e 's/^v//' -e 's/-.*$//')" +fi + ############################################################################### # Run aarch64 builder python ############################################################################### From 18c2797a34714b2bb748d21754915cab8a741d93 Mon Sep 17 00:00:00 2001 
From: Omkar Salpekar Date: Wed, 6 Sep 2023 14:30:23 -0400 Subject: [PATCH 048/212] Torchmetrics in S3 Index (#1522) We will need the stable torchmetrics wheel in the S3 index, since torchrec depends on it. This is similar to how pytorch depends on numpy, etc. and these binaries need to be hosted in our index when uses try to pip install from download.pytorch.org. --- s3_management/manage.py | 1 + 1 file changed, 1 insertion(+) diff --git a/s3_management/manage.py b/s3_management/manage.py index 655f7de40..4b35ecab2 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -85,6 +85,7 @@ "torchcsprng", "torchdata", "torchdistx", + "torchmetrics", "torchrec", "torchtext", "torchvision", From 3026f248be97304bcca1c348de276958a9d5eefc Mon Sep 17 00:00:00 2001 From: snadampal <87143774+snadampal@users.noreply.github.com> Date: Thu, 7 Sep 2023 08:55:09 -0500 Subject: [PATCH 049/212] [aarch64] update ACL version to v23.05.1 and OpenBLAS to v0.3.20 (#1488) --- aarch64_linux/build_aarch64_wheel.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/aarch64_linux/build_aarch64_wheel.py b/aarch64_linux/build_aarch64_wheel.py index 0bab3126a..5595dc94a 100755 --- a/aarch64_linux/build_aarch64_wheel.py +++ b/aarch64_linux/build_aarch64_wheel.py @@ -219,7 +219,7 @@ def install_condaforge_python(host: RemoteHost, python_version="3.8") -> None: def build_OpenBLAS(host: RemoteHost, git_clone_flags: str = "") -> None: print('Building OpenBLAS') - host.run_cmd(f"git clone https://github.com/xianyi/OpenBLAS -b v0.3.19 {git_clone_flags}") + host.run_cmd(f"git clone https://github.com/xianyi/OpenBLAS -b v0.3.20 {git_clone_flags}") make_flags = "NUM_THREADS=64 USE_OPENMP=1 NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=ARMV8" host.run_cmd(f"pushd OpenBLAS && make {make_flags} -j8 && sudo make {make_flags} install && popd && rm -rf OpenBLAS") @@ -227,10 +227,7 @@ def build_OpenBLAS(host: RemoteHost, git_clone_flags: str = "") -> None: def build_ArmComputeLibrary(host: 
RemoteHost, git_clone_flags: str = "") -> None: print('Building Arm Compute Library') acl_build_flags="debug=0 neon=1 opencl=0 os=linux openmp=1 cppthreads=0 arch=armv8a multi_isa=1 build=native" - host.run_cmd(f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v22.11 {git_clone_flags}") - host.run_cmd(['sed -i -e \'s/"armv8.2-a"/"armv8-a"/g\' ComputeLibrary/SConscript']) - host.run_cmd(['sed -i -e \'s/-march=armv8.2-a+fp16/-march=armv8-a/g\' ComputeLibrary/SConstruct']) - host.run_cmd(['sed -i -e \'s/"-march=armv8.2-a"/"-march=armv8-a"/g\' ComputeLibrary/filedefs.json']) + host.run_cmd(f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v23.05.1 {git_clone_flags}") host.run_cmd(f"cd ComputeLibrary && scons Werror=1 -j8 {acl_build_flags}") From 195148266541a9789074265141cb7dc19dc14c54 Mon Sep 17 00:00:00 2001 From: Danylo Baibak Date: Mon, 11 Sep 2023 09:18:23 +0200 Subject: [PATCH 050/212] Changed runner for linux arm64 (#1525) --- .github/workflows/build-manywheel-images.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-manywheel-images.yml b/.github/workflows/build-manywheel-images.yml index 3bf2dbe70..c7dbe2248 100644 --- a/.github/workflows/build-manywheel-images.yml +++ b/.github/workflows/build-manywheel-images.yml @@ -87,7 +87,7 @@ jobs: run: | manywheel/build_docker.sh build-docker-cpu-aarch64: - runs-on: linux.t4g.2xlarge + runs-on: linux.arm64.2xlarge env: GPU_ARCH_TYPE: cpu-aarch64 steps: From 0c1d107c4e9f1ed5f09e6d4e1e740353f00e6a09 Mon Sep 17 00:00:00 2001 From: Omkar Salpekar Date: Mon, 11 Sep 2023 16:59:46 -0400 Subject: [PATCH 051/212] Add torch-tensorrt to S3 PyPI Index (#1529) As pytorch/tensorrt moves off of CCI onto Nova, we must to host their nightlies on our S3 index. This change allows the indexing to occur correctly for this package. 
--- s3_management/manage.py | 1 + 1 file changed, 1 insertion(+) diff --git a/s3_management/manage.py b/s3_management/manage.py index 4b35ecab2..719923d47 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -80,6 +80,7 @@ "requests", "sympy", "torch", + "torch_tensorrt", "torcharrow", "torchaudio", "torchcsprng", From 12a6ea560790246cb3230c9fb02e795b32fa74b7 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Thu, 14 Sep 2023 14:49:46 -0400 Subject: [PATCH 052/212] Enable torch compile for python 3.11 smoke tests (#1534) * Enable torch compile for python 3.11 smoke tests * Make sure release is covered * Fix typo --- test/smoke_test/smoke_test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/smoke_test/smoke_test.py b/test/smoke_test/smoke_test.py index 3ae4d9421..539f1ba1b 100644 --- a/test/smoke_test/smoke_test.py +++ b/test/smoke_test/smoke_test.py @@ -129,7 +129,9 @@ def smoke_test_cuda(package: str, runtime_error_check: str) -> None: print(f"cuDNN enabled? 
{torch.backends.cudnn.enabled}") # torch.compile is available only on Linux and python 3.8-3.10 - if (sys.platform == "linux" or sys.platform == "linux2") and sys.version_info < (3, 11, 0): + if (sys.platform == "linux" or sys.platform == "linux2") and sys.version_info < (3, 11, 0) and channel == "release": + smoke_test_compile() + elif (sys.platform == "linux" or sys.platform == "linux2") and channel != "release": smoke_test_compile() if(runtime_error_check == "enabled"): From 224a4c536e5efdf8d55ea25e6cd7a5dbb474cc43 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Thu, 14 Sep 2023 16:21:10 -0400 Subject: [PATCH 053/212] add jinja2 (#1536) --- .github/scripts/validate_binaries.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/validate_binaries.sh b/.github/scripts/validate_binaries.sh index 0c01dbca2..e9b780057 100755 --- a/.github/scripts/validate_binaries.sh +++ b/.github/scripts/validate_binaries.sh @@ -3,7 +3,7 @@ if [[ ${MATRIX_PACKAGE_TYPE} == "libtorch" ]]; then unzip libtorch.zip else # Please note ffmpeg is required for torchaudio, see https://github.com/pytorch/pytorch/issues/96159 - conda create -y -n ${ENV_NAME} python=${MATRIX_PYTHON_VERSION} numpy ffmpeg + conda create -y -n ${ENV_NAME} python=${MATRIX_PYTHON_VERSION} numpy ffmpeg jinja2 conda activate ${ENV_NAME} INSTALLATION=${MATRIX_INSTALLATION/"conda install"/"conda install -y"} TEST_SUFFIX="" From d76e1bbd533d6dfbb8b1d3004fa2022b14a4b8a8 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Thu, 14 Sep 2023 16:37:27 -0400 Subject: [PATCH 054/212] Remove restriction on 3.11 (#1537) --- conda/pytorch-nightly/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/pytorch-nightly/meta.yaml b/conda/pytorch-nightly/meta.yaml index 5cab9f53d..79025dc8c 100644 --- a/conda/pytorch-nightly/meta.yaml +++ b/conda/pytorch-nightly/meta.yaml @@ -49,7 +49,7 @@ requirements: - sympy - filelock - networkx - - jinja2 # [py <= 310] + - jinja2 - pyyaml {% 
if cross_compile_arm64 == 0 %} - blas * mkl From e9e31edc19196d08fb32d39ca22dfc98301cbfed Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Thu, 14 Sep 2023 17:08:41 -0400 Subject: [PATCH 055/212] Revert "add jinja2 (#1536)" (#1538) This reverts commit 224a4c536e5efdf8d55ea25e6cd7a5dbb474cc43. --- .github/scripts/validate_binaries.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/validate_binaries.sh b/.github/scripts/validate_binaries.sh index e9b780057..0c01dbca2 100755 --- a/.github/scripts/validate_binaries.sh +++ b/.github/scripts/validate_binaries.sh @@ -3,7 +3,7 @@ if [[ ${MATRIX_PACKAGE_TYPE} == "libtorch" ]]; then unzip libtorch.zip else # Please note ffmpeg is required for torchaudio, see https://github.com/pytorch/pytorch/issues/96159 - conda create -y -n ${ENV_NAME} python=${MATRIX_PYTHON_VERSION} numpy ffmpeg jinja2 + conda create -y -n ${ENV_NAME} python=${MATRIX_PYTHON_VERSION} numpy ffmpeg conda activate ${ENV_NAME} INSTALLATION=${MATRIX_INSTALLATION/"conda install"/"conda install -y"} TEST_SUFFIX="" From 9f365204ffce6d37c5447e07f8a5db291120ec16 Mon Sep 17 00:00:00 2001 From: Omkar Salpekar Date: Thu, 14 Sep 2023 17:19:53 -0400 Subject: [PATCH 056/212] S3 Management Job Outside Docker (#1531) * S3 Management Job Outside Docker * job name * remove failfast * no matrix * inherit secrets * spacing? 
* random nits * add back secrets * add back matrix * export env vars correctlty * Update update-s3-html.yml --- .github/workflows/update-s3-html.yml | 35 ++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 .github/workflows/update-s3-html.yml diff --git a/.github/workflows/update-s3-html.yml b/.github/workflows/update-s3-html.yml new file mode 100644 index 000000000..7c285418e --- /dev/null +++ b/.github/workflows/update-s3-html.yml @@ -0,0 +1,35 @@ +name: Update S3 HTML indices for download.pytorch.org + +on: + schedule: + # Update the indices every 30 minutes + - cron: "*/30 * * * *" + workflow_dispatch: + +jobs: + update: + strategy: + matrix: + prefix: ["whl", "whl/test", "whl/nightly", "whl/lts/1.8"] + fail-fast: False + uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + secrets: inherit + with: + repository: pytorch/builder + timeout: 60 + secrets-env: AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY + script: | + set -ex + + # Create Conda Environment + git config --global --add safe.directory /__w/builder/builder + conda create --quiet -y --prefix run_env python="3.8" + conda activate ./run_env + + # Set Envs + export AWS_ACCESS_KEY_ID="${SECRET_AWS_ACCESS_KEY_ID}" + export AWS_SECRET_ACCESS_KEY="${SECRET_AWS_SECRET_ACCESS_KEY}" + + # Install requirements + pip install -r s3_management/requirements.txt + python s3_management/manage.py --generate-pep503 ${{ matrix.prefix }} From 22f0903d597a9d6cdbcf8790d5d6e18c94bd2afe Mon Sep 17 00:00:00 2001 From: Supadchaya <138070207+spcyppt@users.noreply.github.com> Date: Fri, 15 Sep 2023 11:09:59 -0700 Subject: [PATCH 057/212] Add fbgemm-gpu to S3 Index (#1539) --- s3_management/manage.py | 1 + 1 file changed, 1 insertion(+) diff --git a/s3_management/manage.py b/s3_management/manage.py index 719923d47..ef7ae74fb 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -40,6 +40,7 @@ "charset_normalizer", "cmake", "colorama", + "fbgemm_gpu", "filelock", "fsspec", "idna", 
From 17ea05e4536e78c9c0be8641952b1c5850a298cb Mon Sep 17 00:00:00 2001 From: Jithun Nair <37884920+jithunnair-amd@users.noreply.github.com> Date: Mon, 18 Sep 2023 20:45:13 -0500 Subject: [PATCH 058/212] Update builder images to ROCm5.7 (#1541) * Update docker build images for rocm5.7 * Fix erroneous logic that was skipping msccl files even for ROCm5.6; update msccl path for ROCm5.7 (cherry picked from commit 36c10cc3be475780aa7d76a7ccdbe3f8731042c9) * missing bzip2 package install for miopen * Revert "missing bzip2 package install for miopen" This reverts commit 8ef5fc956508e34315866059431ca015f485f77d. * ROCm 5.7 MIOpen does not need any patches, do not build from source --------- Co-authored-by: Jeff Daily --- .github/workflows/build-libtorch-images.yml | 2 +- .github/workflows/build-manywheel-images.yml | 2 +- common/install_miopen.sh | 5 ++++- manywheel/build_rocm.sh | 13 ++++++++++--- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build-libtorch-images.yml b/.github/workflows/build-libtorch-images.yml index b2b50d250..d62030047 100644 --- a/.github/workflows/build-libtorch-images.yml +++ b/.github/workflows/build-libtorch-images.yml @@ -52,7 +52,7 @@ jobs: runs-on: linux.12xlarge strategy: matrix: - rocm_version: ["5.5", "5.6"] + rocm_version: ["5.6", "5.7"] env: GPU_ARCH_TYPE: rocm GPU_ARCH_VERSION: ${{ matrix.rocm_version }} diff --git a/.github/workflows/build-manywheel-images.yml b/.github/workflows/build-manywheel-images.yml index c7dbe2248..bbac707dd 100644 --- a/.github/workflows/build-manywheel-images.yml +++ b/.github/workflows/build-manywheel-images.yml @@ -58,7 +58,7 @@ jobs: runs-on: linux.12xlarge strategy: matrix: - rocm_version: ["5.5", "5.6"] + rocm_version: ["5.6", "5.7"] env: GPU_ARCH_TYPE: rocm GPU_ARCH_VERSION: ${{ matrix.rocm_version }} diff --git a/common/install_miopen.sh b/common/install_miopen.sh index 696a91905..c01517927 100644 --- a/common/install_miopen.sh +++ b/common/install_miopen.sh @@ -58,7 
+58,10 @@ MIOPEN_CMAKE_COMMON_FLAGS=" -DMIOPEN_BUILD_DRIVER=OFF " # Pull MIOpen repo and set DMIOPEN_EMBED_DB based on ROCm version -if [[ $ROCM_INT -ge 50600 ]] && [[ $ROCM_INT -lt 50700 ]]; then +if [[ $ROCM_INT -ge 50700 ]] && [[ $ROCM_INT -lt 50800 ]]; then + echo "ROCm 5.7 MIOpen does not need any patches, do not build from source" + exit 0 +elif [[ $ROCM_INT -ge 50600 ]] && [[ $ROCM_INT -lt 50700 ]]; then MIOPEN_BRANCH="release/rocm-rel-5.6-staging" elif [[ $ROCM_INT -ge 50500 ]] && [[ $ROCM_INT -lt 50600 ]]; then MIOPEN_BRANCH="release/rocm-rel-5.5-gfx11" diff --git a/manywheel/build_rocm.sh b/manywheel/build_rocm.sh index 80ebde6f4..0fed5970b 100755 --- a/manywheel/build_rocm.sh +++ b/manywheel/build_rocm.sh @@ -204,10 +204,17 @@ if [[ $ROCM_INT -ge 50500 ]]; then DEPS_AUX_SRCLIST+=(${MIOPEN_SHARE_FILES[@]/#/$MIOPEN_SHARE_SRC/}) DEPS_AUX_DSTLIST+=(${MIOPEN_SHARE_FILES[@]/#/$MIOPEN_SHARE_DST/}) -elif [[ $ROCM_INT -ge 50600 ]]; then +fi + +if [[ $ROCM_INT -ge 50600 ]]; then # RCCL library files - RCCL_SHARE_SRC=$ROCM_HOME/lib/msccl-algorithms - RCCL_SHARE_DST=lib/msccl-algorithms + if [[ $ROCM_INT -ge 50700 ]]; then + RCCL_SHARE_SRC=$ROCM_HOME/share/rccl/msccl-algorithms + RCCL_SHARE_DST=share/rccl/msccl-algorithms + else + RCCL_SHARE_SRC=$ROCM_HOME/lib/msccl-algorithms + RCCL_SHARE_DST=lib/msccl-algorithms + fi RCCL_SHARE_FILES=($(ls $RCCL_SHARE_SRC)) DEPS_AUX_SRCLIST+=(${RCCL_SHARE_FILES[@]/#/$RCCL_SHARE_SRC/}) From cbc95ff6ee0483cb5bf42949a0d1e78ef0571ce1 Mon Sep 17 00:00:00 2001 From: Jithun Nair <37884920+jithunnair-amd@users.noreply.github.com> Date: Mon, 18 Sep 2023 20:56:13 -0500 Subject: [PATCH 059/212] Update docker build convenience scripts to ROCm5.7 (#1543) --- libtorch/build_all_docker.sh | 2 +- manywheel/build_all_docker.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libtorch/build_all_docker.sh b/libtorch/build_all_docker.sh index e73e713b3..fb6bd975b 100755 --- a/libtorch/build_all_docker.sh +++ 
b/libtorch/build_all_docker.sh @@ -8,6 +8,6 @@ for cuda_version in 12.1 11.8; do GPU_ARCH_TYPE=cuda GPU_ARCH_VERSION="${cuda_version}" "${TOPDIR}/libtorch/build_docker.sh" done -for rocm_version in 5.5 5.6; do +for rocm_version in 5.6 5.7; do GPU_ARCH_TYPE=rocm GPU_ARCH_VERSION="${rocm_version}" "${TOPDIR}/libtorch/build_docker.sh" done diff --git a/manywheel/build_all_docker.sh b/manywheel/build_all_docker.sh index 2bd720f2f..2995e3be7 100644 --- a/manywheel/build_all_docker.sh +++ b/manywheel/build_all_docker.sh @@ -16,7 +16,7 @@ for cuda_version in 12.1 11.8; do MANYLINUX_VERSION=2014 GPU_ARCH_TYPE=cuda GPU_ARCH_VERSION="${cuda_version}" "${TOPDIR}/manywheel/build_docker.sh" done -for rocm_version in 5.5 5.6; do +for rocm_version in 5.6 5.7; do GPU_ARCH_TYPE=rocm GPU_ARCH_VERSION="${rocm_version}" "${TOPDIR}/manywheel/build_docker.sh" MANYLINUX_VERSION=2014 GPU_ARCH_TYPE=rocm GPU_ARCH_VERSION="${rocm_version}" "${TOPDIR}/manywheel/build_docker.sh" done From 59a2f92aa12c3c0cb11622b05fe77de8312f6d00 Mon Sep 17 00:00:00 2001 From: Jithun Nair <37884920+jithunnair-amd@users.noreply.github.com> Date: Tue, 19 Sep 2023 12:09:56 -0500 Subject: [PATCH 060/212] Do not uninstall MIOpen if skipping build-from-source (#1544) --- common/install_miopen.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/install_miopen.sh b/common/install_miopen.sh index c01517927..779bc755d 100644 --- a/common/install_miopen.sh +++ b/common/install_miopen.sh @@ -33,8 +33,6 @@ if [[ $ROCM_INT -lt 40001 ]]; then exit 0 fi -yum remove -y miopen-hip - # Function to retry functions that sometimes timeout or have flaky failures retry () { $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) @@ -85,6 +83,8 @@ else exit 1 fi +yum remove -y miopen-hip + git clone https://github.com/ROCmSoftwarePlatform/MIOpen -b ${MIOPEN_BRANCH} pushd MIOpen # remove .git to save disk space since CI runner was running out From 
553b4dff742ccead6d349993cc588f6cc3a8e98d Mon Sep 17 00:00:00 2001 From: cyy Date: Sat, 23 Sep 2023 03:45:39 +0800 Subject: [PATCH 061/212] Install nvtx3 on Windows (#1547) --- windows/internal/cuda_install.bat | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/windows/internal/cuda_install.bat b/windows/internal/cuda_install.bat index acd457a17..18a64b6a2 100644 --- a/windows/internal/cuda_install.bat +++ b/windows/internal/cuda_install.bat @@ -32,7 +32,7 @@ if not exist "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" ( curl -k -L "https://ossci-windows.s3.amazonaws.com/%CUDA_INSTALL_EXE%" --output "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" if errorlevel 1 exit /b 1 set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" - set "ARGS=cuda_profiler_api_11.8 thrust_11.8 nvcc_11.8 cuobjdump_11.8 nvprune_11.8 nvprof_11.8 cupti_11.8 cublas_11.8 cublas_dev_11.8 cudart_11.8 cufft_11.8 cufft_dev_11.8 curand_11.8 curand_dev_11.8 cusolver_11.8 cusolver_dev_11.8 cusparse_11.8 cusparse_dev_11.8 npp_11.8 npp_dev_11.8 nvrtc_11.8 nvrtc_dev_11.8 nvml_dev_11.8" + set "ARGS=cuda_profiler_api_11.8 thrust_11.8 nvcc_11.8 cuobjdump_11.8 nvprune_11.8 nvprof_11.8 cupti_11.8 cublas_11.8 cublas_dev_11.8 cudart_11.8 cufft_11.8 cufft_dev_11.8 curand_11.8 curand_dev_11.8 cusolver_11.8 cusolver_dev_11.8 cusparse_11.8 cusparse_dev_11.8 npp_11.8 npp_dev_11.8 nvrtc_11.8 nvrtc_dev_11.8 nvml_dev_11.8 nvtx_11.8" ) set CUDNN_FOLDER=cudnn-windows-x86_64-8.7.0.84_cuda11-archive @@ -59,7 +59,7 @@ if not exist "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" ( curl -k -L "https://ossci-windows.s3.amazonaws.com/%CUDA_INSTALL_EXE%" --output "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" if errorlevel 1 exit /b 1 set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" - set "ARGS=cuda_profiler_api_12.1 thrust_12.1 nvcc_12.1 cuobjdump_12.1 nvprune_12.1 nvprof_12.1 cupti_12.1 cublas_12.1 cublas_dev_12.1 cudart_12.1 cufft_12.1 cufft_dev_12.1 curand_12.1 curand_dev_12.1 cusolver_12.1 cusolver_dev_12.1 
cusparse_12.1 cusparse_dev_12.1 npp_12.1 npp_dev_12.1 nvrtc_12.1 nvrtc_dev_12.1 nvml_dev_12.1 nvjitlink_12.1" + set "ARGS=cuda_profiler_api_12.1 thrust_12.1 nvcc_12.1 cuobjdump_12.1 nvprune_12.1 nvprof_12.1 cupti_12.1 cublas_12.1 cublas_dev_12.1 cudart_12.1 cufft_12.1 cufft_dev_12.1 curand_12.1 curand_dev_12.1 cusolver_12.1 cusolver_dev_12.1 cusparse_12.1 cusparse_dev_12.1 npp_12.1 npp_dev_12.1 nvrtc_12.1 nvrtc_dev_12.1 nvml_dev_12.1 nvjitlink_12.1 nvtx_12.1" ) set CUDNN_FOLDER=cudnn-windows-x86_64-8.9.2.26_cuda12-archive From dbad8b7e78bd30bb79ee5f2b2c04b9b7024282c0 Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Fri, 22 Sep 2023 20:00:27 -0400 Subject: [PATCH 062/212] Provide file hashes in the URLs to avoid unnecessary file downloads (bandwidth saver) (#1433) Supply sha256 query parameters using boto3 to avoid hundreds of extra Gigabytes of downloads each day during pipenv and poetry resolution lock cycles. Fixes point 1 in https://github.com/pytorch/pytorch/issues/76557 Fixes #1347 --- s3_management/manage.py | 59 ++++++++++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 15 deletions(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index ef7ae74fb..51fede761 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -1,12 +1,15 @@ #!/usr/bin/env python import argparse +import base64 +import dataclasses +import functools import time from os import path, makedirs from datetime import datetime from collections import defaultdict -from typing import Iterator, List, Type, Dict, Set, TypeVar, Optional +from typing import Iterable, List, Type, Dict, Set, TypeVar, Optional from re import sub, match, search from packaging.version import parse @@ -14,7 +17,6 @@ S3 = boto3.resource('s3') -CLIENT = boto3.client('s3') BUCKET = S3.Bucket('pytorch') ACCEPTED_FILE_EXTENSIONS = ("whl", "zip", "tar.gz") @@ -107,6 +109,23 @@ S3IndexType = TypeVar('S3IndexType', bound='S3Index') + +@dataclasses.dataclass(frozen=True) 
+@functools.total_ordering +class S3Object: + key: str + checksum: str | None + + def __str__(self): + return self.key + + def __eq__(self, other): + return self.key == other.key + + def __lt__(self, other): + return self.key < other.key + + def extract_package_build_time(full_package_name: str) -> datetime: result = search(PACKAGE_DATE_REGEX, full_package_name) if result is not None: @@ -124,7 +143,7 @@ def between_bad_dates(package_build_time: datetime): class S3Index: - def __init__(self: S3IndexType, objects: List[str], prefix: str) -> None: + def __init__(self: S3IndexType, objects: List[S3Object], prefix: str) -> None: self.objects = objects self.prefix = prefix.rstrip("/") self.html_name = PREFIXES_WITH_HTML[self.prefix] @@ -134,7 +153,7 @@ def __init__(self: S3IndexType, objects: List[str], prefix: str) -> None: path.dirname(obj) for obj in objects if path.dirname != prefix } - def nightly_packages_to_show(self: S3IndexType) -> Set[str]: + def nightly_packages_to_show(self: S3IndexType) -> Set[S3Object]: """Finding packages to show based on a threshold we specify Basically takes our S3 packages, normalizes the version for easier @@ -174,8 +193,8 @@ def nightly_packages_to_show(self: S3IndexType) -> Set[str]: if self.normalize_package_version(obj) in to_hide }) - def is_obj_at_root(self, obj:str) -> bool: - return path.dirname(obj) == self.prefix + def is_obj_at_root(self, obj: S3Object) -> bool: + return path.dirname(str(obj)) == self.prefix def _resolve_subdir(self, subdir: Optional[str] = None) -> str: if not subdir: @@ -187,7 +206,7 @@ def gen_file_list( self, subdir: Optional[str]=None, package_name: Optional[str] = None - ) -> Iterator[str]: + ) -> Iterable[S3Object]: objects = ( self.nightly_packages_to_show() if self.prefix == 'whl/nightly' else self.objects @@ -197,23 +216,23 @@ def gen_file_list( if package_name is not None: if self.obj_to_package_name(obj) != package_name: continue - if self.is_obj_at_root(obj) or obj.startswith(subdir): + if 
self.is_obj_at_root(obj) or str(obj).startswith(subdir): yield obj def get_package_names(self, subdir: Optional[str] = None) -> List[str]: return sorted(set(self.obj_to_package_name(obj) for obj in self.gen_file_list(subdir))) - def normalize_package_version(self: S3IndexType, obj: str) -> str: + def normalize_package_version(self: S3IndexType, obj: S3Object) -> str: # removes the GPU specifier from the package name as well as # unnecessary things like the file extension, architecture name, etc. return sub( r"%2B.*", "", - "-".join(path.basename(obj).split("-")[:2]) + "-".join(path.basename(str(obj)).split("-")[:2]) ) - def obj_to_package_name(self, obj: str) -> str: - return path.basename(obj).split('-', 1)[0] + def obj_to_package_name(self, obj: S3Object) -> str: + return path.basename(str(obj)).split('-', 1)[0] def to_legacy_html( self, @@ -258,7 +277,8 @@ def to_simple_package_html( out.append(' ') out.append('

Links for {}

'.format(package_name.lower().replace("_","-"))) for obj in sorted(self.gen_file_list(subdir, package_name)): - out.append(f' {path.basename(obj).replace("%2B","+")}
') + maybe_fragment = f"#sha256={obj.checksum}" if obj.checksum else "" + out.append(f' {path.basename(obj).replace("%2B","+")}
') # Adding html footer out.append(' ') out.append('') @@ -319,7 +339,6 @@ def upload_pep503_htmls(self) -> None: Body=self.to_simple_package_html(subdir=subdir, package_name=pkg_name) ) - def save_legacy_html(self) -> None: for subdir in self.subdirs: print(f"INFO Saving {subdir}/{self.html_name}") @@ -351,10 +370,18 @@ def from_S3(cls: Type[S3IndexType], prefix: str) -> S3IndexType: for pattern in ACCEPTED_SUBDIR_PATTERNS ]) and obj.key.endswith(ACCEPTED_FILE_EXTENSIONS) if is_acceptable: + # Add PEP 503-compatible hashes to URLs to allow clients to avoid spurious downloads, if possible. + response = obj.meta.client.head_object(Bucket=BUCKET.name, Key=obj.key, ChecksumMode="ENABLED") + sha256 = (_b64 := response.get("ChecksumSHA256")) and base64.b64decode(_b64).hex() sanitized_key = obj.key.replace("+", "%2B") - objects.append(sanitized_key) + s3_object = S3Object( + key=sanitized_key, + checksum=sha256, + ) + objects.append(s3_object) return cls(objects, prefix) + def create_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser("Manage S3 HTML indices for PyTorch") parser.add_argument( @@ -366,6 +393,7 @@ def create_parser() -> argparse.ArgumentParser: parser.add_argument("--generate-pep503", action="store_true") return parser + def main(): parser = create_parser() args = parser.parse_args() @@ -390,5 +418,6 @@ def main(): if args.generate_pep503: idx.upload_pep503_htmls() + if __name__ == "__main__": main() From 1327c0b2d6050832d135fe708118225bedcaad1e Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Fri, 22 Sep 2023 17:02:16 -0700 Subject: [PATCH 063/212] Workaround for older files --- s3_management/manage.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/s3_management/manage.py b/s3_management/manage.py index 51fede761..03950af55 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -373,6 +373,9 @@ def from_S3(cls: Type[S3IndexType], prefix: str) -> S3IndexType: # Add PEP 503-compatible hashes to URLs to allow clients 
to avoid spurious downloads, if possible. response = obj.meta.client.head_object(Bucket=BUCKET.name, Key=obj.key, ChecksumMode="ENABLED") sha256 = (_b64 := response.get("ChecksumSHA256")) and base64.b64decode(_b64).hex() + # For older files, rely on checksumsha256 metadata that can be added to the file later + if sha256 is None: + sha256 = response.get("Metadata", {}).get("checksumsha256") sanitized_key = obj.key.replace("+", "%2B") s3_object = S3Object( key=sanitized_key, From dc0a5791766f5f8dc1612993854b6e5d5056cd12 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Fri, 22 Sep 2023 17:42:16 -0700 Subject: [PATCH 064/212] Bugfixes introduced by https://github.com/pytorch/builder/pull/1433 Replace `obj` with `obj.key` in few places Dismantle pyramid of doom while iterating over objects Test plan: Run `python manage.py whl/test --generate-pep503` --- s3_management/manage.py | 45 +++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index 03950af55..a46a1d9da 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -114,7 +114,7 @@ @functools.total_ordering class S3Object: key: str - checksum: str | None + checksum: Optional[str] def __str__(self): return self.key @@ -150,7 +150,7 @@ def __init__(self: S3IndexType, objects: List[S3Object], prefix: str) -> None: # should dynamically grab subdirectories like whl/test/cu101 # so we don't need to add them manually anymore self.subdirs = { - path.dirname(obj) for obj in objects if path.dirname != prefix + path.dirname(obj.key) for obj in objects if path.dirname != prefix } def nightly_packages_to_show(self: S3IndexType) -> Set[S3Object]: @@ -194,7 +194,7 @@ def nightly_packages_to_show(self: S3IndexType) -> Set[S3Object]: }) def is_obj_at_root(self, obj: S3Object) -> bool: - return path.dirname(str(obj)) == self.prefix + return path.dirname(obj.key) == self.prefix def _resolve_subdir(self, subdir: 
Optional[str] = None) -> str: if not subdir: @@ -216,7 +216,7 @@ def gen_file_list( if package_name is not None: if self.obj_to_package_name(obj) != package_name: continue - if self.is_obj_at_root(obj) or str(obj).startswith(subdir): + if self.is_obj_at_root(obj) or obj.key.startswith(subdir): yield obj def get_package_names(self, subdir: Optional[str] = None) -> List[str]: @@ -228,11 +228,11 @@ def normalize_package_version(self: S3IndexType, obj: S3Object) -> str: return sub( r"%2B.*", "", - "-".join(path.basename(str(obj)).split("-")[:2]) + "-".join(path.basename(obj.key).split("-")[:2]) ) def obj_to_package_name(self, obj: S3Object) -> str: - return path.basename(str(obj)).split('-', 1)[0] + return path.basename(obj.key).split('-', 1)[0] def to_legacy_html( self, @@ -250,7 +250,7 @@ def to_legacy_html( is_root = subdir == self.prefix for obj in self.gen_file_list(subdir): # Strip our prefix - sanitized_obj = obj.replace(subdir, "", 1) + sanitized_obj = obj.key.replace(subdir, "", 1) if sanitized_obj.startswith('/'): sanitized_obj = sanitized_obj.lstrip("/") # we include objects at our root prefix so that users can still @@ -258,7 +258,7 @@ def to_legacy_html( # to install a specific GPU arch of torch / torchvision if not is_root and self.is_obj_at_root(obj): # strip root prefix - sanitized_obj = obj.replace(self.prefix, "", 1).lstrip("/") + sanitized_obj = obj.key.replace(self.prefix, "", 1).lstrip("/") sanitized_obj = f"../{sanitized_obj}" out.append(f'{sanitized_obj}
') return "\n".join(sorted(out)) @@ -278,7 +278,7 @@ def to_simple_package_html( out.append('

Links for {}

'.format(package_name.lower().replace("_","-"))) for obj in sorted(self.gen_file_list(subdir, package_name)): maybe_fragment = f"#sha256={obj.checksum}" if obj.checksum else "" - out.append(f' {path.basename(obj).replace("%2B","+")}
') + out.append(f' {path.basename(obj.key).replace("%2B","+")}
') # Adding html footer out.append(' ') out.append('') @@ -369,19 +369,20 @@ def from_S3(cls: Type[S3IndexType], prefix: str) -> S3IndexType: ) for pattern in ACCEPTED_SUBDIR_PATTERNS ]) and obj.key.endswith(ACCEPTED_FILE_EXTENSIONS) - if is_acceptable: - # Add PEP 503-compatible hashes to URLs to allow clients to avoid spurious downloads, if possible. - response = obj.meta.client.head_object(Bucket=BUCKET.name, Key=obj.key, ChecksumMode="ENABLED") - sha256 = (_b64 := response.get("ChecksumSHA256")) and base64.b64decode(_b64).hex() - # For older files, rely on checksumsha256 metadata that can be added to the file later - if sha256 is None: - sha256 = response.get("Metadata", {}).get("checksumsha256") - sanitized_key = obj.key.replace("+", "%2B") - s3_object = S3Object( - key=sanitized_key, - checksum=sha256, - ) - objects.append(s3_object) + if not is_acceptable: + continue + # Add PEP 503-compatible hashes to URLs to allow clients to avoid spurious downloads, if possible. + response = obj.meta.client.head_object(Bucket=BUCKET.name, Key=obj.key, ChecksumMode="ENABLED") + sha256 = (_b64 := response.get("ChecksumSHA256")) and base64.b64decode(_b64).hex() + # For older files, rely on checksum-sha256 metadata that can be added to the file later + if sha256 is None: + sha256 = response.get("Metadata", {}).get("checksum-sha256") + sanitized_key = obj.key.replace("+", "%2B") + s3_object = S3Object( + key=sanitized_key, + checksum=sha256, + ) + objects.append(s3_object) return cls(objects, prefix) From 21ffba158c9c79c6d13ac85d04008d1828d8d139 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Mon, 25 Sep 2023 10:20:51 -0700 Subject: [PATCH 065/212] [S3_management] Update boto3 to 1.28.53 --- s3_management/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/s3_management/requirements.txt b/s3_management/requirements.txt index d9fe7f1f0..fa23e39b1 100644 --- a/s3_management/requirements.txt +++ b/s3_management/requirements.txt @@ -1,2 +1,2 @@ 
-boto3==1.12.7 +boto3==1.28.53 packaging==21.3 From 410ec8ea9298a3a1b11c426e4dfed151df71456a Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Mon, 25 Sep 2023 17:19:05 -0700 Subject: [PATCH 066/212] [manage_s3] Download objects metadata concurrently Using `concurrent.futures.ThreadPoolExecutor` This speeds up rebuilding `whl/test` index from 300 sec to 90 sec on my laptop --- s3_management/manage.py | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index a46a1d9da..0df513d32 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -2,6 +2,7 @@ import argparse import base64 +import concurrent.futures import dataclasses import functools import time @@ -17,6 +18,7 @@ S3 = boto3.resource('s3') +CLIENT = boto3.client('s3') BUCKET = S3.Bucket('pytorch') ACCEPTED_FILE_EXTENSIONS = ("whl", "zip", "tar.gz") @@ -359,8 +361,8 @@ def save_pep503_htmls(self) -> None: @classmethod def from_S3(cls: Type[S3IndexType], prefix: str) -> S3IndexType: - objects = [] prefix = prefix.rstrip("/") + obj_names = [] for obj in BUCKET.objects.filter(Prefix=prefix): is_acceptable = any([path.dirname(obj.key) == prefix] + [ match( @@ -371,18 +373,25 @@ def from_S3(cls: Type[S3IndexType], prefix: str) -> S3IndexType: ]) and obj.key.endswith(ACCEPTED_FILE_EXTENSIONS) if not is_acceptable: continue + obj_names.append(obj.key) + objects = [] + def fetch_metadata(key: str) : + return CLIENT.head_object(Bucket=BUCKET.name, Key=key, ChecksumMode="Enabled") + + with concurrent.futures.ThreadPoolExecutor(max_workers=6) as executor: # Add PEP 503-compatible hashes to URLs to allow clients to avoid spurious downloads, if possible. 
- response = obj.meta.client.head_object(Bucket=BUCKET.name, Key=obj.key, ChecksumMode="ENABLED") - sha256 = (_b64 := response.get("ChecksumSHA256")) and base64.b64decode(_b64).hex() - # For older files, rely on checksum-sha256 metadata that can be added to the file later - if sha256 is None: - sha256 = response.get("Metadata", {}).get("checksum-sha256") - sanitized_key = obj.key.replace("+", "%2B") - s3_object = S3Object( - key=sanitized_key, - checksum=sha256, - ) - objects.append(s3_object) + for obj_key, future in {key: executor.submit(fetch_metadata, key) for key in obj_names}.items(): + response = future.result() + sha256 = (_b64 := response.get("ChecksumSHA256")) and base64.b64decode(_b64).hex() + # For older files, rely on checksum-sha256 metadata that can be added to the file later + if sha256 is None: + sha256 = response.get("Metadata", {}).get("checksum-sha256") + sanitized_key = obj_key.replace("+", "%2B") + s3_object = S3Object( + key=sanitized_key, + checksum=sha256, + ) + objects.append(s3_object) return cls(objects, prefix) From ef74cedc67d27d73df4b3586c804b0591bfdf7b8 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Tue, 26 Sep 2023 10:31:09 -0700 Subject: [PATCH 067/212] Make smoke-test runnable without envvars --- test/smoke_test/smoke_test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/smoke_test/smoke_test.py b/test/smoke_test/smoke_test.py index 539f1ba1b..375ff45be 100644 --- a/test/smoke_test/smoke_test.py +++ b/test/smoke_test/smoke_test.py @@ -57,11 +57,13 @@ def check_version(package: str) -> None: # only makes sense to check nightly package where dates are known if channel == "nightly": check_nightly_binaries_date(package) - else: + elif stable_version is not None: if not torch.__version__.startswith(stable_version): raise RuntimeError( f"Torch version mismatch, expected {stable_version} for channel {channel}. 
But its {torch.__version__}" ) + else: + print(f"Skip version check for channel {channel} as stable version is None") def check_nightly_binaries_date(package: str) -> None: from datetime import datetime, timedelta From f6d12ba88ad82bfa29618c56f88a6f9168ceb757 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Thu, 28 Sep 2023 18:47:50 -0400 Subject: [PATCH 068/212] [aarch64] set acl_build_flags arch=armv8a, remove editing build flags (#1550) Looking at this PR: https://github.com/pytorch/builder/pull/1370/ this line: https://github.com/pytorch/builder/pull/1370/files#diff-54480d0a69ca27f54fb0736a9762caa8b03bd4736dcd77190d99ec3033c9bd2fR229 That fixed the issue: https://github.com/pytorch/pytorch/issues/97226 One of the changes is to set ``` arch=armv8a ``` We are experiencing the same issue now: https://github.com/pytorch/pytorch/issues/109312 Hence this fix. --- aarch64_linux/aarch64_wheel_ci_build.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py index 4ad620ba2..f6797ce1c 100755 --- a/aarch64_linux/aarch64_wheel_ci_build.py +++ b/aarch64_linux/aarch64_wheel_ci_build.py @@ -21,11 +21,8 @@ def build_ArmComputeLibrary(git_clone_flags: str = "") -> None: print('Building Arm Compute Library') os.system("cd / && mkdir /acl") os.system(f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v23.05.1 {git_clone_flags}") - os.system('sed -i -e \'s/"armv8.2-a"/"armv8-a"/g\' ComputeLibrary/SConscript; ' - 'sed -i -e \'s/-march=armv8.2-a+fp16/-march=armv8-a/g\' ComputeLibrary/SConstruct; ' - 'sed -i -e \'s/"-march=armv8.2-a"/"-march=armv8-a"/g\' ComputeLibrary/filedefs.json') os.system("cd ComputeLibrary; export acl_install_dir=/acl; " - "scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=linux openmp=1 cppthreads=0 arch=armv8.2-a multi_isa=1 build=native build_dir=$acl_install_dir/build; " + "scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=linux openmp=1 cppthreads=0 
arch=armv8a multi_isa=1 build=native build_dir=$acl_install_dir/build; " "cp -r arm_compute $acl_install_dir; " "cp -r include $acl_install_dir; " "cp -r utils $acl_install_dir; " From 8465dbe657a814b95b91cc3243b071fc21c37519 Mon Sep 17 00:00:00 2001 From: Nikita Shulga <2453524+malfet@users.noreply.github.com> Date: Fri, 29 Sep 2023 16:26:17 -0700 Subject: [PATCH 069/212] [BE] Fix all flake8 violations in `smoke_test.py` (#1553) Namely: - `if(x):` -> `if x:` - `"dev\d+"` -> `"dev\\d+"` - Keep 2 newlines between functions - Add `assert foo is not None` to suppress "variable assigned but not used" warning --- test/smoke_test/smoke_test.py | 40 +++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/test/smoke_test/smoke_test.py b/test/smoke_test/smoke_test.py index 375ff45be..ca44b0369 100644 --- a/test/smoke_test/smoke_test.py +++ b/test/smoke_test/smoke_test.py @@ -38,6 +38,7 @@ }, ] + class Net(nn.Module): def __init__(self): super(Net, self).__init__() @@ -53,6 +54,7 @@ def forward(self, x): output = self.fc1(x) return output + def check_version(package: str) -> None: # only makes sense to check nightly package where dates are known if channel == "nightly": @@ -65,23 +67,23 @@ def check_version(package: str) -> None: else: print(f"Skip version check for channel {channel} as stable version is None") + def check_nightly_binaries_date(package: str) -> None: from datetime import datetime, timedelta format_dt = '%Y%m%d' - torch_str = torch.__version__ - date_t_str = re.findall("dev\d+", torch.__version__) + date_t_str = re.findall("dev\\d+", torch.__version__) date_t_delta = datetime.now() - datetime.strptime(date_t_str[0][3:], format_dt) if date_t_delta.days >= NIGHTLY_ALLOWED_DELTA: raise RuntimeError( f"the binaries are from {date_t_str} and are more than {NIGHTLY_ALLOWED_DELTA} days old!" 
) - if(package == "all"): + if package == "all": for module in MODULES: imported_module = importlib.import_module(module["name"]) module_version = imported_module.__version__ - date_m_str = re.findall("dev\d+", module_version) + date_m_str = re.findall("dev\\d+", module_version) date_m_delta = datetime.now() - datetime.strptime(date_m_str[0][3:], format_dt) print(f"Nightly date check for {module['name']} version {module_version}") if date_m_delta.days > NIGHTLY_ALLOWED_DELTA: @@ -89,8 +91,9 @@ def check_nightly_binaries_date(package: str) -> None: f"Expected {module['name']} to be less then {NIGHTLY_ALLOWED_DELTA} days. But its {date_m_delta}" ) + def test_cuda_runtime_errors_captured() -> None: - cuda_exception_missed=True + cuda_exception_missed = True try: print("Testing test_cuda_runtime_errors_captured") torch._assert_async(torch.tensor(0, device="cuda")) @@ -101,14 +104,15 @@ def test_cuda_runtime_errors_captured() -> None: cuda_exception_missed = False else: raise e - if(cuda_exception_missed): - raise RuntimeError( f"Expected CUDA RuntimeError but have not received!") + if cuda_exception_missed: + raise RuntimeError("Expected CUDA RuntimeError but have not received!") + def smoke_test_cuda(package: str, runtime_error_check: str) -> None: if not torch.cuda.is_available() and is_cuda_system: raise RuntimeError(f"Expected CUDA {gpu_arch_ver}. However CUDA is not loaded.") - if(package == 'all' and is_cuda_system): + if package == 'all' and is_cuda_system: for module in MODULES: imported_module = importlib.import_module(module["name"]) # TBD for vision move extension module to private so it will @@ -131,12 +135,10 @@ def smoke_test_cuda(package: str, runtime_error_check: str) -> None: print(f"cuDNN enabled? 
{torch.backends.cudnn.enabled}") # torch.compile is available only on Linux and python 3.8-3.10 - if (sys.platform == "linux" or sys.platform == "linux2") and sys.version_info < (3, 11, 0) and channel == "release": - smoke_test_compile() - elif (sys.platform == "linux" or sys.platform == "linux2") and channel != "release": + if sys.platform in ["linux", "linux2"] and (sys.version_info < (3, 11, 0) or channel != "release"): smoke_test_compile() - if(runtime_error_check == "enabled"): + if runtime_error_check == "enabled": test_cuda_runtime_errors_captured() @@ -148,6 +150,7 @@ def smoke_test_conv2d() -> None: m = nn.Conv2d(16, 33, 3, stride=2) # non-square kernels and unequal stride and with padding m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2)) + assert m is not None # non-square kernels and unequal stride and with padding and dilation basic_conv = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1)) input = torch.randn(20, 16, 50, 100) @@ -156,9 +159,10 @@ def smoke_test_conv2d() -> None: if is_cuda_system: print("Testing smoke_test_conv2d with cuda") conv = nn.Conv2d(3, 3, 3).cuda() - x = torch.randn(1, 3, 24, 24).cuda() + x = torch.randn(1, 3, 24, 24, device="cuda") with torch.cuda.amp.autocast(): out = conv(x) + assert out is not None supported_dtypes = [torch.float16, torch.float32, torch.float64] for dtype in supported_dtypes: @@ -166,6 +170,8 @@ def smoke_test_conv2d() -> None: conv = basic_conv.to(dtype).cuda() input = torch.randn(20, 16, 50, 100, device="cuda").type(dtype) output = conv(input) + assert output is not None + def smoke_test_linalg() -> None: print("Testing smoke_test_linalg") @@ -189,10 +195,13 @@ def smoke_test_linalg() -> None: A = torch.randn(20, 16, 50, 100, device="cuda").type(dtype) torch.linalg.svd(A) + def smoke_test_compile() -> None: supported_dtypes = [torch.float16, torch.float32, torch.float64] + def foo(x: torch.Tensor) -> torch.Tensor: return torch.sin(x) + torch.cos(x) + for dtype in 
supported_dtypes: print(f"Testing smoke_test_compile for {dtype}") x = torch.rand(3, 3, device="cuda").type(dtype) @@ -209,6 +218,7 @@ def foo(x: torch.Tensor) -> torch.Tensor: model = Net().to(device="cuda") x_pt2 = torch.compile(model, mode="max-autotune")(x) + def smoke_test_modules(): cwd = os.getcwd() for module in MODULES: @@ -224,9 +234,7 @@ def smoke_test_modules(): smoke_test_command, stderr=subprocess.STDOUT, shell=True, universal_newlines=True) except subprocess.CalledProcessError as exc: - raise RuntimeError( - f"Module {module['name']} FAIL: {exc.returncode} Output: {exc.output}" - ) + raise RuntimeError(f"Module {module['name']} FAIL: {exc.returncode} Output: {exc.output}") else: print("Output: \n{}\n".format(output)) From d0fc085ddd11a8b98c0a1809273be87c65eef68f Mon Sep 17 00:00:00 2001 From: snadampal <87143774+snadampal@users.noreply.github.com> Date: Fri, 29 Sep 2023 22:34:39 -0500 Subject: [PATCH 070/212] [aarch64] patch mkl-dnn to use 'march=armv8-a' as the default build (#1554) --- ...4-fix-default-build-flags-to-armv8-a.patch | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 mkldnn_fix/aarch64-fix-default-build-flags-to-armv8-a.patch diff --git a/mkldnn_fix/aarch64-fix-default-build-flags-to-armv8-a.patch b/mkldnn_fix/aarch64-fix-default-build-flags-to-armv8-a.patch new file mode 100644 index 000000000..f6e91010a --- /dev/null +++ b/mkldnn_fix/aarch64-fix-default-build-flags-to-armv8-a.patch @@ -0,0 +1,29 @@ +--- + cmake/platform.cmake | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/cmake/platform.cmake b/cmake/platform.cmake +index 8630460ce..602eafe8e 100644 +--- a/cmake/platform.cmake ++++ b/cmake/platform.cmake +@@ -198,7 +198,7 @@ elseif(UNIX OR MINGW) + endif() + # For native compilation tune for the host processor + if (CMAKE_SYSTEM_PROCESSOR STREQUAL CMAKE_HOST_SYSTEM_PROCESSOR) +- append(DEF_ARCH_OPT_FLAGS "-mcpu=native") ++ append(DEF_ARCH_OPT_FLAGS "-march=armv8-a") + endif() + 
elseif(DNNL_TARGET_ARCH STREQUAL "PPC64") + if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug") +@@ -295,7 +295,7 @@ elseif(UNIX OR MINGW) + endif() + # For native compilation tune for the host processor + if (CMAKE_SYSTEM_PROCESSOR STREQUAL CMAKE_HOST_SYSTEM_PROCESSOR) +- append(DEF_ARCH_OPT_FLAGS "-mcpu=native") ++ append(DEF_ARCH_OPT_FLAGS "-march=armv8-a") + endif() + elseif(DNNL_TARGET_ARCH STREQUAL "PPC64") + if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug") +-- +2.34.1 + From 6021651b99e8bacdc7fba4f6f60f0034bc053190 Mon Sep 17 00:00:00 2001 From: snadampal <87143774+snadampal@users.noreply.github.com> Date: Fri, 29 Sep 2023 22:35:25 -0500 Subject: [PATCH 071/212] [aarch64] patch pytorch 2.1 for mkl-dnn fix (#1555) --- aarch64_linux/build_aarch64_wheel.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/aarch64_linux/build_aarch64_wheel.py b/aarch64_linux/build_aarch64_wheel.py index 5595dc94a..dd43bf218 100755 --- a/aarch64_linux/build_aarch64_wheel.py +++ b/aarch64_linux/build_aarch64_wheel.py @@ -554,7 +554,9 @@ def start_build(host: RemoteHost, *, build_ArmComputeLibrary(host, git_clone_flags) print("build pytorch with mkldnn+acl backend") build_vars += " USE_MKLDNN=ON USE_MKLDNN_ACL=ON" - host.run_cmd(f"cd pytorch && export ACL_ROOT_DIR=$HOME/ComputeLibrary && {build_vars} python3 setup.py bdist_wheel{build_opts}") + host.run_cmd(f"cd $HOME && git clone https://github.com/pytorch/builder.git") + host.run_cmd(f"cd $HOME/pytorch/third_party/ideep/mkl-dnn && patch -p1 < $HOME/builder/mkldnn_fix/aarch64-fix-default-build-flags-to-armv8-a.patch") + host.run_cmd(f"cd $HOME/pytorch && export ACL_ROOT_DIR=$HOME/ComputeLibrary && {build_vars} python3 setup.py bdist_wheel{build_opts}") print('Repair the wheel') pytorch_wheel_name = host.list_dir("pytorch/dist")[0] host.run_cmd(f"export LD_LIBRARY_PATH=$HOME/acl/build:$HOME/pytorch/build/lib && auditwheel repair $HOME/pytorch/dist/{pytorch_wheel_name}") From 71772d14b84960eb3a0c2a8b6f91b86efd1593be Mon Sep 17 
00:00:00 2001 From: snadampal <87143774+snadampal@users.noreply.github.com> Date: Fri, 29 Sep 2023 23:07:27 -0500 Subject: [PATCH 072/212] patch ci script with mkldnn fix (#1556) --- aarch64_linux/aarch64_wheel_ci_build.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py index f6797ce1c..da789a231 100755 --- a/aarch64_linux/aarch64_wheel_ci_build.py +++ b/aarch64_linux/aarch64_wheel_ci_build.py @@ -105,6 +105,9 @@ def parse_arguments(): else: print("build pytorch without mkldnn backend") + # work around to fix Raspberry pie crash + os.system(f"cd $HOME && git clone https://github.com/pytorch/builder.git") + os.system(f"cd $HOME/pytorch/third_party/ideep/mkl-dnn && patch -p1 < $HOME/builder/mkldnn_fix/aarch64-fix-default-build-flags-to-armv8-a.patch") os.system(f"cd /pytorch; {build_vars} python3 setup.py bdist_wheel") pytorch_wheel_name = complete_wheel("pytorch") print(f"Build Compelete. Created {pytorch_wheel_name}..") From 685a807f08e46e0479c3ccdd175cb3333da4d72f Mon Sep 17 00:00:00 2001 From: Nikita Shulga <2453524+malfet@users.noreply.github.com> Date: Sat, 30 Sep 2023 13:57:38 -0700 Subject: [PATCH 073/212] [BE] Add lint workflow (#1557) And format `smoke_test.py` with `ruff` Invoke/confgure `ruff` using `lintrunner` Copy lint runner adapters from https://github.com/pytorch/pytorch/tree/main/tools/linter/adapters --- .github/workflows/lint.yml | 35 ++ .lintrunner.toml | 20 ++ pyproject.toml | 23 ++ test/smoke_test/smoke_test.py | 14 +- tools/linter/adapters/pip_init.py | 83 +++++ tools/linter/adapters/ruff_linter.py | 462 +++++++++++++++++++++++++++ 6 files changed, 629 insertions(+), 8 deletions(-) create mode 100644 .github/workflows/lint.yml create mode 100644 .lintrunner.toml create mode 100644 pyproject.toml create mode 100644 tools/linter/adapters/pip_init.py create mode 100644 tools/linter/adapters/ruff_linter.py diff --git a/.github/workflows/lint.yml 
b/.github/workflows/lint.yml new file mode 100644 index 000000000..c8bd05613 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,35 @@ +name: Lint + +on: + push: + branches: + - main + pull_request: + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} + cancel-in-progress: true + +jobs: + lintrunner: + name: lintrunner + + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v3 + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: "3.11" + - name: Install Lintrunner + run: | + pip install lintrunner + lintrunner init + - name: Run lintrunner on all files - Linux + run: | + set +e + if ! lintrunner -v --force-color --all-files --tee-json=lint.json; then + echo "" + echo -e "\e[1m\e[36mYou can reproduce these results locally by using \`lintrunner -m main\`.\e[0m" + exit 1 + fi diff --git a/.lintrunner.toml b/.lintrunner.toml new file mode 100644 index 000000000..c551cb732 --- /dev/null +++ b/.lintrunner.toml @@ -0,0 +1,20 @@ +merge_base_with = "origin/main" + +[[linter]] +code = 'RUFF' +include_patterns = ['test/smoke_test/*.py'] +command = [ + 'python3', + 'tools/linter/adapters/ruff_linter.py', + '--config=pyproject.toml', + '--show-disable', + '--', + '@{{PATHSFILE}}' +] +init_command = [ + 'python3', + 'tools/linter/adapters/pip_init.py', + '--dry-run={{DRYRUN}}', + 'ruff==0.0.290', +] +is_formatter = true diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..efa884a07 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,23 @@ +[tool.ruff] +target-version = "py38" +line-length = 120 +select = [ + "B", + "C4", + "G", + "E", + "F", + "SIM1", + "W", + # Not included in flake8 + "UP", + "PERF", + "PGH004", + "PIE807", + "PIE810", + "PLE", + "PLR1722", # use sys exit + "PLW3301", # nested min max + "RUF017", + "TRY302", +] diff --git a/test/smoke_test/smoke_test.py b/test/smoke_test/smoke_test.py index 
ca44b0369..8ae1d1c51 100644 --- a/test/smoke_test/smoke_test.py +++ b/test/smoke_test/smoke_test.py @@ -1,10 +1,8 @@ import os import re import sys -from pathlib import Path import argparse import torch -import platform import importlib import subprocess import torch._dynamo @@ -41,7 +39,7 @@ class Net(nn.Module): def __init__(self): - super(Net, self).__init__() + super().__init__() self.conv1 = nn.Conv2d(1, 32, 3, 1) self.conv2 = nn.Conv2d(32, 64, 3, 1) self.fc1 = nn.Linear(9216, 1) @@ -69,7 +67,7 @@ def check_version(package: str) -> None: def check_nightly_binaries_date(package: str) -> None: - from datetime import datetime, timedelta + from datetime import datetime format_dt = '%Y%m%d' date_t_str = re.findall("dev\\d+", torch.__version__) @@ -177,11 +175,11 @@ def smoke_test_linalg() -> None: print("Testing smoke_test_linalg") A = torch.randn(5, 3) U, S, Vh = torch.linalg.svd(A, full_matrices=False) - U.shape, S.shape, Vh.shape + assert U.shape == A.shape and S.shape == torch.Size([3]) and Vh.shape == torch.Size([3, 3]) torch.dist(A, U @ torch.diag(S) @ Vh) U, S, Vh = torch.linalg.svd(A) - U.shape, S.shape, Vh.shape + assert U.shape == A.shape and S.shape == torch.Size([3]) and Vh.shape == torch.Size([3, 3]) torch.dist(A, U[:, :3] @ torch.diag(S) @ Vh) A = torch.randn(7, 5, 3) @@ -234,9 +232,9 @@ def smoke_test_modules(): smoke_test_command, stderr=subprocess.STDOUT, shell=True, universal_newlines=True) except subprocess.CalledProcessError as exc: - raise RuntimeError(f"Module {module['name']} FAIL: {exc.returncode} Output: {exc.output}") + raise RuntimeError(f"Module {module['name']} FAIL: {exc.returncode} Output: {exc.output}") from exc else: - print("Output: \n{}\n".format(output)) + print(f"Output: \n{output}\n") def main() -> None: diff --git a/tools/linter/adapters/pip_init.py b/tools/linter/adapters/pip_init.py new file mode 100644 index 000000000..f177a920d --- /dev/null +++ b/tools/linter/adapters/pip_init.py @@ -0,0 +1,83 @@ +""" +Initializer script 
that installs stuff to pip. +""" +import argparse +import logging +import os +import subprocess +import sys +import time + +from typing import List + + +def run_command(args: List[str]) -> "subprocess.CompletedProcess[bytes]": + logging.debug("$ %s", " ".join(args)) + start_time = time.monotonic() + try: + return subprocess.run(args, check=True) + finally: + end_time = time.monotonic() + logging.debug("took %dms", (end_time - start_time) * 1000) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="pip initializer") + parser.add_argument( + "packages", + nargs="+", + help="pip packages to install", + ) + parser.add_argument( + "--verbose", + action="store_true", + help="verbose logging", + ) + parser.add_argument( + "--dry-run", help="do not install anything, just print what would be done." + ) + parser.add_argument( + "--no-black-binary", + help="do not use pre-compiled binaries from pip for black.", + action="store_true", + ) + + args = parser.parse_args() + + logging.basicConfig( + format="<%(threadName)s:%(levelname)s> %(message)s", + level=logging.NOTSET if args.verbose else logging.DEBUG, + stream=sys.stderr, + ) + + pip_args = ["pip3", "install"] + + # If we are in a global install, use `--user` to install so that you do not + # need root access in order to initialize linters. + # + # However, `pip install --user` interacts poorly with virtualenvs (see: + # https://bit.ly/3vD4kvl) and conda (see: https://bit.ly/3KG7ZfU). So in + # these cases perform a regular installation. + in_conda = os.environ.get("CONDA_PREFIX") is not None + in_virtualenv = os.environ.get("VIRTUAL_ENV") is not None + if not in_conda and not in_virtualenv: + pip_args.append("--user") + + pip_args.extend(args.packages) + + for package in args.packages: + package_name, _, version = package.partition("=") + if version == "": + raise RuntimeError( + "Package {package_name} did not have a version specified. 
" + "Please specify a version to produce a consistent linting experience." + ) + if args.no_black_binary and "black" in package_name: + pip_args.append(f"--no-binary={package_name}") + + dry_run = args.dry_run == "1" + if dry_run: + print(f"Would have run: {pip_args}") + sys.exit(0) + + run_command(pip_args) diff --git a/tools/linter/adapters/ruff_linter.py b/tools/linter/adapters/ruff_linter.py new file mode 100644 index 000000000..451834aa7 --- /dev/null +++ b/tools/linter/adapters/ruff_linter.py @@ -0,0 +1,462 @@ +"""Adapter for https://github.com/charliermarsh/ruff.""" + +from __future__ import annotations + +import argparse +import concurrent.futures +import dataclasses +import enum +import json +import logging +import os +import subprocess +import sys +import time +from typing import Any, BinaryIO + +LINTER_CODE = "RUFF" +IS_WINDOWS: bool = os.name == "nt" + + +def eprint(*args: Any, **kwargs: Any) -> None: + """Print to stderr.""" + print(*args, file=sys.stderr, flush=True, **kwargs) + + +class LintSeverity(str, enum.Enum): + """Severity of a lint message.""" + + ERROR = "error" + WARNING = "warning" + ADVICE = "advice" + DISABLED = "disabled" + + +@dataclasses.dataclass(frozen=True) +class LintMessage: + """A lint message defined by https://docs.rs/lintrunner/latest/lintrunner/lint_message/struct.LintMessage.html.""" + + path: str | None + line: int | None + char: int | None + code: str + severity: LintSeverity + name: str + original: str | None + replacement: str | None + description: str | None + + def asdict(self) -> dict[str, Any]: + return dataclasses.asdict(self) + + def display(self) -> None: + """Print to stdout for lintrunner to consume.""" + print(json.dumps(self.asdict()), flush=True) + + +def as_posix(name: str) -> str: + return name.replace("\\", "/") if IS_WINDOWS else name + + +def _run_command( + args: list[str], + *, + timeout: int | None, + stdin: BinaryIO | None, + input: bytes | None, + check: bool, + cwd: os.PathLike[Any] | None, +) -> 
subprocess.CompletedProcess[bytes]: + logging.debug("$ %s", " ".join(args)) + start_time = time.monotonic() + try: + if input is not None: + return subprocess.run( + args, + capture_output=True, + shell=False, + input=input, + timeout=timeout, + check=check, + cwd=cwd, + ) + + return subprocess.run( + args, + stdin=stdin, + capture_output=True, + shell=False, + timeout=timeout, + check=check, + cwd=cwd, + ) + finally: + end_time = time.monotonic() + logging.debug("took %dms", (end_time - start_time) * 1000) + + +def run_command( + args: list[str], + *, + retries: int = 0, + timeout: int | None = None, + stdin: BinaryIO | None = None, + input: bytes | None = None, + check: bool = False, + cwd: os.PathLike[Any] | None = None, +) -> subprocess.CompletedProcess[bytes]: + remaining_retries = retries + while True: + try: + return _run_command( + args, timeout=timeout, stdin=stdin, input=input, check=check, cwd=cwd + ) + except subprocess.TimeoutExpired as err: + if remaining_retries == 0: + raise err + remaining_retries -= 1 + logging.warning( + "(%s/%s) Retrying because command failed with: %r", + retries - remaining_retries, + retries, + err, + ) + time.sleep(1) + + +def add_default_options(parser: argparse.ArgumentParser) -> None: + """Add default options to a parser. + + This should be called the last in the chain of add_argument calls. 
+ """ + parser.add_argument( + "--retries", + type=int, + default=3, + help="number of times to retry if the linter times out.", + ) + parser.add_argument( + "--verbose", + action="store_true", + help="verbose logging", + ) + parser.add_argument( + "filenames", + nargs="+", + help="paths to lint", + ) + + +def explain_rule(code: str) -> str: + proc = run_command( + ["ruff", "rule", "--format=json", code], + check=True, + ) + rule = json.loads(str(proc.stdout, "utf-8").strip()) + return f"\n{rule['linter']}: {rule['summary']}" + + +def get_issue_severity(code: str) -> LintSeverity: + # "B901": `return x` inside a generator + # "B902": Invalid first argument to a method + # "B903": __slots__ efficiency + # "B950": Line too long + # "C4": Flake8 Comprehensions + # "C9": Cyclomatic complexity + # "E2": PEP8 horizontal whitespace "errors" + # "E3": PEP8 blank line "errors" + # "E5": PEP8 line length "errors" + # "T400": type checking Notes + # "T49": internal type checker errors or unmatched messages + if any( + code.startswith(x) + for x in ( + "B9", + "C4", + "C9", + "E2", + "E3", + "E5", + "T400", + "T49", + "PLC", + "PLR", + ) + ): + return LintSeverity.ADVICE + + # "F821": Undefined name + # "E999": syntax error + if any(code.startswith(x) for x in ("F821", "E999", "PLE")): + return LintSeverity.ERROR + + # "F": PyFlakes Error + # "B": flake8-bugbear Error + # "E": PEP8 "Error" + # "W": PEP8 Warning + # possibly other plugins... 
+ return LintSeverity.WARNING + + +def format_lint_message( + message: str, code: str, rules: dict[str, str], show_disable: bool +) -> str: + if rules: + message += f".\n{rules.get(code) or ''}" + message += ".\nSee https://beta.ruff.rs/docs/rules/" + if show_disable: + message += f".\n\nTo disable, use ` # noqa: {code}`" + return message + + +def check_files( + filenames: list[str], + severities: dict[str, LintSeverity], + *, + config: str | None, + retries: int, + timeout: int, + explain: bool, + show_disable: bool, +) -> list[LintMessage]: + try: + proc = run_command( + [ + sys.executable, + "-m", + "ruff", + "--exit-zero", + "--quiet", + "--format=json", + *([f"--config={config}"] if config else []), + *filenames, + ], + retries=retries, + timeout=timeout, + check=True, + ) + except (OSError, subprocess.CalledProcessError) as err: + return [ + LintMessage( + path=None, + line=None, + char=None, + code=LINTER_CODE, + severity=LintSeverity.ERROR, + name="command-failed", + original=None, + replacement=None, + description=( + f"Failed due to {err.__class__.__name__}:\n{err}" + if not isinstance(err, subprocess.CalledProcessError) + else ( + f"COMMAND (exit code {err.returncode})\n" + f"{' '.join(as_posix(x) for x in err.cmd)}\n\n" + f"STDERR\n{err.stderr.decode('utf-8').strip() or '(empty)'}\n\n" + f"STDOUT\n{err.stdout.decode('utf-8').strip() or '(empty)'}" + ) + ), + ) + ] + + stdout = str(proc.stdout, "utf-8").strip() + vulnerabilities = json.loads(stdout) + + if explain: + all_codes = {v["code"] for v in vulnerabilities} + rules = {code: explain_rule(code) for code in all_codes} + else: + rules = {} + + return [ + LintMessage( + path=vuln["filename"], + name=vuln["code"], + description=( + format_lint_message( + vuln["message"], + vuln["code"], + rules, + show_disable, + ) + ), + line=int(vuln["location"]["row"]), + char=int(vuln["location"]["column"]), + code=LINTER_CODE, + severity=severities.get(vuln["code"], get_issue_severity(vuln["code"])), + 
original=None, + replacement=None, + ) + for vuln in vulnerabilities + ] + + +def check_file_for_fixes( + filename: str, + *, + config: str | None, + retries: int, + timeout: int, +) -> list[LintMessage]: + try: + with open(filename, "rb") as f: + original = f.read() + with open(filename, "rb") as f: + proc_fix = run_command( + [ + sys.executable, + "-m", + "ruff", + "--fix-only", + "--exit-zero", + *([f"--config={config}"] if config else []), + "--stdin-filename", + filename, + "-", + ], + stdin=f, + retries=retries, + timeout=timeout, + check=True, + ) + except (OSError, subprocess.CalledProcessError) as err: + return [ + LintMessage( + path=None, + line=None, + char=None, + code=LINTER_CODE, + severity=LintSeverity.ERROR, + name="command-failed", + original=None, + replacement=None, + description=( + f"Failed due to {err.__class__.__name__}:\n{err}" + if not isinstance(err, subprocess.CalledProcessError) + else ( + f"COMMAND (exit code {err.returncode})\n" + f"{' '.join(as_posix(x) for x in err.cmd)}\n\n" + f"STDERR\n{err.stderr.decode('utf-8').strip() or '(empty)'}\n\n" + f"STDOUT\n{err.stdout.decode('utf-8').strip() or '(empty)'}" + ) + ), + ) + ] + + replacement = proc_fix.stdout + if original == replacement: + return [] + + return [ + LintMessage( + path=filename, + name="format", + description="Run `lintrunner -a` to apply this patch.", + line=None, + char=None, + code=LINTER_CODE, + severity=LintSeverity.WARNING, + original=original.decode("utf-8"), + replacement=replacement.decode("utf-8"), + ) + ] + + +def main() -> None: + parser = argparse.ArgumentParser( + description=f"Ruff linter. Linter code: {LINTER_CODE}. 
Use with RUFF-FIX to auto-fix issues.", + fromfile_prefix_chars="@", + ) + parser.add_argument( + "--config", + default=None, + help="Path to the `pyproject.toml` or `ruff.toml` file to use for configuration", + ) + parser.add_argument( + "--explain", + action="store_true", + help="Explain a rule", + ) + parser.add_argument( + "--show-disable", + action="store_true", + help="Show how to disable a lint message", + ) + parser.add_argument( + "--timeout", + default=90, + type=int, + help="Seconds to wait for ruff", + ) + parser.add_argument( + "--severity", + action="append", + help="map code to severity (e.g. `F401:advice`). This option can be used multiple times.", + ) + parser.add_argument( + "--no-fix", + action="store_true", + help="Do not suggest fixes", + ) + add_default_options(parser) + args = parser.parse_args() + + logging.basicConfig( + format="<%(threadName)s:%(levelname)s> %(message)s", + level=logging.NOTSET + if args.verbose + else logging.DEBUG + if len(args.filenames) < 1000 + else logging.INFO, + stream=sys.stderr, + ) + + severities: dict[str, LintSeverity] = {} + if args.severity: + for severity in args.severity: + parts = severity.split(":", 1) + assert len(parts) == 2, f"invalid severity `{severity}`" + severities[parts[0]] = LintSeverity(parts[1]) + + lint_messages = check_files( + args.filenames, + severities=severities, + config=args.config, + retries=args.retries, + timeout=args.timeout, + explain=args.explain, + show_disable=args.show_disable, + ) + for lint_message in lint_messages: + lint_message.display() + + if args.no_fix or not lint_messages: + # If we're not fixing, we can exit early + return + + files_with_lints = {lint.path for lint in lint_messages if lint.path is not None} + with concurrent.futures.ThreadPoolExecutor( + max_workers=os.cpu_count(), + thread_name_prefix="Thread", + ) as executor: + futures = { + executor.submit( + check_file_for_fixes, + path, + config=args.config, + retries=args.retries, + timeout=args.timeout, + 
): path + for path in files_with_lints + } + for future in concurrent.futures.as_completed(futures): + try: + for lint_message in future.result(): + lint_message.display() + except Exception: # Catch all exceptions for lintrunner + logging.critical('Failed at "%s".', futures[future]) + raise + + +if __name__ == "__main__": + main() From b2b429b7e4abd84955d91e76c288faae7ec9d19e Mon Sep 17 00:00:00 2001 From: Nikita Shulga <2453524+malfet@users.noreply.github.com> Date: Sat, 30 Sep 2023 14:08:43 -0700 Subject: [PATCH 074/212] [BE] Add `s3_management` to the linted folders (#1558) Add `PERF401` to list of ignored suggestions, fix the rest. --- .lintrunner.toml | 2 +- pyproject.toml | 3 +++ s3_management/backup_conda.py | 14 ++++++++------ s3_management/manage.py | 17 +++++++---------- 4 files changed, 19 insertions(+), 17 deletions(-) diff --git a/.lintrunner.toml b/.lintrunner.toml index c551cb732..5c63a6c5d 100644 --- a/.lintrunner.toml +++ b/.lintrunner.toml @@ -2,7 +2,7 @@ merge_base_with = "origin/main" [[linter]] code = 'RUFF' -include_patterns = ['test/smoke_test/*.py'] +include_patterns = ['test/smoke_test/*.py', 's3_management/*.py'] command = [ 'python3', 'tools/linter/adapters/ruff_linter.py', diff --git a/pyproject.toml b/pyproject.toml index efa884a07..b16714852 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,9 @@ [tool.ruff] target-version = "py38" line-length = 120 +ignore = [ + "PERF401", +] select = [ "B", "C4", diff --git a/s3_management/backup_conda.py b/s3_management/backup_conda.py index 06926589d..7dafa32b4 100644 --- a/s3_management/backup_conda.py +++ b/s3_management/backup_conda.py @@ -4,9 +4,9 @@ # Do not use unless you know what you are doing # Usage: python backup_conda.py --version 1.6.0 -import conda.api import boto3 from typing import List, Optional +import conda.api import urllib import os import hashlib @@ -22,8 +22,11 @@ def compute_md5(path:str) -> str: return hashlib.md5(f.read()).hexdigest() -def 
download_conda_package(package:str, version:Optional[str] = None, depends:Optional[str] = None, channel:Optional[str] = None) -> List[str]: - packages = conda.api.SubdirData.query_all(package, channels = [channel] if channel is not None else None, subdirs = _known_subdirs) +def download_conda_package(package:str, version:Optional[str] = None, + depends:Optional[str] = None, channel:Optional[str] = None) -> List[str]: + packages = conda.api.SubdirData.query_all(package, + channels = [channel] if channel is not None else None, + subdirs = _known_subdirs) rc = [] for pkg in packages: @@ -36,9 +39,8 @@ def download_conda_package(package:str, version:Optional[str] = None, depends:Op os.makedirs(pkg.subdir, exist_ok = True) fname = f"{pkg.subdir}/{pkg.fn}" if not os.path.exists(fname): - with open(fname, "wb") as f: - with urllib.request.urlopen(pkg.url) as url: - f.write(url.read()) + with open(fname, "wb") as f, urllib.request.urlopen(pkg.url) as url: + f.write(url.read()) if compute_md5(fname) != pkg.md5: print(f"md5 of {fname} is {compute_md5(fname)} does not match {pkg.md5}") continue diff --git a/s3_management/manage.py b/s3_management/manage.py index 0df513d32..57af9259b 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -184,9 +184,7 @@ def nightly_packages_to_show(self: S3IndexType) -> Set[S3Object]: if package_name not in PACKAGE_ALLOW_LIST: to_hide.add(obj) continue - if packages[package_name] >= KEEP_THRESHOLD: - to_hide.add(obj) - elif between_bad_dates(package_build_time): + if packages[package_name] >= KEEP_THRESHOLD or between_bad_dates(package_build_time): to_hide.add(obj) else: packages[package_name] += 1 @@ -215,14 +213,13 @@ def gen_file_list( ) subdir = self._resolve_subdir(subdir) + '/' for obj in objects: - if package_name is not None: - if self.obj_to_package_name(obj) != package_name: - continue + if package_name is not None and self.obj_to_package_name(obj) != package_name: + continue if self.is_obj_at_root(obj) or 
obj.key.startswith(subdir): yield obj def get_package_names(self, subdir: Optional[str] = None) -> List[str]: - return sorted(set(self.obj_to_package_name(obj) for obj in self.gen_file_list(subdir))) + return sorted({self.obj_to_package_name(obj) for obj in self.gen_file_list(subdir)}) def normalize_package_version(self: S3IndexType, obj: S3Object) -> str: # removes the GPU specifier from the package name as well as @@ -284,7 +281,7 @@ def to_simple_package_html( # Adding html footer out.append(' ') out.append('') - out.append(''.format(int(time.time()))) + out.append(f'') return '\n'.join(out) def to_simple_packages_html( @@ -303,7 +300,7 @@ def to_simple_packages_html( # Adding html footer out.append(' ') out.append('') - out.append(''.format(int(time.time()))) + out.append(f'') return '\n'.join(out) def upload_legacy_html(self) -> None: @@ -412,7 +409,7 @@ def main(): args = parser.parse_args() action = "Saving" if args.do_not_upload else "Uploading" if args.prefix == 'all': - for prefix in PREFIXES_WITH_HTML.keys(): + for prefix in PREFIXES_WITH_HTML: print(f"INFO: {action} indices for '{prefix}'") idx = S3Index.from_S3(prefix=prefix) if args.do_not_upload: From aa6df6fc264ee6a414652d37f2022d5634e42174 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Mon, 2 Oct 2023 10:47:29 -0400 Subject: [PATCH 075/212] Fix path issue when building aarch64 wheels (#1560) --- aarch64_linux/aarch64_wheel_ci_build.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py index da789a231..bdc6717ef 100755 --- a/aarch64_linux/aarch64_wheel_ci_build.py +++ b/aarch64_linux/aarch64_wheel_ci_build.py @@ -106,8 +106,8 @@ def parse_arguments(): print("build pytorch without mkldnn backend") # work around to fix Raspberry pie crash - os.system(f"cd $HOME && git clone https://github.com/pytorch/builder.git") - os.system(f"cd $HOME/pytorch/third_party/ideep/mkl-dnn && patch -p1 < 
$HOME/builder/mkldnn_fix/aarch64-fix-default-build-flags-to-armv8-a.patch") + print("Applying mkl-dnn patch to fix Raspberry pie crash") + os.system(f"cd /pytorch/third_party/ideep/mkl-dnn && patch -p1 < /builder/mkldnn_fix/aarch64-fix-default-build-flags-to-armv8-a.patch") os.system(f"cd /pytorch; {build_vars} python3 setup.py bdist_wheel") pytorch_wheel_name = complete_wheel("pytorch") print(f"Build Compelete. Created {pytorch_wheel_name}..") From ab2443d2a8e666bfc589bc625990d4d9b092a6d4 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Mon, 2 Oct 2023 15:50:34 -0400 Subject: [PATCH 076/212] Fix linalg smoke tests (#1563) --- test/smoke_test/smoke_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/smoke_test/smoke_test.py b/test/smoke_test/smoke_test.py index 8ae1d1c51..3d1b6af64 100644 --- a/test/smoke_test/smoke_test.py +++ b/test/smoke_test/smoke_test.py @@ -179,7 +179,7 @@ def smoke_test_linalg() -> None: torch.dist(A, U @ torch.diag(S) @ Vh) U, S, Vh = torch.linalg.svd(A) - assert U.shape == A.shape and S.shape == torch.Size([3]) and Vh.shape == torch.Size([3, 3]) + assert U.shape == torch.Size([5, 5]) and S.shape == torch.Size([3]) and Vh.shape == torch.Size([3, 3]) torch.dist(A, U[:, :3] @ torch.diag(S) @ Vh) A = torch.randn(7, 5, 3) From 50a6e91f972158529a7c9f06c4c6fd4510196e32 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Mon, 2 Oct 2023 21:02:59 -0700 Subject: [PATCH 077/212] Towards enabling M1 wheel builds Do not try to install MKL on Apple Silicon --- wheel/build_wheel.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/wheel/build_wheel.sh b/wheel/build_wheel.sh index 99d251977..a45522bf8 100755 --- a/wheel/build_wheel.sh +++ b/wheel/build_wheel.sh @@ -168,7 +168,11 @@ if [[ "$desired_python" == "3.11" ]]; then else retry conda install ${EXTRA_CONDA_INSTALL_FLAGS} -yq "numpy${NUMPY_PINNED_VERSION}" nomkl "setuptools${SETUPTOOLS_PINNED_VERSION}" "pyyaml${PYYAML_PINNED_VERSION}" typing_extensions 
requests fi -retry conda install ${EXTRA_CONDA_INSTALL_FLAGS} -yq cmake ninja mkl-include==2022.2.1 mkl-static==2022.2.1 -c intel +if [[ "$(uname -m)" == "arm64" ]]; then + retry conda install ${EXTRA_CONDA_INSTALL_FLAGS} -yq cmake ninja +else + retry conda install ${EXTRA_CONDA_INSTALL_FLAGS} -yq cmake ninja mkl-include==2022.2.1 mkl-static==2022.2.1 -c intel +fi retry pip install -qr "${pytorch_rootdir}/requirements.txt" || true # For USE_DISTRIBUTED=1 on macOS, need libuv and pkg-config to find libuv. From bbb29b0467ecbf4fa1f14e51f30f68a0d7c9dc23 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Mon, 2 Oct 2023 21:12:50 -0700 Subject: [PATCH 078/212] And only install llvm-9 on x86 systems --- wheel/build_wheel.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wheel/build_wheel.sh b/wheel/build_wheel.sh index a45522bf8..5ac52f4d4 100755 --- a/wheel/build_wheel.sh +++ b/wheel/build_wheel.sh @@ -184,7 +184,7 @@ if [[ -n "$CROSS_COMPILE_ARM64" ]]; then export USE_MKLDNN=OFF export USE_QNNPACK=OFF export BUILD_TEST=OFF -else +elif [[ "$(uname -m)" == "x86_64" ]]; then retry conda install ${EXTRA_CONDA_INSTALL_FLAGS} -yq llvmdev=9 export USE_LLVM="${CONDA_PREFIX}" fi From cc4f1f9055b9e32eb4c81d8525f10a537c913274 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Mon, 2 Oct 2023 22:23:09 -0700 Subject: [PATCH 079/212] Do not build tests when building natively on M1 --- wheel/build_wheel.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/wheel/build_wheel.sh b/wheel/build_wheel.sh index 5ac52f4d4..d93e16eff 100755 --- a/wheel/build_wheel.sh +++ b/wheel/build_wheel.sh @@ -179,8 +179,10 @@ retry pip install -qr "${pytorch_rootdir}/requirements.txt" || true export USE_DISTRIBUTED=1 retry conda install ${EXTRA_CONDA_INSTALL_FLAGS} -yq libuv pkg-config -if [[ -n "$CROSS_COMPILE_ARM64" ]]; then - export CMAKE_OSX_ARCHITECTURES=arm64 +if [[ -n "$CROSS_COMPILE_ARM64" || "$(uname -m)" == "arm64" ]]; then + if [[ -n 
"$CROSS_COMPILE_ARM64" ]]; then + export CMAKE_OSX_ARCHITECTURES=arm64 + fi export USE_MKLDNN=OFF export USE_QNNPACK=OFF export BUILD_TEST=OFF From 8bcc83dbb1e947f7e4c546ea19959d775e7fa7d6 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Mon, 2 Oct 2023 22:24:37 -0700 Subject: [PATCH 080/212] And fix Python-3.8 native compilation on M1 There are no numpy=3.17 for M1 --- wheel/build_wheel.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/wheel/build_wheel.sh b/wheel/build_wheel.sh index d93e16eff..75138fc09 100755 --- a/wheel/build_wheel.sh +++ b/wheel/build_wheel.sh @@ -151,7 +151,13 @@ case ${desired_python} in NUMPY_PINNED_VERSION="=1.19" ;; 3.8) - NUMPY_PINNED_VERSION="=1.17" + if [[ "$(uname -m)" == "arm64" ]]; then + SETUPTOOLS_PINNED_VERSION=">=46.0.0" + PYYAML_PINNED_VERSION=">=5.3" + NUMPY_PINNED_VERSION="=1.19" + else + NUMPY_PINNED_VERSION="=1.17" + fi ;; *) NUMPY_PINNED_VERSION="=1.11.3" From b39cccf546f54c95f15001d0cc5f2ce222d35fdd Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Tue, 3 Oct 2023 14:08:11 -0400 Subject: [PATCH 081/212] Release 2.1 update promotion scripts (#1564) --- release/promote.sh | 14 +++++++------- release/pypi/promote_pypi_to_staging.sh | 4 ++-- release/release_versions.sh | 10 +++++----- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/release/promote.sh b/release/promote.sh index 1147dc0c9..b3656dda6 100644 --- a/release/promote.sh +++ b/release/promote.sh @@ -6,11 +6,11 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" source "${DIR}/release_versions.sh" # Make sure to update these versions when doing a release first -PYTORCH_VERSION=${PYTORCH_VERSION:-2.0.0} -TORCHVISION_VERSION=${TORCHVISION_VERSION:-0.15.0} -TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION:-2.0.0} -TORCHTEXT_VERSION=${TORCHTEXT_VERSION:-0.15.0} -TORCHDATA_VERSION=${TORCHDATA_VERSION:-0.6.0} +PYTORCH_VERSION=${PYTORCH_VERSION:-2.1.0} +TORCHVISION_VERSION=${TORCHVISION_VERSION:-0.16.0} 
+TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION:-2.1.0} +TORCHTEXT_VERSION=${TORCHTEXT_VERSION:-0.16.0} +TORCHDATA_VERSION=${TORCHDATA_VERSION:-0.7.0} DRY_RUN=${DRY_RUN:-enabled} @@ -104,9 +104,9 @@ promote_pypi() { # promote_s3 torchdata whl "${TORCHDATA_VERSION}" # promote_s3 "libtorch-*" libtorch "${PYTORCH_VERSION}" -# promote_conda torchtriton conda "2.0.0" -# promote_conda pytorch-cuda conda "11.7" +# promote_conda torchtriton conda "2.1.0" # promote_conda pytorch-cuda conda "11.8" +# promote_conda pytorch-cuda conda "12.1" # promote_conda pytorch conda "${PYTORCH_VERSION}" # promote_conda torchvision conda "${TORCHVISION_VERSION}" diff --git a/release/pypi/promote_pypi_to_staging.sh b/release/pypi/promote_pypi_to_staging.sh index 74f139680..678d9dd03 100644 --- a/release/pypi/promote_pypi_to_staging.sh +++ b/release/pypi/promote_pypi_to_staging.sh @@ -21,8 +21,8 @@ upload_pypi_to_staging() { } # Uncomment these to promote to pypi -PYTORCH_LINUX_VERSION_SUFFIX="%2Bcu117.with.pypi.cudnn" -LINUX_VERSION_SUFFIX="%2Bcu117" +PYTORCH_LINUX_VERSION_SUFFIX="%2Bcu121.with.pypi.cudnn" +LINUX_VERSION_SUFFIX="%2Bcu121" WIN_VERSION_SUFFIX="%2Bcpu" MACOS_X86_64="macosx_.*_x86_64" MACOS_ARM64="macosx_.*_arm64" diff --git a/release/release_versions.sh b/release/release_versions.sh index f0db2a089..ab35075b6 100644 --- a/release/release_versions.sh +++ b/release/release_versions.sh @@ -1,8 +1,8 @@ #!/usr/bin/env bash # Make sure to update these versions when doing a release first -PYTORCH_VERSION=${PYTORCH_VERSION:-2.0.0} -TORCHVISION_VERSION=${TORCHVISION_VERSION:-0.15.0} -TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION:-2.0.0} -TORCHTEXT_VERSION=${TORCHTEXT_VERSION:-0.15.0} -TORCHDATA_VERSION=${TORCHDATA_VERSION:-0.6.0} +PYTORCH_VERSION=${PYTORCH_VERSION:-2.1.0} +TORCHVISION_VERSION=${TORCHVISION_VERSION:-0.16.0} +TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION:-2.1.0} +TORCHTEXT_VERSION=${TORCHTEXT_VERSION:-0.16.0} +TORCHDATA_VERSION=${TORCHDATA_VERSION:-0.7.0} From 
8a2aacef9a0a29ad6694a325b2f6162b862dd2d3 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Wed, 4 Oct 2023 08:52:08 -0700 Subject: [PATCH 082/212] [BE] Small code cleanup Fold multiple inidices and single index generation into one loop As loop body is the same anyway... --- s3_management/manage.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index 57af9259b..f698deb87 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -404,21 +404,14 @@ def create_parser() -> argparse.ArgumentParser: return parser -def main(): +def main() -> None: parser = create_parser() args = parser.parse_args() action = "Saving" if args.do_not_upload else "Uploading" - if args.prefix == 'all': - for prefix in PREFIXES_WITH_HTML: - print(f"INFO: {action} indices for '{prefix}'") - idx = S3Index.from_S3(prefix=prefix) - if args.do_not_upload: - idx.save_legacy_html() - else: - idx.upload_legacy_html() - else: - print(f"INFO: {action} indices for '{args.prefix}'") - idx = S3Index.from_S3(prefix=args.prefix) + prefixes = PREFIXES_WITH_HTML if args.prefix == 'all' else [args.prefix] + for prefix in prefixes: + print(f"INFO: {action} indices for '{prefix}'") + idx = S3Index.from_S3(prefix=prefix) if args.do_not_upload: idx.save_legacy_html() if args.generate_pep503: From dddbbf7c9bd61b05447e32f35a1a77e7cf6213e3 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Wed, 4 Oct 2023 10:36:10 -0700 Subject: [PATCH 083/212] S3_management: Add option to compute sha256 That will be used later to generate sha256 indexes in PEP503 --- s3_management/manage.py | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index f698deb87..3b2e736fc 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -6,6 +6,7 @@ import dataclasses import functools import time +import hashlib from os import path, 
makedirs from datetime import datetime @@ -116,7 +117,9 @@ @functools.total_ordering class S3Object: key: str + orig_key: str checksum: Optional[str] + size: Optional[int] def __str__(self): return self.key @@ -356,6 +359,20 @@ def save_pep503_htmls(self) -> None: with open(path.join(subdir, pkg_name, "index.html"), mode="w", encoding="utf-8") as f: f.write(self.to_simple_package_html(subdir=subdir, package_name=pkg_name)) + def compute_sha256(self) -> None: + for obj in self.objects: + if obj.checksum is not None: + continue + print(f"Computing sha256 for {obj.orig_key} of size {obj.size}") + sha256_sum = hashlib.sha256() + s3_obj = BUCKET.Object(key=obj.orig_key) + sha256_sum.update(s3_obj.get()["Body"].read()) + digest = sha256_sum.hexdigest() + s3_obj.metadata.update({"checksum-sha256": digest}) + s3_obj.copy_from(CopySource={"Bucket": BUCKET.name, "Key": obj.orig_key}, + Metadata=s3_obj.metadata, MetadataDirective="REPLACE") + + @classmethod def from_S3(cls: Type[S3IndexType], prefix: str) -> S3IndexType: prefix = prefix.rstrip("/") @@ -383,10 +400,15 @@ def fetch_metadata(key: str) : # For older files, rely on checksum-sha256 metadata that can be added to the file later if sha256 is None: sha256 = response.get("Metadata", {}).get("checksum-sha256") + if sha256 is not None: + print(f"Find metadata for {obj_key}") sanitized_key = obj_key.replace("+", "%2B") + size = response.get("ContentLength") s3_object = S3Object( key=sanitized_key, + orig_key=obj_key, checksum=sha256, + size=int(size) if size else size, ) objects.append(s3_object) return cls(objects, prefix) @@ -401,18 +423,24 @@ def create_parser() -> argparse.ArgumentParser: ) parser.add_argument("--do-not-upload", action="store_true") parser.add_argument("--generate-pep503", action="store_true") + parser.add_argument("--compute-sha256", action="store_true") return parser def main() -> None: parser = create_parser() args = parser.parse_args() - action = "Saving" if args.do_not_upload else "Uploading" + 
action = "Saving indices" if args.do_not_upload else "Uploading indices" + if args.compute_sha256: + action = "Computing checksums" + prefixes = PREFIXES_WITH_HTML if args.prefix == 'all' else [args.prefix] for prefix in prefixes: - print(f"INFO: {action} indices for '{prefix}'") + print(f"INFO: {action} for '{prefix}'") idx = S3Index.from_S3(prefix=prefix) - if args.do_not_upload: + if args.compute_sha256: + idx.compute_sha256() + elif args.do_not_upload: idx.save_legacy_html() if args.generate_pep503: idx.save_pep503_htmls() From 312e0519507b9601ad980f4ad7c111dc44df934e Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Wed, 4 Oct 2023 10:42:01 -0700 Subject: [PATCH 084/212] Remove debug print --- s3_management/manage.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index 3b2e736fc..38cb9e8bc 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -400,8 +400,6 @@ def fetch_metadata(key: str) : # For older files, rely on checksum-sha256 metadata that can be added to the file later if sha256 is None: sha256 = response.get("Metadata", {}).get("checksum-sha256") - if sha256 is not None: - print(f"Find metadata for {obj_key}") sanitized_key = obj_key.replace("+", "%2B") size = response.get("ContentLength") s3_object = S3Object( From 2a7b17df319c184d060cb21d376e07cf3fe546a8 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Wed, 4 Oct 2023 12:28:46 -0700 Subject: [PATCH 085/212] [S3_management] Minor improvements - Refactor `fetch_obj_names` into class method - Make sure that object remains public when ACL is computed - Add `has_public_read` and `grant_public_read` class methods --- s3_management/manage.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index 38cb9e8bc..3412d1692 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -370,12 +370,28 @@ def compute_sha256(self) -> None: 
digest = sha256_sum.hexdigest() s3_obj.metadata.update({"checksum-sha256": digest}) s3_obj.copy_from(CopySource={"Bucket": BUCKET.name, "Key": obj.orig_key}, - Metadata=s3_obj.metadata, MetadataDirective="REPLACE") + Metadata=s3_obj.metadata, MetadataDirective="REPLACE", + ACL="public-read", + ChecksumAlgorithm="SHA256") @classmethod - def from_S3(cls: Type[S3IndexType], prefix: str) -> S3IndexType: - prefix = prefix.rstrip("/") + def has_public_read(cls:Type[S3IndexType], key: str) -> bool: + def is_all_users_group(o) -> bool: + return o.get("Grantee",{}).get("URI") == "http://acs.amazonaws.com/groups/global/AllUsers" + + def can_read(o) -> bool: + return o.get("Permission") in ["READ", "FULL_CONTROL"] + + acl_grants = CLIENT.get_object_acl(Bucket=BUCKET.name, Key=key)["Grants"] + return any(is_all_users_group(x) and can_read(x) for x in acl_grants) + + @classmethod + def grant_public_read(cls: Type[S3IndexType], key: str) -> None: + CLIENT.put_object_acl(Bucket=BUCKET.name, Key=key, ACL="public-read") + + @classmethod + def fetch_object_names(cls: Type[S3IndexType], prefix: str) -> List[str]: obj_names = [] for obj in BUCKET.objects.filter(Prefix=prefix): is_acceptable = any([path.dirname(obj.key) == prefix] + [ @@ -388,6 +404,12 @@ def from_S3(cls: Type[S3IndexType], prefix: str) -> S3IndexType: if not is_acceptable: continue obj_names.append(obj.key) + return obj_names + + @classmethod + def from_S3(cls: Type[S3IndexType], prefix: str) -> S3IndexType: + prefix = prefix.rstrip("/") + obj_names = cls.fetch_object_names(prefix) objects = [] def fetch_metadata(key: str) : return CLIENT.head_object(Bucket=BUCKET.name, Key=key, ChecksumMode="Enabled") From 3e2d4a1e65f144897d4dec37bc57519b9d078396 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Wed, 4 Oct 2023 12:51:27 -0700 Subject: [PATCH 086/212] s3_management: compute checksum in cloud I.e. 
file never gets downloaded on the client, which is a nice thing --- s3_management/manage.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index 3412d1692..a2c79f211 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -6,7 +6,6 @@ import dataclasses import functools import time -import hashlib from os import path, makedirs from datetime import datetime @@ -363,12 +362,8 @@ def compute_sha256(self) -> None: for obj in self.objects: if obj.checksum is not None: continue - print(f"Computing sha256 for {obj.orig_key} of size {obj.size}") - sha256_sum = hashlib.sha256() + print(f"Updating {obj.orig_key} of size {obj.size} with SHA256 checksum") s3_obj = BUCKET.Object(key=obj.orig_key) - sha256_sum.update(s3_obj.get()["Body"].read()) - digest = sha256_sum.hexdigest() - s3_obj.metadata.update({"checksum-sha256": digest}) s3_obj.copy_from(CopySource={"Bucket": BUCKET.name, "Key": obj.orig_key}, Metadata=s3_obj.metadata, MetadataDirective="REPLACE", ACL="public-read", From 07efc44334baf7f900bf7ecbc56d1ab54010d7de Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Thu, 5 Oct 2023 07:30:32 -0700 Subject: [PATCH 087/212] [S3Management] Add `undelete_prefix` method That can be used to recover object in a versioned bucket --- s3_management/manage.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/s3_management/manage.py b/s3_management/manage.py index a2c79f211..f8674bc19 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -428,6 +428,18 @@ def fetch_metadata(key: str) : objects.append(s3_object) return cls(objects, prefix) + @classmethod + def undelete_prefix(cls: Type[S3IndexType], prefix: str) -> None: + paginator = CLIENT.get_paginator("list_object_versions") + for page in paginator.paginate(Bucket=BUCKET.name, Prefix=prefix): + for obj in page.get("DeleteMarkers", []): + if not obj.get("IsLatest"): + continue + obj_key, obj_version_id = obj["Key"], 
obj["VersionId"] + obj_ver = S3.ObjectVersion(BUCKET.name, obj_key, obj_version_id) + print(f"Undeleting {obj_key} deleted on {obj['LastModified']}") + obj_ver.delete() + def create_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser("Manage S3 HTML indices for PyTorch") From eebd2ce10fe8472ae56ddfcfbb8809d4bf1be01d Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Fri, 6 Oct 2023 12:51:34 -0400 Subject: [PATCH 088/212] Validate poetry for release (#1567) * Validate poetry for release * test * test * fixtypo --- .github/scripts/validate_poetry.sh | 3 ++- .github/workflows/validate-linux-binaries.yml | 5 +---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/scripts/validate_poetry.sh b/.github/scripts/validate_poetry.sh index c4e4fd154..3c41d5b45 100644 --- a/.github/scripts/validate_poetry.sh +++ b/.github/scripts/validate_poetry.sh @@ -26,10 +26,11 @@ if [[ ${MATRIX_CHANNEL} != "release" ]]; then fi else export PYTHON_KEYRING_BACKEND=keyring.backends.null.Keyring + poetry source add --priority=explicit pytorch "https://download.pytorch.org/whl/${MATRIX_DESIRED_CUDA}" if [[ ${TORCH_ONLY} == 'true' ]]; then poetry --quiet add torch else - poetry --quiet add torch torchaudio torchvision + poetry --quiet add --source pytorch torch torchaudio torchvision fi fi diff --git a/.github/workflows/validate-linux-binaries.yml b/.github/workflows/validate-linux-binaries.yml index 12335e843..3c5aac2eb 100644 --- a/.github/workflows/validate-linux-binaries.yml +++ b/.github/workflows/validate-linux-binaries.yml @@ -68,11 +68,8 @@ jobs: eval "$(conda shell.bash hook)" # Special case PyPi installation package. 
And Install of PyPi package via poetry - if [[ ${MATRIX_PACKAGE_TYPE} == "manywheel" ]] && \ - ([[ ${MATRIX_GPU_ARCH_VERSION} == "12.1" && ${MATRIX_CHANNEL} != "release" ]] || \ - [[ ${MATRIX_GPU_ARCH_VERSION} == "11.7" && ${MATRIX_CHANNEL} == "release" ]]); then + if [[ ${MATRIX_PACKAGE_TYPE} == "manywheel" && ${MATRIX_GPU_ARCH_VERSION} == "12.1" ]]; then source ./.github/scripts/validate_pipy.sh - # temporary disable poetry check source ./.github/scripts/validate_poetry.sh fi From a5f2068c2597a6be55159fb5aaa3bf361ccea621 Mon Sep 17 00:00:00 2001 From: Nikita Shulga <2453524+malfet@users.noreply.github.com> Date: Sun, 8 Oct 2023 22:11:41 -0700 Subject: [PATCH 089/212] Use released version of 3.12 (#1568) As it was released on Oct 6 2023: https://www.python.org/downloads/release/python-3120/ --- common/install_cpython.sh | 4 ---- 1 file changed, 4 deletions(-) diff --git a/common/install_cpython.sh b/common/install_cpython.sh index 8347822b6..67bffe36f 100755 --- a/common/install_cpython.sh +++ b/common/install_cpython.sh @@ -64,10 +64,6 @@ function build_cpython { check_var $py_ver check_var $PYTHON_DOWNLOAD_URL local py_ver_folder=$py_ver - # Only b2 version of 3.12 is available right now - if [ "$py_ver" = "3.12.0" ]; then - py_ver=$py_ver"b2" - fi wget -q $PYTHON_DOWNLOAD_URL/$py_ver_folder/Python-$py_ver.tgz do_cpython_build $py_ver none rm -f Python-$py_ver.tgz From 0481289201f161445ae3ff34aa7fd762d50bd07b Mon Sep 17 00:00:00 2001 From: Nikita Shulga <2453524+malfet@users.noreply.github.com> Date: Tue, 10 Oct 2023 21:51:08 -0700 Subject: [PATCH 090/212] Move manywheel builds to `linux.12xlarge.ephemeral` (#1569) Should be faster(<20 min vs 40+ min) and as secure as using GH ones --- .github/workflows/build-manywheel-images.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-manywheel-images.yml b/.github/workflows/build-manywheel-images.yml index bbac707dd..d717416f6 100644 --- 
a/.github/workflows/build-manywheel-images.yml +++ b/.github/workflows/build-manywheel-images.yml @@ -34,7 +34,7 @@ env: jobs: build-docker-cuda: - runs-on: ubuntu-22.04 + runs-on: linux.12xlarge.ephemeral strategy: matrix: cuda_version: ["12.1", "11.8"] From 00841b69a7f7d0aed526fe42444e57e60d813630 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Wed, 11 Oct 2023 05:09:54 +0000 Subject: [PATCH 091/212] Add cuSparseLt-0.5.0 to manywheel images --- common/install_cuda.sh | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/common/install_cuda.sh b/common/install_cuda.sh index e087a44c1..8907284e0 100644 --- a/common/install_cuda.sh +++ b/common/install_cuda.sh @@ -3,7 +3,7 @@ set -ex function install_118 { - echo "Installing CUDA 11.8 and cuDNN 8.7 and NCCL 2.15" + echo "Installing CUDA 11.8 and cuDNN 8.7 and NCCL 2.15 and cuSparseLt-0.5.0" rm -rf /usr/local/cuda-11.8 /usr/local/cuda # install CUDA 11.8.0 in the same container wget -q https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run @@ -31,10 +31,20 @@ function install_118 { cd .. 
rm -rf tmp_nccl ldconfig + + # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html + mkdir tmp_cusparselt && pushd tmp_cusparselt + wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/libcusparse_lt-linux-x86_64-0.5.0.1-archive.tar.xz + tar xf libcusparse_lt-linux-x86_64-0.5.0.1-archive.tar.xz + cp -a libcusparse_lt-linux-x86_64-0.5.0.1-archive/include/* /usr/local/cuda/include/ + cp -a libcusparse_lt-linux-x86_64-0.5.0.1-archive/lib/* /usr/local/cuda/lib64/ + popd + rm -rf tmp_custparselt + ldconfig } function install_121 { - echo "Installing CUDA 12.1 and cuDNN 8.9 and NCCL 2.18.1" + echo "Installing CUDA 12.1 and cuDNN 8.9 and NCCL 2.18.1 and cuSparseLt-0.5.0" rm -rf /usr/local/cuda-12.1 /usr/local/cuda # install CUDA 12.1.0 in the same container wget -q https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda_12.1.1_530.30.02_linux.run @@ -62,6 +72,16 @@ function install_121 { cd .. rm -rf tmp_nccl ldconfig + + # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html + mkdir tmp_cusparselt && pushd tmp_cusparselt + wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/libcusparse_lt-linux-x86_64-0.5.0.1-archive.tar.xz + tar xf libcusparse_lt-linux-x86_64-0.5.0.1-archive.tar.xz + cp -a libcusparse_lt-linux-x86_64-0.5.0.1-archive/include/* /usr/local/cuda/include/ + cp -a libcusparse_lt-linux-x86_64-0.5.0.1-archive/lib/* /usr/local/cuda/lib64/ + popd + rm -rf tmp_custparselt + ldconfig } function prune_118 { From 321ab64ca5552da731fea5e26e3a2829b96b3b20 Mon Sep 17 00:00:00 2001 From: Nikita Shulga <2453524+malfet@users.noreply.github.com> Date: Wed, 11 Oct 2023 08:35:36 -0700 Subject: [PATCH 092/212] Use `linux.12xlarge.ephemeral` for conda docker builds (#1570) As `ubuntu.20.04` often OOM/failed to fetch data from RHEL repo --- .github/workflows/build-conda-images.yml | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/.github/workflows/build-conda-images.yml b/.github/workflows/build-conda-images.yml index 4f4f9f43d..9e5290cef 100644 --- a/.github/workflows/build-conda-images.yml +++ b/.github/workflows/build-conda-images.yml @@ -28,7 +28,7 @@ env: jobs: build-docker: - runs-on: ubuntu-22.04 + runs-on: linux.12xlarge.ephemeral strategy: matrix: cuda_version: ["11.8", "12.1", "cpu"] From 4e68b6d6a3437b50f3774486bcab9e000940a421 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Wed, 11 Oct 2023 08:48:36 -0700 Subject: [PATCH 093/212] Revert "Add cuSparseLt-0.5.0 to manywheel images" This reverts commit 00841b69a7f7d0aed526fe42444e57e60d813630 as cuSparseLT is not compatible with CentOS 7 --- common/install_cuda.sh | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/common/install_cuda.sh b/common/install_cuda.sh index 8907284e0..e087a44c1 100644 --- a/common/install_cuda.sh +++ b/common/install_cuda.sh @@ -3,7 +3,7 @@ set -ex function install_118 { - echo "Installing CUDA 11.8 and cuDNN 8.7 and NCCL 2.15 and cuSparseLt-0.5.0" + echo "Installing CUDA 11.8 and cuDNN 8.7 and NCCL 2.15" rm -rf /usr/local/cuda-11.8 /usr/local/cuda # install CUDA 11.8.0 in the same container wget -q https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run @@ -31,20 +31,10 @@ function install_118 { cd .. 
rm -rf tmp_nccl ldconfig - - # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html - mkdir tmp_cusparselt && pushd tmp_cusparselt - wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/libcusparse_lt-linux-x86_64-0.5.0.1-archive.tar.xz - tar xf libcusparse_lt-linux-x86_64-0.5.0.1-archive.tar.xz - cp -a libcusparse_lt-linux-x86_64-0.5.0.1-archive/include/* /usr/local/cuda/include/ - cp -a libcusparse_lt-linux-x86_64-0.5.0.1-archive/lib/* /usr/local/cuda/lib64/ - popd - rm -rf tmp_custparselt - ldconfig } function install_121 { - echo "Installing CUDA 12.1 and cuDNN 8.9 and NCCL 2.18.1 and cuSparseLt-0.5.0" + echo "Installing CUDA 12.1 and cuDNN 8.9 and NCCL 2.18.1" rm -rf /usr/local/cuda-12.1 /usr/local/cuda # install CUDA 12.1.0 in the same container wget -q https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda_12.1.1_530.30.02_linux.run @@ -72,16 +62,6 @@ function install_121 { cd .. rm -rf tmp_nccl ldconfig - - # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html - mkdir tmp_cusparselt && pushd tmp_cusparselt - wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/libcusparse_lt-linux-x86_64-0.5.0.1-archive.tar.xz - tar xf libcusparse_lt-linux-x86_64-0.5.0.1-archive.tar.xz - cp -a libcusparse_lt-linux-x86_64-0.5.0.1-archive/include/* /usr/local/cuda/include/ - cp -a libcusparse_lt-linux-x86_64-0.5.0.1-archive/lib/* /usr/local/cuda/lib64/ - popd - rm -rf tmp_custparselt - ldconfig } function prune_118 { From 4395f498f4e1f6624cb1a4f7b1c83ea912fff7e4 Mon Sep 17 00:00:00 2001 From: Nikita Shulga <2453524+malfet@users.noreply.github.com> Date: Wed, 11 Oct 2023 09:24:06 -0700 Subject: [PATCH 094/212] Move libtorch docker builder to `linux.12xlarge.ephemeral` (#1571) As running it on `ubutu22.04` often results in flay infra failures/running out of disk space, for example, from 
https://github.com/pytorch/builder/actions/runs/6484948230/job/17609933012 ``` cat: write error: No space left on device ``` --- .github/workflows/build-libtorch-images.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-libtorch-images.yml b/.github/workflows/build-libtorch-images.yml index d62030047..7968bbb26 100644 --- a/.github/workflows/build-libtorch-images.yml +++ b/.github/workflows/build-libtorch-images.yml @@ -30,7 +30,7 @@ env: jobs: build-docker-cuda: - runs-on: ubuntu-22.04 + runs-on: linux.12xlarge.ephemeral strategy: matrix: cuda_version: ["12.1", "11.8"] From ce1c649a3a778726cb57129c57dbfe2c8fa19713 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Wed, 11 Oct 2023 05:09:54 +0000 Subject: [PATCH 095/212] Add cuSparseLt-0.4.0 to manywheel images But set USE_CUSPARSELT to 0 by default --- common/install_cuda.sh | 23 +++++++++++++++++++---- manywheel/build_cuda.sh | 1 + 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/common/install_cuda.sh b/common/install_cuda.sh index e087a44c1..f09666e64 100644 --- a/common/install_cuda.sh +++ b/common/install_cuda.sh @@ -2,8 +2,19 @@ set -ex +function install_cusparselt_040 { + # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html + mkdir tmp_cusparselt && pushd tmp_cusparselt + wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/libcusparse_lt-linux-x86_64-0.4.0.7-archive.tar.xz + tar xf libcusparse_lt-linux-x86_64-0.4.0.7-archive.tar.xz + cp -a libcusparse_lt-linux-x86_64-0.4.0.7-archive/include/* /usr/local/cuda/include/ + cp -a libcusparse_lt-linux-x86_64-0.4.0.7-archive/lib/* /usr/local/cuda/lib64/ + popd + rm -rf tmp_custparselt +} + function install_118 { - echo "Installing CUDA 11.8 and cuDNN 8.7 and NCCL 2.15" + echo "Installing CUDA 11.8 and cuDNN 8.7 and NCCL 2.15 and cuSparseLt-0.5.0" rm -rf /usr/local/cuda-11.8 /usr/local/cuda # install CUDA 11.8.0 in the same container wget 
-q https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run @@ -20,7 +31,6 @@ function install_118 { cp -a cudnn-linux-x86_64-8.7.0.84_cuda11-archive/lib/* /usr/local/cuda/lib64/ cd .. rm -rf tmp_cudnn - ldconfig # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses mkdir tmp_nccl && cd tmp_nccl @@ -30,11 +40,14 @@ function install_118 { cp -a nccl_2.15.5-1+cuda11.8_x86_64/lib/* /usr/local/cuda/lib64/ cd .. rm -rf tmp_nccl + + install_cusparselt_040 + ldconfig } function install_121 { - echo "Installing CUDA 12.1 and cuDNN 8.9 and NCCL 2.18.1" + echo "Installing CUDA 12.1 and cuDNN 8.9 and NCCL 2.18.1 and cuSparseLt-0.5.0" rm -rf /usr/local/cuda-12.1 /usr/local/cuda # install CUDA 12.1.0 in the same container wget -q https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda_12.1.1_530.30.02_linux.run @@ -51,7 +64,6 @@ function install_121 { cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/lib/* /usr/local/cuda/lib64/ cd .. rm -rf tmp_cudnn - ldconfig # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses mkdir tmp_nccl && cd tmp_nccl @@ -61,6 +73,9 @@ function install_121 { cp -a nccl_2.18.1-1+cuda12.1_x86_64/lib/* /usr/local/cuda/lib64/ cd .. 
rm -rf tmp_nccl + + install_cusparselt_040 + ldconfig } diff --git a/manywheel/build_cuda.sh b/manywheel/build_cuda.sh index afb4aa811..2689dbd48 100644 --- a/manywheel/build_cuda.sh +++ b/manywheel/build_cuda.sh @@ -13,6 +13,7 @@ export ATEN_STATIC_CUDA=1 export USE_CUDA_STATIC_LINK=1 export INSTALL_TEST=0 # dont install test binaries into site-packages export USE_CUPTI_SO=0 +export USE_CUSPARSELT=0 # disable for now # Keep an array of cmake variables to add to if [[ -z "$CMAKE_ARGS" ]]; then From 94b198b7eae9f1ad40331feca26634d43dfc81da Mon Sep 17 00:00:00 2001 From: Nikita Shulga <2453524+malfet@users.noreply.github.com> Date: Fri, 13 Oct 2023 08:58:28 -0700 Subject: [PATCH 096/212] Add xformers to the list of indexable packages --- s3_management/manage.py | 1 + 1 file changed, 1 insertion(+) diff --git a/s3_management/manage.py b/s3_management/manage.py index f8674bc19..a8bda72e2 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -99,6 +99,7 @@ "tqdm", "typing_extensions", "urllib3", + "xformers", } # Should match torch-2.0.0.dev20221221+cu118-cp310-cp310-linux_x86_64.whl as: From f01d7105b19b417802731fa2092df5fa9f911bf2 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Fri, 13 Oct 2023 14:45:01 -0700 Subject: [PATCH 097/212] Build wheels with cuSparseLt Build libtorch without cuSparseLt so far Factor out `DEPS_LIST` to top level and add cuSparseLt of `USE_CUSPARSELT` is set to 1 Tested in https://github.com/pytorch/pytorch/pull/111245 --- libtorch/build.sh | 2 +- manywheel/build_cuda.sh | 30 +++++++++++++++++------------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/libtorch/build.sh b/libtorch/build.sh index 88c8c6f9a..d9f78fd66 100644 --- a/libtorch/build.sh +++ b/libtorch/build.sh @@ -7,4 +7,4 @@ set -ex SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -BUILD_PYTHONLESS=1 DESIRED_PYTHON="3.8" ${SCRIPTPATH}/../manywheel/build.sh +USE_CUSPARSELT=0 BUILD_PYTHONLESS=1 DESIRED_PYTHON="3.8" 
${SCRIPTPATH}/../manywheel/build.sh diff --git a/manywheel/build_cuda.sh b/manywheel/build_cuda.sh index 2689dbd48..c59cbca16 100644 --- a/manywheel/build_cuda.sh +++ b/manywheel/build_cuda.sh @@ -13,7 +13,7 @@ export ATEN_STATIC_CUDA=1 export USE_CUDA_STATIC_LINK=1 export INSTALL_TEST=0 # dont install test binaries into site-packages export USE_CUPTI_SO=0 -export USE_CUSPARSELT=0 # disable for now +export USE_CUSPARSELT=${USE_CUSPARSELT:-1} # Enable if not disabled by libtorch build # Keep an array of cmake variables to add to if [[ -z "$CMAKE_ARGS" ]]; then @@ -113,16 +113,26 @@ elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then LIBGOMP_PATH="/usr/lib/x86_64-linux-gnu/libgomp.so.1" fi +DEPS_LIST=( + "$LIBGOMP_PATH" +) +DEPS_SONAME=( + "libgomp.so.1" +) + +if [[ $USE_CUSPARSELT == "1" ]]; then + DEPS_SONAME+=( + "libcusparseLt.so.0" + ) + DEPS_LIST+=( + "/usr/local/cuda/lib64/libcusparseLt.so.0" + ) +fi + if [[ $CUDA_VERSION == "12.1" ]]; then export USE_STATIC_CUDNN=0 # Try parallelizing nvcc as well export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2" - DEPS_LIST=( - "$LIBGOMP_PATH" - ) - DEPS_SONAME=( - "libgomp.so.1" - ) if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then echo "Bundling with cudnn and cublas." @@ -187,12 +197,6 @@ elif [[ $CUDA_VERSION == "11.8" ]]; then export USE_STATIC_CUDNN=0 # Try parallelizing nvcc as well export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2" - DEPS_LIST=( - "$LIBGOMP_PATH" - ) - DEPS_SONAME=( - "libgomp.so.1" - ) if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then echo "Bundling with cudnn and cublas." 
From d41bcbfe52fc38bf323a3f2a8165289abdcabd23 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Mon, 16 Oct 2023 16:29:53 -0700 Subject: [PATCH 098/212] Do not build conda with CuSparseLT --- conda/build_pytorch.sh | 1 + conda/pytorch-nightly/meta.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/conda/build_pytorch.sh b/conda/build_pytorch.sh index 6f8eaf502..029372303 100755 --- a/conda/build_pytorch.sh +++ b/conda/build_pytorch.sh @@ -351,6 +351,7 @@ for py_ver in "${DESIRED_PYTHON[@]}"; do PYTORCH_GITHUB_ROOT_DIR="$pytorch_rootdir" \ PYTORCH_BUILD_STRING="$build_string" \ PYTORCH_MAGMA_CUDA_VERSION="$cuda_nodot" \ + USE_CUSPARSELT=0 \ conda build -c "$ANACONDA_USER" \ ${NO_TEST:-} \ --no-anaconda-upload \ diff --git a/conda/pytorch-nightly/meta.yaml b/conda/pytorch-nightly/meta.yaml index 79025dc8c..59efc729b 100644 --- a/conda/pytorch-nightly/meta.yaml +++ b/conda/pytorch-nightly/meta.yaml @@ -94,6 +94,7 @@ build: - USE_COREML_DELEGATE # [osx] - _GLIBCXX_USE_CXX11_ABI # [unix] - OVERRIDE_TORCH_CUDA_ARCH_LIST + - USE_CUSPARSELT test: imports: From 63cb272d4e556bc535a579f1cc077acf0001ef01 Mon Sep 17 00:00:00 2001 From: Jithun Nair <37884920+jithunnair-amd@users.noreply.github.com> Date: Wed, 18 Oct 2023 02:45:43 -0500 Subject: [PATCH 099/212] Add ROCM_PATH env var to Dockerfile for ROCm5.7 issue with finding HIP (#1572) --- libtorch/Dockerfile | 4 ++++ manywheel/Dockerfile | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/libtorch/Dockerfile b/libtorch/Dockerfile index c01c6416e..5d5b707af 100644 --- a/libtorch/Dockerfile +++ b/libtorch/Dockerfile @@ -59,6 +59,10 @@ FROM cpu as rocm ARG PYTORCH_ROCM_ARCH ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH} ENV MKLROOT /opt/intel +# Adding ROCM_PATH env var so that LoadHip.cmake (even with logic updated for ROCm6.0) +# find HIP works for ROCm5.7. Not needed for ROCm6.0 and above. +# Remove below when ROCm5.7 is not in support matrix anymore. 
+ENV ROCM_PATH /opt/rocm # No need to install ROCm as base docker image should have full ROCm install #ADD ./common/install_rocm.sh install_rocm.sh ADD ./common/install_rocm_drm.sh install_rocm_drm.sh diff --git a/manywheel/Dockerfile b/manywheel/Dockerfile index d3e9ad2ef..4edaef193 100644 --- a/manywheel/Dockerfile +++ b/manywheel/Dockerfile @@ -159,6 +159,10 @@ FROM cpu_final as rocm_final ARG ROCM_VERSION=3.7 ARG PYTORCH_ROCM_ARCH ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH} +# Adding ROCM_PATH env var so that LoadHip.cmake (even with logic updated for ROCm6.0) +# find HIP works for ROCm5.7. Not needed for ROCm6.0 and above. +# Remove below when ROCm5.7 is not in support matrix anymore. +ENV ROCM_PATH /opt/rocm # No need to install ROCm as base docker image should have full ROCm install #ADD ./common/install_rocm.sh install_rocm.sh #RUN ROCM_VERSION=${ROCM_VERSION} bash ./install_rocm.sh && rm install_rocm.sh From 85df81032b2c3508585f72ca05c1b312804b0266 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Mon, 23 Oct 2023 20:48:31 -0700 Subject: [PATCH 100/212] [aarch64_wheel] Minor typing improvements --- aarch64_linux/aarch64_wheel_ci_build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py index bdc6717ef..0c413b07e 100755 --- a/aarch64_linux/aarch64_wheel_ci_build.py +++ b/aarch64_linux/aarch64_wheel_ci_build.py @@ -30,7 +30,7 @@ def build_ArmComputeLibrary(git_clone_flags: str = "") -> None: "cp -r src $acl_install_dir; cd /") -def complete_wheel(folder: str): +def complete_wheel(folder: str) -> str: ''' Complete wheel build and put in artifact location ''' From a414219beff5961ee40af86fe1db31c54982c240 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Mon, 23 Oct 2023 20:49:50 -0700 Subject: [PATCH 101/212] [aarch64_wheel] Flake8 fix --- aarch64_linux/aarch64_wheel_ci_build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py index 0c413b07e..9cc4ac4d8 100755 --- a/aarch64_linux/aarch64_wheel_ci_build.py +++ b/aarch64_linux/aarch64_wheel_ci_build.py @@ -107,7 +107,7 @@ def parse_arguments(): # work around to fix Raspberry pie crash print("Applying mkl-dnn patch to fix Raspberry pie crash") - os.system(f"cd /pytorch/third_party/ideep/mkl-dnn && patch -p1 < /builder/mkldnn_fix/aarch64-fix-default-build-flags-to-armv8-a.patch") + os.system("cd /pytorch/third_party/ideep/mkl-dnn && patch -p1 < /builder/mkldnn_fix/aarch64-fix-default-build-flags-to-armv8-a.patch") os.system(f"cd /pytorch; {build_vars} python3 setup.py bdist_wheel") pytorch_wheel_name = complete_wheel("pytorch") print(f"Build Compelete. Created {pytorch_wheel_name}..") From e11155aa6bae87e16a7b3e96eb235354d6405c01 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Mon, 23 Oct 2023 20:50:42 -0700 Subject: [PATCH 102/212] [aarch64_wheel] Cosmetic changes --- aarch64_linux/aarch64_wheel_ci_build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py index 9cc4ac4d8..4a8fcc875 100755 --- a/aarch64_linux/aarch64_wheel_ci_build.py +++ b/aarch64_linux/aarch64_wheel_ci_build.py @@ -87,7 +87,7 @@ def parse_arguments(): version = override_package_version build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version} PYTORCH_BUILD_NUMBER=1 " else: - if branch == 'nightly' or branch == 'master': + if branch in ['nightly', 'master']: build_date = subprocess.check_output(['git', 'log', '--pretty=format:%cs', '-1'], cwd='/pytorch').decode().replace('-', '') version = subprocess.check_output(['cat', 'version.txt'], cwd='/pytorch').decode().strip()[:-2] build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1 " From c5e331c0858e37fedc047707466161dfe0cadff6 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Mon, 23 Oct 2023 
20:52:12 -0700 Subject: [PATCH 103/212] [aarch64_wheel] Fix readdir crash Probably fixes https://github.com/pytorch/pytorch/issues/111695 --- aarch64_linux/aarch64_wheel_ci_build.py | 2 ++ mkldnn_fix/aarch64-fix-readdir-crash.patch | 14 ++++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 mkldnn_fix/aarch64-fix-readdir-crash.patch diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py index 4a8fcc875..3fc86053c 100755 --- a/aarch64_linux/aarch64_wheel_ci_build.py +++ b/aarch64_linux/aarch64_wheel_ci_build.py @@ -108,6 +108,8 @@ def parse_arguments(): # work around to fix Raspberry pie crash print("Applying mkl-dnn patch to fix Raspberry pie crash") os.system("cd /pytorch/third_party/ideep/mkl-dnn && patch -p1 < /builder/mkldnn_fix/aarch64-fix-default-build-flags-to-armv8-a.patch") + print("Applying mkl-dnn patch to fix readdir crash") + os.system("cd /pytorch/third_party/ideep/mkl-dnn && patch -p1 < /builder/mkldnn_fix/aarch64-fix-readdir-crash.patch") os.system(f"cd /pytorch; {build_vars} python3 setup.py bdist_wheel") pytorch_wheel_name = complete_wheel("pytorch") print(f"Build Compelete. 
Created {pytorch_wheel_name}..") diff --git a/mkldnn_fix/aarch64-fix-readdir-crash.patch b/mkldnn_fix/aarch64-fix-readdir-crash.patch new file mode 100644 index 000000000..81d46d406 --- /dev/null +++ b/mkldnn_fix/aarch64-fix-readdir-crash.patch @@ -0,0 +1,14 @@ +diff --git a/src/cpu/aarch64/xbyak_aarch64/src/util_impl.cpp b/src/cpu/aarch64/xbyak_aarch64/src/util_impl.cpp +index cb800b2509..5516373b90 100644 +--- a/src/cpu/aarch64/xbyak_aarch64/src/util_impl.cpp ++++ b/src/cpu/aarch64/xbyak_aarch64/src/util_impl.cpp +@@ -170,6 +170,8 @@ int Cpu::getFilePathMaxTailNumPlus1(const char *path) { + fflush(stdout); + + DIR *dir = opendir(dir_path); ++ if (dir == NULL) ++ return 0; + struct dirent *dp; + + dp = readdir(dir); + From 13c071f2783ec4100fae94c74d2285ccedf5acb7 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Mon, 30 Oct 2023 08:48:30 -0700 Subject: [PATCH 104/212] [S3_management] generate libtorch index.html --- s3_management/manage.py | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index a8bda72e2..f17526f6a 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -23,8 +23,8 @@ ACCEPTED_FILE_EXTENSIONS = ("whl", "zip", "tar.gz") ACCEPTED_SUBDIR_PATTERNS = [ - r"cu[0-9]+", # for cuda - r"rocm[0-9]+\.[0-9]+", # for rocm + r"cu[0-9]+", # for cuda + r"rocm[0-9]+\.[0-9]+", # for rocm "cpu", ] PREFIXES_WITH_HTML = { @@ -32,6 +32,8 @@ "whl/lts/1.8": "torch_lts.html", "whl/nightly": "torch_nightly.html", "whl/test": "torch_test.html", + "libtorch": "index.html", + "libtorch/nightly": "index.html", } # NOTE: This refers to the name on the wheels themselves and not the name of @@ -141,6 +143,7 @@ def extract_package_build_time(full_package_name: str) -> datetime: pass return datetime.now() + def between_bad_dates(package_build_time: datetime): start_bad = datetime(year=2022, month=8, day=17) end_bad = datetime(year=2022, month=12, day=30) @@ -207,7 
+210,7 @@ def _resolve_subdir(self, subdir: Optional[str] = None) -> str: def gen_file_list( self, - subdir: Optional[str]=None, + subdir: Optional[str] = None, package_name: Optional[str] = None ) -> Iterable[S3Object]: objects = ( @@ -238,7 +241,7 @@ def obj_to_package_name(self, obj: S3Object) -> str: def to_legacy_html( self, - subdir: Optional[str]=None + subdir: Optional[str] = None ) -> str: """Generates a string that can be used as the HTML index @@ -277,7 +280,7 @@ def to_simple_package_html( out.append('') out.append('') out.append(' ') - out.append('

Links for {}

'.format(package_name.lower().replace("_","-"))) + out.append('

Links for {}

'.format(package_name.lower().replace("_", "-"))) for obj in sorted(self.gen_file_list(subdir, package_name)): maybe_fragment = f"#sha256={obj.checksum}" if obj.checksum else "" out.append(f' {path.basename(obj.key).replace("%2B","+")}
') @@ -370,11 +373,10 @@ def compute_sha256(self) -> None: ACL="public-read", ChecksumAlgorithm="SHA256") - @classmethod - def has_public_read(cls:Type[S3IndexType], key: str) -> bool: + def has_public_read(cls: Type[S3IndexType], key: str) -> bool: def is_all_users_group(o) -> bool: - return o.get("Grantee",{}).get("URI") == "http://acs.amazonaws.com/groups/global/AllUsers" + return o.get("Grantee", {}).get("URI") == "http://acs.amazonaws.com/groups/global/AllUsers" def can_read(o) -> bool: return o.get("Permission") in ["READ", "FULL_CONTROL"] @@ -403,13 +405,23 @@ def fetch_object_names(cls: Type[S3IndexType], prefix: str) -> List[str]: return obj_names @classmethod - def from_S3(cls: Type[S3IndexType], prefix: str) -> S3IndexType: + def from_S3(cls: Type[S3IndexType], prefix: str, with_metadata: bool = True) -> S3IndexType: prefix = prefix.rstrip("/") obj_names = cls.fetch_object_names(prefix) objects = [] - def fetch_metadata(key: str) : + + def fetch_metadata(key: str): return CLIENT.head_object(Bucket=BUCKET.name, Key=key, ChecksumMode="Enabled") + def sanitize_key(key: str) -> str: + return key.replace("+", "%2B") + + if not with_metadata: + return cls([S3Object(key=sanitize_key(key), + orig_key=key, + checksum=None, + size=None) for key in obj_names], prefix) + with concurrent.futures.ThreadPoolExecutor(max_workers=6) as executor: # Add PEP 503-compatible hashes to URLs to allow clients to avoid spurious downloads, if possible. 
for obj_key, future in {key: executor.submit(fetch_metadata, key) for key in obj_names}.items(): @@ -418,10 +430,9 @@ def fetch_metadata(key: str) : # For older files, rely on checksum-sha256 metadata that can be added to the file later if sha256 is None: sha256 = response.get("Metadata", {}).get("checksum-sha256") - sanitized_key = obj_key.replace("+", "%2B") size = response.get("ContentLength") s3_object = S3Object( - key=sanitized_key, + key=sanitize_key(obj_key), orig_key=obj_key, checksum=sha256, size=int(size) if size else size, @@ -465,7 +476,7 @@ def main() -> None: prefixes = PREFIXES_WITH_HTML if args.prefix == 'all' else [args.prefix] for prefix in prefixes: print(f"INFO: {action} for '{prefix}'") - idx = S3Index.from_S3(prefix=prefix) + idx = S3Index.from_S3(prefix=prefix, with_metadata=args.generate_pep503 or args.compute_sha256) if args.compute_sha256: idx.compute_sha256() elif args.do_not_upload: From 3229f7f7026dbba9565dbea3f8d6328b82eeedad Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Mon, 30 Oct 2023 13:44:14 -0700 Subject: [PATCH 105/212] [CI] Update ruff to 0.1.1 To keep it in sync with pytorch --- .lintrunner.toml | 2 +- tools/linter/adapters/ruff_linter.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.lintrunner.toml b/.lintrunner.toml index 5c63a6c5d..b7375092a 100644 --- a/.lintrunner.toml +++ b/.lintrunner.toml @@ -15,6 +15,6 @@ init_command = [ 'python3', 'tools/linter/adapters/pip_init.py', '--dry-run={{DRYRUN}}', - 'ruff==0.0.290', + 'ruff==0.1.1', ] is_formatter = true diff --git a/tools/linter/adapters/ruff_linter.py b/tools/linter/adapters/ruff_linter.py index 451834aa7..1fb14aa59 100644 --- a/tools/linter/adapters/ruff_linter.py +++ b/tools/linter/adapters/ruff_linter.py @@ -227,7 +227,7 @@ def check_files( "ruff", "--exit-zero", "--quiet", - "--format=json", + "--output-format=json", *([f"--config={config}"] if config else []), *filenames, ], From e9ce243cf5174035a68ec23d70156be7399ebd94 Mon Sep 17 
00:00:00 2001 From: Huy Do Date: Mon, 30 Oct 2023 17:23:15 -0700 Subject: [PATCH 106/212] Get rid of http://repo.okay.com.mx (#1575) --- manywheel/Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/manywheel/Dockerfile b/manywheel/Dockerfile index 4edaef193..1adc4ba22 100644 --- a/manywheel/Dockerfile +++ b/manywheel/Dockerfile @@ -145,7 +145,6 @@ RUN yum install -y python3-pip && \ ln -s /usr/local/bin/cmake /usr/bin/cmake # ninja -RUN yum install -y http://repo.okay.com.mx/centos/7/x86_64/release/okay-release-1-5.el7.noarch.rpm RUN yum install -y ninja-build FROM cpu_final as cuda_final From d12d1f204151caa889a4df35a3f6e8c0992cf809 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Tue, 31 Oct 2023 00:48:12 +0000 Subject: [PATCH 107/212] [S3_management] Print time it takes to fetch index --- s3_management/manage.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/s3_management/manage.py b/s3_management/manage.py index f17526f6a..2b83d5174 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -476,7 +476,10 @@ def main() -> None: prefixes = PREFIXES_WITH_HTML if args.prefix == 'all' else [args.prefix] for prefix in prefixes: print(f"INFO: {action} for '{prefix}'") + stime = time.time() idx = S3Index.from_S3(prefix=prefix, with_metadata=args.generate_pep503 or args.compute_sha256) + etime = time.time() + print(f"DEBUG: Fetched {len(idx.objects)} objects for '{prefix}' in {etime-stime:.2f} seconds") if args.compute_sha256: idx.compute_sha256() elif args.do_not_upload: From 96cbf68ff44368977cd6e847ee8d1c85e3ec8b10 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Tue, 31 Oct 2023 01:00:22 +0000 Subject: [PATCH 108/212] [S3_manage] Handle invalid versions --- s3_management/manage.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index 2b83d5174..412db197f 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -12,7 +12,7 @@ from collections 
import defaultdict from typing import Iterable, List, Type, Dict, Set, TypeVar, Optional from re import sub, match, search -from packaging.version import parse +from packaging.version import parse as _parse_version, Version, InvalidVersion import boto3 @@ -150,6 +150,12 @@ def between_bad_dates(package_build_time: datetime): return start_bad <= package_build_time <= end_bad +def safe_parse_version(ver_str: str) -> Version: + try: + return _parse_version(ver_str) + except InvalidVersion: + return Version(0, 0, 0) + class S3Index: def __init__(self: S3IndexType, objects: List[S3Object], prefix: str) -> None: self.objects = objects @@ -177,7 +183,7 @@ def nightly_packages_to_show(self: S3IndexType) -> Set[S3Object]: # sorting, sorts in reverse to put the most recent versions first all_sorted_packages = sorted( {self.normalize_package_version(obj) for obj in self.objects}, - key=lambda name_ver: parse(name_ver.split('-', 1)[-1]), + key=lambda name_ver: safe_parse_version(name_ver.split('-', 1)[-1]), reverse=True, ) packages: Dict[str, int] = defaultdict(int) From ad9cc665c45b7c5069791aebcc51ca743f97bd77 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Tue, 31 Oct 2023 04:27:17 +0000 Subject: [PATCH 109/212] [S3_management] Fix Version on error And fix flake8 lint violation --- s3_management/manage.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index 412db197f..11c454003 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -154,7 +154,8 @@ def safe_parse_version(ver_str: str) -> Version: try: return _parse_version(ver_str) except InvalidVersion: - return Version(0, 0, 0) + return Version("0.0.0") + class S3Index: def __init__(self: S3IndexType, objects: List[S3Object], prefix: str) -> None: From 838c550302632d4d55cef4d8272a961312441aba Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Tue, 31 Oct 2023 04:10:44 +0000 Subject: [PATCH 110/212] [S3_Management] Refactor `from_S3` 
Move `fetch_metadata` into its own method, which could be called later on Make S3Object non-frozen and introduce implicit __hash__ method --- s3_management/manage.py | 62 ++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index 11c454003..90eee7907 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -115,7 +115,7 @@ S3IndexType = TypeVar('S3IndexType', bound='S3Index') -@dataclasses.dataclass(frozen=True) +@dataclasses.dataclass(frozen=False) @functools.total_ordering class S3Object: key: str @@ -123,6 +123,9 @@ class S3Object: checksum: Optional[str] size: Optional[int] + def __hash__(self): + return hash(self.key) + def __str__(self): return self.key @@ -157,6 +160,7 @@ def safe_parse_version(ver_str: str) -> Version: return Version("0.0.0") + class S3Index: def __init__(self: S3IndexType, objects: List[S3Object], prefix: str) -> None: self.objects = objects @@ -411,41 +415,43 @@ def fetch_object_names(cls: Type[S3IndexType], prefix: str) -> List[str]: obj_names.append(obj.key) return obj_names + def fetch_metadata(self: S3IndexType) -> None: + # Add PEP 503-compatible hashes to URLs to allow clients to avoid spurious downloads, if possible. 
+ with concurrent.futures.ThreadPoolExecutor(max_workers=6) as executor: + for idx, future in { + idx: executor.submit( + lambda key: CLIENT.head_object( + Bucket=BUCKET.name, Key=key, ChecksumMode="Enabled" + ), + obj.orig_key, + ) + for (idx, obj) in enumerate(self.objects) + if obj.size is None + }.items(): + response = future.result() + sha256 = (_b64 := response.get("ChecksumSHA256")) and base64.b64decode(_b64).hex() + # For older files, rely on checksum-sha256 metadata that can be added to the file later + if sha256 is None: + sha256 = response.get("Metadata", {}).get("checksum-sha256") + self.objects[idx].checksum = sha256 + if size := response.get("ContentLength"): + self.objects[idx].size = int(size) + @classmethod def from_S3(cls: Type[S3IndexType], prefix: str, with_metadata: bool = True) -> S3IndexType: prefix = prefix.rstrip("/") obj_names = cls.fetch_object_names(prefix) - objects = [] - - def fetch_metadata(key: str): - return CLIENT.head_object(Bucket=BUCKET.name, Key=key, ChecksumMode="Enabled") def sanitize_key(key: str) -> str: return key.replace("+", "%2B") - if not with_metadata: - return cls([S3Object(key=sanitize_key(key), - orig_key=key, - checksum=None, - size=None) for key in obj_names], prefix) - - with concurrent.futures.ThreadPoolExecutor(max_workers=6) as executor: - # Add PEP 503-compatible hashes to URLs to allow clients to avoid spurious downloads, if possible. 
- for obj_key, future in {key: executor.submit(fetch_metadata, key) for key in obj_names}.items(): - response = future.result() - sha256 = (_b64 := response.get("ChecksumSHA256")) and base64.b64decode(_b64).hex() - # For older files, rely on checksum-sha256 metadata that can be added to the file later - if sha256 is None: - sha256 = response.get("Metadata", {}).get("checksum-sha256") - size = response.get("ContentLength") - s3_object = S3Object( - key=sanitize_key(obj_key), - orig_key=obj_key, - checksum=sha256, - size=int(size) if size else size, - ) - objects.append(s3_object) - return cls(objects, prefix) + rc = cls([S3Object(key=sanitize_key(key), + orig_key=key, + checksum=None, + size=None) for key in obj_names], prefix) + if with_metadata: + rc.fetch_metadata() + return rc @classmethod def undelete_prefix(cls: Type[S3IndexType], prefix: str) -> None: From 7b1a100d1b18ccdc8a955f78b0c91b8f67986ea9 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Tue, 31 Oct 2023 04:27:56 +0000 Subject: [PATCH 111/212] [S3_Management] Filter nighly before `fetch_metadata` This reduces time to call `from_S3Index` from 600 to 80 sec --- s3_management/manage.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index 90eee7907..83a383ff2 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -172,7 +172,7 @@ def __init__(self: S3IndexType, objects: List[S3Object], prefix: str) -> None: path.dirname(obj.key) for obj in objects if path.dirname != prefix } - def nightly_packages_to_show(self: S3IndexType) -> Set[S3Object]: + def nightly_packages_to_show(self: S3IndexType) -> List[S3Object]: """Finding packages to show based on a threshold we specify Basically takes our S3 packages, normalizes the version for easier @@ -205,10 +205,10 @@ def nightly_packages_to_show(self: S3IndexType) -> Set[S3Object]: to_hide.add(obj) else: packages[package_name] += 1 - return 
set(self.objects).difference({ + return list(set(self.objects).difference({ obj for obj in self.objects if self.normalize_package_version(obj) in to_hide - }) + })) def is_obj_at_root(self, obj: S3Object) -> bool: return path.dirname(obj.key) == self.prefix @@ -224,10 +224,7 @@ def gen_file_list( subdir: Optional[str] = None, package_name: Optional[str] = None ) -> Iterable[S3Object]: - objects = ( - self.nightly_packages_to_show() if self.prefix == 'whl/nightly' - else self.objects - ) + objects = self.objects subdir = self._resolve_subdir(subdir) + '/' for obj in objects: if package_name is not None and self.obj_to_package_name(obj) != package_name: @@ -449,6 +446,8 @@ def sanitize_key(key: str) -> str: orig_key=key, checksum=None, size=None) for key in obj_names], prefix) + if prefix == "whl/nightly": + rc.objects = rc.nightly_packages_to_show() if with_metadata: rc.fetch_metadata() return rc From 02c06296b09f6792cfd0109a95ddcffdf7e173fb Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Mon, 30 Oct 2023 21:56:49 -0700 Subject: [PATCH 112/212] Add option to build -arm64- libtorch binaries --- wheel/build_wheel.sh | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/wheel/build_wheel.sh b/wheel/build_wheel.sh index 75138fc09..e2dc089dd 100755 --- a/wheel/build_wheel.sh +++ b/wheel/build_wheel.sh @@ -99,8 +99,10 @@ mkdir -p "$whl_tmp_dir" if [[ -n "$CROSS_COMPILE_ARM64" || $(uname -m) == "arm64" ]]; then mac_version='macosx_11_0_arm64' + libtorch_arch='-arm64-' else mac_version='macosx_10_9_x86_64' + libtorch_arch='' fi # Create a consistent wheel package name to rename the wheel to @@ -264,17 +266,19 @@ else cp -r "$(pwd)/any_wheel/torch/lib/include" "$(pwd)/libtorch/" fi cp -r "$(pwd)/any_wheel/torch/share/cmake" "$(pwd)/libtorch/share/" - if [[ -x "$(pwd)/any_wheel/torch/.dylibs/libiomp5.dylib" ]]; then - cp -r "$(pwd)/any_wheel/torch/.dylibs/libiomp5.dylib" "$(pwd)/libtorch/lib/" - else - cp -r 
"$(pwd)/any_wheel/torch/lib/libiomp5.dylib" "$(pwd)/libtorch/lib/" + if [[ "${libtorch_arch}" != "-arm64-" ]]; then + if [[ -x "$(pwd)/any_wheel/torch/.dylibs/libiomp5.dylib" ]]; then + cp -r "$(pwd)/any_wheel/torch/.dylibs/libiomp5.dylib" "$(pwd)/libtorch/lib/" + else + cp -r "$(pwd)/any_wheel/torch/lib/libiomp5.dylib" "$(pwd)/libtorch/lib/" + fi fi rm -rf "$(pwd)/any_wheel" echo $PYTORCH_BUILD_VERSION > libtorch/build-version echo "$(pushd $pytorch_rootdir && git rev-parse HEAD)" > libtorch/build-hash - zip -rq "$PYTORCH_FINAL_PACKAGE_DIR/libtorch-macos-$PYTORCH_BUILD_VERSION.zip" libtorch - cp "$PYTORCH_FINAL_PACKAGE_DIR/libtorch-macos-$PYTORCH_BUILD_VERSION.zip" \ - "$PYTORCH_FINAL_PACKAGE_DIR/libtorch-macos-latest.zip" + zip -rq "$PYTORCH_FINAL_PACKAGE_DIR/libtorch-macos${libtorch_arch}-$PYTORCH_BUILD_VERSION.zip" libtorch + cp "$PYTORCH_FINAL_PACKAGE_DIR/libtorch-macos${libtorch_arch}-$PYTORCH_BUILD_VERSION.zip" \ + "$PYTORCH_FINAL_PACKAGE_DIR/libtorch-macos${libtorch_arch}-latest.zip" fi From 617327ece91dc0308cb4447657ddd1e718237a56 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Tue, 31 Oct 2023 07:57:24 -0700 Subject: [PATCH 113/212] [Docker] Remove trailing whitespace And cause docker rebuild, to overwrite docker build from release/2.1 branch artifacts --- manywheel/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manywheel/Dockerfile b/manywheel/Dockerfile index 1adc4ba22..df8f5acd8 100644 --- a/manywheel/Dockerfile +++ b/manywheel/Dockerfile @@ -158,7 +158,7 @@ FROM cpu_final as rocm_final ARG ROCM_VERSION=3.7 ARG PYTORCH_ROCM_ARCH ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH} -# Adding ROCM_PATH env var so that LoadHip.cmake (even with logic updated for ROCm6.0) +# Adding ROCM_PATH env var so that LoadHip.cmake (even with logic updated for ROCm6.0) # find HIP works for ROCm5.7. Not needed for ROCm6.0 and above. # Remove below when ROCm5.7 is not in support matrix anymore. 
ENV ROCM_PATH /opt/rocm From 9467b4ea1d5456dc6d42bc4420792063406649c5 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Wed, 1 Nov 2023 14:55:58 -0700 Subject: [PATCH 114/212] [MacOS] Small changes to libtorch naming Intel x86 libtorch builds will have `x86_64` suffix and Apple Silicon ones will have `arm64` ones, but latest will point to Intel ones for now. --- wheel/build_wheel.sh | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/wheel/build_wheel.sh b/wheel/build_wheel.sh index e2dc089dd..c3c45cf42 100755 --- a/wheel/build_wheel.sh +++ b/wheel/build_wheel.sh @@ -99,10 +99,10 @@ mkdir -p "$whl_tmp_dir" if [[ -n "$CROSS_COMPILE_ARM64" || $(uname -m) == "arm64" ]]; then mac_version='macosx_11_0_arm64' - libtorch_arch='-arm64-' + libtorch_arch='arm64' else mac_version='macosx_10_9_x86_64' - libtorch_arch='' + libtorch_arch='x86_64' fi # Create a consistent wheel package name to rename the wheel to @@ -266,7 +266,7 @@ else cp -r "$(pwd)/any_wheel/torch/lib/include" "$(pwd)/libtorch/" fi cp -r "$(pwd)/any_wheel/torch/share/cmake" "$(pwd)/libtorch/share/" - if [[ "${libtorch_arch}" != "-arm64-" ]]; then + if [[ "${libtorch_arch}" == "x86_64" ]]; then if [[ -x "$(pwd)/any_wheel/torch/.dylibs/libiomp5.dylib" ]]; then cp -r "$(pwd)/any_wheel/torch/.dylibs/libiomp5.dylib" "$(pwd)/libtorch/lib/" else @@ -278,7 +278,12 @@ else echo $PYTORCH_BUILD_VERSION > libtorch/build-version echo "$(pushd $pytorch_rootdir && git rev-parse HEAD)" > libtorch/build-hash - zip -rq "$PYTORCH_FINAL_PACKAGE_DIR/libtorch-macos${libtorch_arch}-$PYTORCH_BUILD_VERSION.zip" libtorch - cp "$PYTORCH_FINAL_PACKAGE_DIR/libtorch-macos${libtorch_arch}-$PYTORCH_BUILD_VERSION.zip" \ - "$PYTORCH_FINAL_PACKAGE_DIR/libtorch-macos${libtorch_arch}-latest.zip" + zip -rq "$PYTORCH_FINAL_PACKAGE_DIR/libtorch-macos-${libtorch_arch}-$PYTORCH_BUILD_VERSION.zip" libtorch + cp "$PYTORCH_FINAL_PACKAGE_DIR/libtorch-macos-${libtorch_arch}-$PYTORCH_BUILD_VERSION.zip" \ + 
"$PYTORCH_FINAL_PACKAGE_DIR/libtorch-macos-${libtorch_arch}-latest.zip" + if [[ "${libtorch_arch}" == "x86_64" ]]; then + # For backward compatibility make unarched latest to point to x86_64 + cp "$PYTORCH_FINAL_PACKAGE_DIR/libtorch-macos-${libtorch_arch}-$PYTORCH_BUILD_VERSION.zip" \ + "$PYTORCH_FINAL_PACKAGE_DIR/libtorch-macos-latest.zip" + fi fi From df5f7c57855603dd629eb2568c8accce6d18daf5 Mon Sep 17 00:00:00 2001 From: Nikita Shulga <2453524+malfet@users.noreply.github.com> Date: Thu, 2 Nov 2023 18:03:18 -0700 Subject: [PATCH 115/212] Update libtorch/Dockerfile to use Ubuntu-20.04 (#1578) As 18.04 EOLed --- libtorch/Dockerfile | 2 +- libtorch/build_docker.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/libtorch/Dockerfile b/libtorch/Dockerfile index 5d5b707af..8d69ac944 100644 --- a/libtorch/Dockerfile +++ b/libtorch/Dockerfile @@ -1,5 +1,5 @@ ARG BASE_TARGET=base -ARG GPU_IMAGE=ubuntu:18.04 +ARG GPU_IMAGE=ubuntu:20.04 FROM ${GPU_IMAGE} as base ENV DEBIAN_FRONTEND=noninteractive diff --git a/libtorch/build_docker.sh b/libtorch/build_docker.sh index c799bb178..8997f69cf 100755 --- a/libtorch/build_docker.sh +++ b/libtorch/build_docker.sh @@ -15,13 +15,13 @@ case ${GPU_ARCH_TYPE} in cpu) BASE_TARGET=cpu DOCKER_TAG=cpu - GPU_IMAGE=ubuntu:18.04 + GPU_IMAGE=ubuntu:20.04 DOCKER_GPU_BUILD_ARG="" ;; cuda) BASE_TARGET=cuda${GPU_ARCH_VERSION} DOCKER_TAG=cuda${GPU_ARCH_VERSION} - GPU_IMAGE=ubuntu:18.04 + GPU_IMAGE=ubuntu:20.04 DOCKER_GPU_BUILD_ARG="" ;; rocm) From 16b77c7152c62ef0c31ad4cadc27a1f620f40917 Mon Sep 17 00:00:00 2001 From: Nikita Shulga <2453524+malfet@users.noreply.github.com> Date: Fri, 3 Nov 2023 19:04:03 -0700 Subject: [PATCH 116/212] Conda builds should respect `MAX_JOBS` May be this help with OOMs --- conda/pytorch-nightly/meta.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/conda/pytorch-nightly/meta.yaml b/conda/pytorch-nightly/meta.yaml index 59efc729b..4a03c3d05 100644 --- a/conda/pytorch-nightly/meta.yaml +++ 
b/conda/pytorch-nightly/meta.yaml @@ -93,6 +93,7 @@ build: - USE_PYTORCH_METAL_EXPORT # [osx] - USE_COREML_DELEGATE # [osx] - _GLIBCXX_USE_CXX11_ABI # [unix] + - MAX_JOBS # [unix] - OVERRIDE_TORCH_CUDA_ARCH_LIST - USE_CUSPARSELT From ca0040fa24f8bf1929718c255afd17a4f962a6a3 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Fri, 10 Nov 2023 13:59:17 -0800 Subject: [PATCH 117/212] [S3_management] Fix subpackage urls Make them `lower()` --- s3_management/manage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index 83a383ff2..47c151f08 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -310,7 +310,7 @@ def to_simple_packages_html( out.append('') out.append(' ') for pkg_name in sorted(self.get_package_names(subdir)): - out.append(f' {pkg_name.replace("_","-")}
') + out.append(f' {pkg_name.replace("_","-")}
') # Adding html footer out.append(' ') out.append('') From 4cfde0044e9f3b1454029c2533189743f7452d72 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Tue, 14 Nov 2023 10:52:36 -0500 Subject: [PATCH 118/212] Advance versions for release 2.1.1 (#1583) --- release/promote.sh | 10 +++++----- release/release_versions.sh | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/release/promote.sh b/release/promote.sh index b3656dda6..a7f273bc1 100644 --- a/release/promote.sh +++ b/release/promote.sh @@ -6,11 +6,11 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" source "${DIR}/release_versions.sh" # Make sure to update these versions when doing a release first -PYTORCH_VERSION=${PYTORCH_VERSION:-2.1.0} -TORCHVISION_VERSION=${TORCHVISION_VERSION:-0.16.0} -TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION:-2.1.0} -TORCHTEXT_VERSION=${TORCHTEXT_VERSION:-0.16.0} -TORCHDATA_VERSION=${TORCHDATA_VERSION:-0.7.0} +PYTORCH_VERSION=${PYTORCH_VERSION:-2.1.1} +TORCHVISION_VERSION=${TORCHVISION_VERSION:-0.16.1} +TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION:-2.1.1} +TORCHTEXT_VERSION=${TORCHTEXT_VERSION:-0.16.1} +TORCHDATA_VERSION=${TORCHDATA_VERSION:-0.7.1} DRY_RUN=${DRY_RUN:-enabled} diff --git a/release/release_versions.sh b/release/release_versions.sh index ab35075b6..d362cb1ca 100644 --- a/release/release_versions.sh +++ b/release/release_versions.sh @@ -1,8 +1,8 @@ #!/usr/bin/env bash # Make sure to update these versions when doing a release first -PYTORCH_VERSION=${PYTORCH_VERSION:-2.1.0} -TORCHVISION_VERSION=${TORCHVISION_VERSION:-0.16.0} -TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION:-2.1.0} -TORCHTEXT_VERSION=${TORCHTEXT_VERSION:-0.16.0} -TORCHDATA_VERSION=${TORCHDATA_VERSION:-0.7.0} +PYTORCH_VERSION=${PYTORCH_VERSION:-2.1.1} +TORCHVISION_VERSION=${TORCHVISION_VERSION:-0.16.1} +TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION:-2.1.1} +TORCHTEXT_VERSION=${TORCHTEXT_VERSION:-0.16.1} +TORCHDATA_VERSION=${TORCHDATA_VERSION:-0.7.1} From 
75a877030d102fb04b4c3dbd08be2cefaaadd56c Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Tue, 14 Nov 2023 14:19:48 -0500 Subject: [PATCH 119/212] [aarch64] Release pypi prep script change for aarch64 builds (#1585) --- release/pypi/promote_pypi_to_staging.sh | 4 ++-- release/pypi/upload_pypi_to_staging.sh | 10 ++++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/release/pypi/promote_pypi_to_staging.sh b/release/pypi/promote_pypi_to_staging.sh index 678d9dd03..a9dbe535b 100644 --- a/release/pypi/promote_pypi_to_staging.sh +++ b/release/pypi/promote_pypi_to_staging.sh @@ -34,13 +34,13 @@ PLATFORM="${MACOS_X86_64}" VERSION_SUFFIX="" PLATFORM="${MACOS_ARM64}" VERSION_SUFFIX="" upload_pypi_to_staging torch "${PYTORCH_VERSION}" # m1 mac PLATFORM="linux_x86_64" VERSION_SUFFIX="${LINUX_VERSION_SUFFIX}" upload_pypi_to_staging torchvision "${TORCHVISION_VERSION}" -PLATFORM="manylinux2014_aarch64" VERSION_SUFFIX="" upload_pypi_to_staging torchvision "${TORCHVISION_VERSION}" +PLATFORM="linux_aarch64" VERSION_SUFFIX="" upload_pypi_to_staging torchvision "${TORCHVISION_VERSION}" PLATFORM="win_amd64" VERSION_SUFFIX="${WIN_VERSION_SUFFIX}" upload_pypi_to_staging torchvision "${TORCHVISION_VERSION}" PLATFORM="${MACOS_X86_64}" VERSION_SUFFIX="" upload_pypi_to_staging torchvision "${TORCHVISION_VERSION}" PLATFORM="${MACOS_ARM64}" VERSION_SUFFIX="" upload_pypi_to_staging torchvision "${TORCHVISION_VERSION}" PLATFORM="linux_x86_64" VERSION_SUFFIX="${LINUX_VERSION_SUFFIX}" upload_pypi_to_staging torchaudio "${TORCHAUDIO_VERSION}" -PLATFORM="manylinux2014_aarch64" VERSION_SUFFIX="" upload_pypi_to_staging torchaudio "${TORCHAUDIO_VERSION}" +PLATFORM="linux_aarch64" VERSION_SUFFIX="" upload_pypi_to_staging torchaudio "${TORCHAUDIO_VERSION}" PLATFORM="win_amd64" VERSION_SUFFIX="${WIN_VERSION_SUFFIX}" upload_pypi_to_staging torchaudio "${TORCHAUDIO_VERSION}" PLATFORM="${MACOS_X86_64}" VERSION_SUFFIX="" upload_pypi_to_staging torchaudio "${TORCHAUDIO_VERSION}" 
PLATFORM="${MACOS_ARM64}" VERSION_SUFFIX="" upload_pypi_to_staging torchaudio "${TORCHAUDIO_VERSION}" diff --git a/release/pypi/upload_pypi_to_staging.sh b/release/pypi/upload_pypi_to_staging.sh index a9573eac2..250b231f9 100644 --- a/release/pypi/upload_pypi_to_staging.sh +++ b/release/pypi/upload_pypi_to_staging.sh @@ -42,10 +42,16 @@ fi for pkg in ${pkgs_to_promote}; do pkg_basename="$(basename "${pkg}")" - # Don't attempt to change if manylinux2014 - if [[ "${pkg}" != *manylinux2014* ]]; then + + if [[ "${pkg}" != *aarch64* ]]; then # sub out linux for manylinux1 pkg_basename="$(basename "${pkg//linux/manylinux1}")" + elif [[ "${pkg}" == *manylinux_2_17_aarch64* ]]; then + # strip manylinux_2_17 from core filename + pkg_basename="$(basename "${pkg//manylinux_2_17_aarch64./}")" + elif [[ "${pkg}" == *linux_aarch64* ]]; then + # domains change linux_aarch64 to manylinux2014_aarch64 + pkg_basename="$(basename "${pkg//linux_aarch64/manylinux2014_aarch64}")" fi orig_pkg="${tmp_dir}/${pkg_basename}" ( From 8aa71bd9f28ed682443f1d3f35721f1ae3b52863 Mon Sep 17 00:00:00 2001 From: albanD Date: Wed, 15 Nov 2023 09:40:48 -0500 Subject: [PATCH 120/212] Changes needed for core enablement of 3.12 binary wheels (#1586) --- aarch64_linux/aarch64_ci_setup.sh | 7 +++++-- conda/pytorch-nightly/meta.yaml | 9 ++++++--- manywheel/build_common.sh | 3 +++ manywheel/build_cuda.sh | 4 +++- manywheel/build_rocm.sh | 4 +++- wheel/build_wheel.sh | 13 ++++++++++++- windows/condaenv.bat | 1 + windows/internal/smoke_test.bat | 1 + 8 files changed, 34 insertions(+), 8 deletions(-) diff --git a/aarch64_linux/aarch64_ci_setup.sh b/aarch64_linux/aarch64_ci_setup.sh index 6d2d780fe..ace6a85fb 100755 --- a/aarch64_linux/aarch64_ci_setup.sh +++ b/aarch64_linux/aarch64_ci_setup.sh @@ -19,7 +19,10 @@ curl -L -o /mambaforge.sh https://github.com/conda-forge/miniforge/releases/late chmod +x /mambaforge.sh /mambaforge.sh -b -p /opt/conda rm /mambaforge.sh -/opt/conda/bin/conda config --set ssl_verify 
False -/opt/conda/bin/conda install -y -c conda-forge python=${DESIRED_PYTHON} numpy pyyaml setuptools patchelf pygit2 openblas ninja scons +source /opt/conda/etc/profile.d/conda.sh +conda config --set ssl_verify False +conda create -y -c conda-forge -n aarch64_env python=${DESIRED_PYTHON} +conda activate aarch64_env +conda install -y -c conda-forge numpy==1.26.0 pyyaml==6.0.1 patchelf==0.17.2 pygit2==1.13.2 openblas==0.3.24 ninja==1.11.1 scons==4.5.2 python --version conda --version diff --git a/conda/pytorch-nightly/meta.yaml b/conda/pytorch-nightly/meta.yaml index 4a03c3d05..882f13da0 100644 --- a/conda/pytorch-nightly/meta.yaml +++ b/conda/pytorch-nightly/meta.yaml @@ -21,7 +21,8 @@ requirements: - pyyaml {% if cross_compile_arm64 == 0 %} - mkl-include # [x86_64] - - mkl=2020.2 # [x86_64 and not win] + - mkl=2020.2 # [py <= 311 and x86_64 and not win] + - mkl=2023.1 # [py >= 312 and x86_64 and not win] - mkl=2021.4 # [x86_64 and win] {% endif %} - typing_extensions @@ -29,9 +30,11 @@ requirements: - libuv # [win] - numpy=1.19 # [py <= 39] - numpy=1.21.5 # [py == 310] - - numpy=1.23.5 # [py >= 311] + - numpy=1.23.5 # [py == 311] + - numpy=1.26.1 # [py >= 312] - openssl=1.1.1l # [py >= 38 and py <= 310 and linux] - - openssl=1.1.1s # [py >= 311 and linux] + - openssl=1.1.1s # [py == 311 and linux] + - openssl=3.1.4 # [py >= 312 and linux] {{ environ.get('PYTORCH_LLVM_PACKAGE', ' - llvmdev=9') }} {{ environ.get('MAGMA_PACKAGE', '') }} diff --git a/manywheel/build_common.sh b/manywheel/build_common.sh index f4794b719..31f188d94 100644 --- a/manywheel/build_common.sh +++ b/manywheel/build_common.sh @@ -131,6 +131,9 @@ case ${DESIRED_PYTHON} in cp311*) retry pip install -q numpy==1.23.1 ;; + cp312*) + retry pip install -q numpy==1.26.1 + ;; # Should catch 3.9+ *) retry pip install -q numpy==1.19.4 diff --git a/manywheel/build_cuda.sh b/manywheel/build_cuda.sh index c59cbca16..31f4e263b 100644 --- a/manywheel/build_cuda.sh +++ b/manywheel/build_cuda.sh @@ -263,7 
+263,9 @@ else fi # TODO: Remove me when Triton has a proper release channel -if [[ $(uname) == "Linux" ]]; then +# No triton dependency for now on 3.12 since we don't have binaries for it +# and torch.compile doesn't work. +if [[ $(uname) == "Linux" && "$DESIRED_PYTHON" != "3.12" ]]; then TRITON_SHORTHASH=$(cut -c1-10 $PYTORCH_ROOT/.github/ci_commit_pins/triton.txt) if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then diff --git a/manywheel/build_rocm.sh b/manywheel/build_rocm.sh index 0fed5970b..4fbca7697 100755 --- a/manywheel/build_rocm.sh +++ b/manywheel/build_rocm.sh @@ -222,7 +222,9 @@ if [[ $ROCM_INT -ge 50600 ]]; then fi # Add triton install dependency -if [[ $(uname) == "Linux" ]]; then +# No triton dependency for now on 3.12 since we don't have binaries for it +# and torch.compile doesn't work. +if [[ $(uname) == "Linux" && "$DESIRED_PYTHON" != "3.12" ]]; then TRITON_SHORTHASH=$(cut -c1-10 $PYTORCH_ROOT/.ci/docker/ci_commit_pins/triton-rocm.txt) TRITON_VERSION=$(cat $PYTORCH_ROOT/.ci/docker/triton_version.txt) diff --git a/wheel/build_wheel.sh b/wheel/build_wheel.sh index c3c45cf42..1186bc56a 100755 --- a/wheel/build_wheel.sh +++ b/wheel/build_wheel.sh @@ -136,23 +136,33 @@ export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"} SETUPTOOLS_PINNED_VERSION="=46.0.0" PYYAML_PINNED_VERSION="=5.3" EXTRA_CONDA_INSTALL_FLAGS="" -case ${desired_python} in +case $desired_python in + 3.12) + echo "Using 3.12 deps" + SETUPTOOLS_PINNED_VERSION=">=68.0.0" + PYYAML_PINNED_VERSION=">=6.0.1" + NUMPY_PINNED_VERSION="==1.26.0" + ;; 3.11) + echo "Using 3.11 deps" SETUPTOOLS_PINNED_VERSION=">=46.0.0" PYYAML_PINNED_VERSION=">=5.3" NUMPY_PINNED_VERSION="==1.23.5" ;; 3.10) + echo "Using 3.10 deps" SETUPTOOLS_PINNED_VERSION=">=46.0.0" PYYAML_PINNED_VERSION=">=5.3" NUMPY_PINNED_VERSION="=1.21.2" ;; 3.9) + echo "Using 3.9 deps" SETUPTOOLS_PINNED_VERSION=">=46.0.0" PYYAML_PINNED_VERSION=">=5.3" NUMPY_PINNED_VERSION="=1.19" ;; 3.8) + echo "Using 3.8 deps" if [[ 
"$(uname -m)" == "arm64" ]]; then SETUPTOOLS_PINNED_VERSION=">=46.0.0" PYYAML_PINNED_VERSION=">=5.3" @@ -162,6 +172,7 @@ case ${desired_python} in fi ;; *) + echo "Using default deps" NUMPY_PINNED_VERSION="=1.11.3" ;; esac diff --git a/windows/condaenv.bat b/windows/condaenv.bat index 464eeb01c..cf1b2c865 100644 --- a/windows/condaenv.bat +++ b/windows/condaenv.bat @@ -14,6 +14,7 @@ FOR %%v IN (%DESIRED_PYTHON%) DO ( if "%%v" == "3.9" call conda create -n py!PYTHON_VERSION_STR! -y -q numpy>=1.11 "mkl=2020.2" pyyaml boto3 cmake ninja typing_extensions python=%%v if "%%v" == "3.10" call conda create -n py!PYTHON_VERSION_STR! -y -q -c=conda-forge numpy=1.21.3 "mkl=2020.2" pyyaml boto3 "cmake=3.19.6" ninja typing_extensions python=%%v if "%%v" == "3.11" call conda create -n py!PYTHON_VERSION_STR! -y -q -c=conda-forge numpy=1.23.4 "mkl=2020.2" pyyaml boto3 "cmake=3.19.6" ninja typing_extensions python=%%v + if "%%v" == "3.12" call conda create -n py!PYTHON_VERSION_STR! -y -q -c=conda-forge numpy=1.26.0 "mkl=2023.1" pyyaml boto3 "cmake=3.19.6" ninja typing_extensions python=%%v if "%%v" == "3" call conda create -n py!PYTHON_VERSION_STR! 
-y -q numpy=1.11 "mkl=2020.2" pyyaml boto3 cmake ninja typing_extensions python=%%v ) endlocal diff --git a/windows/internal/smoke_test.bat b/windows/internal/smoke_test.bat index ad276b992..1ade2cbda 100644 --- a/windows/internal/smoke_test.bat +++ b/windows/internal/smoke_test.bat @@ -30,6 +30,7 @@ exit /b 1 echo "install wheel package" set PYTHON_INSTALLER_URL= +if "%DESIRED_PYTHON%" == "3.12" set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.12.0/python-3.12.0-amd64.exe" if "%DESIRED_PYTHON%" == "3.11" set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.11.0/python-3.11.0-amd64.exe" if "%DESIRED_PYTHON%" == "3.10" set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" if "%DESIRED_PYTHON%" == "3.9" set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.9.0/python-3.9.0-amd64.exe" From c6cbe7700a9e361a6d33e6d5f40956806147da2d Mon Sep 17 00:00:00 2001 From: albanD Date: Wed, 15 Nov 2023 14:58:16 -0500 Subject: [PATCH 121/212] Fix aarch64 build on 3.8 (#1593) --- aarch64_linux/aarch64_ci_setup.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/aarch64_linux/aarch64_ci_setup.sh b/aarch64_linux/aarch64_ci_setup.sh index ace6a85fb..f10e49b40 100755 --- a/aarch64_linux/aarch64_ci_setup.sh +++ b/aarch64_linux/aarch64_ci_setup.sh @@ -23,6 +23,13 @@ source /opt/conda/etc/profile.d/conda.sh conda config --set ssl_verify False conda create -y -c conda-forge -n aarch64_env python=${DESIRED_PYTHON} conda activate aarch64_env -conda install -y -c conda-forge numpy==1.26.0 pyyaml==6.0.1 patchelf==0.17.2 pygit2==1.13.2 openblas==0.3.24 ninja==1.11.1 scons==4.5.2 + +if [[ "$DESIRED_PYTHON" == "3.8" ]]; then + NUMPY_VERSION="1.24.4" +else + NUMPY_VERSION="1.26.0" +fi +conda install -y -c conda-forge numpy==${NUMPY_VERSION} pyyaml==6.0.1 patchelf==0.17.2 pygit2==1.13.2 openblas==0.3.24 ninja==1.11.1 scons==4.5.2 + python --version conda --version From 
4c7fa069848aa32a24e47cae0e5996bc0eeeb70a Mon Sep 17 00:00:00 2001 From: Huy Do Date: Wed, 15 Nov 2023 19:35:28 -0800 Subject: [PATCH 122/212] Add some more validation checks for torch.linalg.eigh and torch.compile (#1580) * Add some more validation checks for torch.linalg.eigh and torch.compile * Update test * Also update smoke_test.py * Fix lint --- check_binary.sh | 6 ++++++ test/smoke_test/smoke_test.py | 3 +++ test_example_code/torch_compile_smoke.py | 12 ++++++++++++ 3 files changed, 21 insertions(+) create mode 100644 test_example_code/torch_compile_smoke.py diff --git a/check_binary.sh b/check_binary.sh index 30b44b535..9e7d03a54 100755 --- a/check_binary.sh +++ b/check_binary.sh @@ -404,6 +404,12 @@ if [[ "$DESIRED_CUDA" != 'cpu' && "$DESIRED_CUDA" != 'cpu-cxx11-abi' && "$DESIRE echo "Test that linalg works" python -c "import torch;x=torch.rand(3,3,device='cuda');print(torch.linalg.svd(torch.mm(x.t(), x)))" + echo "Test that linalg.eigh works" + python -c "import torch;x=torch.rand(3,3,device='cuda');print(torch.linalg.eigh(torch.mm(x.t(), x)))" + + echo "Checking that basic torch.compile works" + python ${TEST_CODE_DIR}/torch_compile_smoke.py + popd fi # if libtorch fi # if cuda diff --git a/test/smoke_test/smoke_test.py b/test/smoke_test/smoke_test.py index 3d1b6af64..64efc7601 100644 --- a/test/smoke_test/smoke_test.py +++ b/test/smoke_test/smoke_test.py @@ -193,6 +193,9 @@ def smoke_test_linalg() -> None: A = torch.randn(20, 16, 50, 100, device="cuda").type(dtype) torch.linalg.svd(A) + A = torch.rand(3, 3, device="cuda") + L, Q = torch.linalg.eigh(torch.mm(A.t(), A)) + def smoke_test_compile() -> None: supported_dtypes = [torch.float16, torch.float32, torch.float64] diff --git a/test_example_code/torch_compile_smoke.py b/test_example_code/torch_compile_smoke.py new file mode 100644 index 000000000..7a12a013e --- /dev/null +++ b/test_example_code/torch_compile_smoke.py @@ -0,0 +1,12 @@ +import torch + + +def foo(x: torch.Tensor) -> torch.Tensor: + return 
torch.sin(x) + torch.cos(x) + + +if __name__ == "__main__": + x = torch.rand(3, 3, device="cuda") + x_eager = foo(x) + x_pt2 = torch.compile(foo)(x) + print(torch.allclose(x_eager, x_pt2)) From b321562c29350cc195083913d2d561d820cb60b5 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Wed, 15 Nov 2023 21:06:00 -0800 Subject: [PATCH 123/212] Revert "Add some more validation checks for torch.linalg.eigh and torch.compile (#1580)" (#1594) This reverts commit 4c7fa069848aa32a24e47cae0e5996bc0eeeb70a. --- check_binary.sh | 6 ------ test/smoke_test/smoke_test.py | 3 --- test_example_code/torch_compile_smoke.py | 12 ------------ 3 files changed, 21 deletions(-) delete mode 100644 test_example_code/torch_compile_smoke.py diff --git a/check_binary.sh b/check_binary.sh index 9e7d03a54..30b44b535 100755 --- a/check_binary.sh +++ b/check_binary.sh @@ -404,12 +404,6 @@ if [[ "$DESIRED_CUDA" != 'cpu' && "$DESIRED_CUDA" != 'cpu-cxx11-abi' && "$DESIRE echo "Test that linalg works" python -c "import torch;x=torch.rand(3,3,device='cuda');print(torch.linalg.svd(torch.mm(x.t(), x)))" - echo "Test that linalg.eigh works" - python -c "import torch;x=torch.rand(3,3,device='cuda');print(torch.linalg.eigh(torch.mm(x.t(), x)))" - - echo "Checking that basic torch.compile works" - python ${TEST_CODE_DIR}/torch_compile_smoke.py - popd fi # if libtorch fi # if cuda diff --git a/test/smoke_test/smoke_test.py b/test/smoke_test/smoke_test.py index 64efc7601..3d1b6af64 100644 --- a/test/smoke_test/smoke_test.py +++ b/test/smoke_test/smoke_test.py @@ -193,9 +193,6 @@ def smoke_test_linalg() -> None: A = torch.randn(20, 16, 50, 100, device="cuda").type(dtype) torch.linalg.svd(A) - A = torch.rand(3, 3, device="cuda") - L, Q = torch.linalg.eigh(torch.mm(A.t(), A)) - def smoke_test_compile() -> None: supported_dtypes = [torch.float16, torch.float32, torch.float64] diff --git a/test_example_code/torch_compile_smoke.py b/test_example_code/torch_compile_smoke.py deleted file mode 100644 index 
7a12a013e..000000000 --- a/test_example_code/torch_compile_smoke.py +++ /dev/null @@ -1,12 +0,0 @@ -import torch - - -def foo(x: torch.Tensor) -> torch.Tensor: - return torch.sin(x) + torch.cos(x) - - -if __name__ == "__main__": - x = torch.rand(3, 3, device="cuda") - x_eager = foo(x) - x_pt2 = torch.compile(foo)(x) - print(torch.allclose(x_eager, x_pt2)) From 252463f16d20a060c51969ad4a289ceae55d28ad Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Thu, 16 Nov 2023 17:15:07 -0500 Subject: [PATCH 124/212] Release validations using release version matrix (#1611) * Release pypi prep change (#1587) * [aarch64] Release pypi prep script change for aarch64 builds * Release versions for testing Testing calling version (#1588) Upstream/release validations (#1589) * Testing calling version * add release matrix Upstream/release validations (#1590) * Testing calling version * add release matrix * test test (#1591) test (#1592) Release v1 (#1595) * test * test Release v1 (#1596) * test * test * test test (#1597) Test versions validations (#1598) * test * basedir Test versions validations (#1599) * test * basedir * test test (#1600) * test * test Add release versions everywhere (#1601) * test * test * test * test test (#1602) Test version validations (#1603) * test * test Test version validations (#1604) * test * test * test tests (#1605) More tests nov16 (#1606) * tests * test More tests nov16 (#1607) * tests * test * test More tests nov16 (#1608) * tests * test * test * test More tests nov16 (#1609) * tests * test * test * test * test * fix_lint --- .../validate-aarch64-linux-binaries.yml | 23 ++++++++++- .github/workflows/validate-binaries.yml | 27 ++++++++++++- .github/workflows/validate-linux-binaries.yml | 23 +++++++++++ .../validate-macos-arm64-binaries.yml | 22 +++++++++++ .github/workflows/validate-macos-binaries.yml | 22 +++++++++++ .../workflows/validate-windows-binaries.yml | 24 +++++++++++- test/smoke_test/smoke_test.py | 38 ++++++++++++++++++- 7 files changed, 
174 insertions(+), 5 deletions(-) diff --git a/.github/workflows/validate-aarch64-linux-binaries.yml b/.github/workflows/validate-aarch64-linux-binaries.yml index d3e57fd5c..14b7b6395 100644 --- a/.github/workflows/validate-aarch64-linux-binaries.yml +++ b/.github/workflows/validate-aarch64-linux-binaries.yml @@ -17,6 +17,16 @@ on: default: false required: false type: boolean + version: + description: 'Version to validate - optional' + default: "" + required: false + type: string + release-matrix: + description: 'Release matrix - optional' + default: "" + required: false + type: string workflow_dispatch: inputs: channel: @@ -38,6 +48,16 @@ on: default: false required: false type: boolean + version: + description: 'Version to validate - optional' + default: "" + required: false + type: string + release-matrix: + description: 'Release matrix - optional' + default: "" + required: false + type: string jobs: generate-aarch64-linux-matrix: @@ -47,7 +67,6 @@ jobs: os: linux-aarch64 channel: ${{ inputs.channel }} with-cuda: disable - linux-aarch64: needs: generate-aarch64-linux-matrix strategy: @@ -72,6 +91,8 @@ jobs: export ENV_NAME="conda-env-${{ github.run_id }}" export TARGET_OS="linux-aarch64" export TORCH_ONLY=${{ inputs.torchonly }} + export RELEASE_VERSION=${{ inputs.version }} + printf '%s\n' ${{ toJson(inputs.release-matrix) }} > release_matrix.json eval "$(conda shell.bash hook)" # Standart case: Validate binaries diff --git a/.github/workflows/validate-binaries.yml b/.github/workflows/validate-binaries.yml index 9c877f4ed..f6f73b012 100644 --- a/.github/workflows/validate-binaries.yml +++ b/.github/workflows/validate-binaries.yml @@ -60,44 +60,69 @@ on: default: false required: false type: boolean + version: + description: 'Version to validate' + default: "" + required: false + type: string + jobs: + generate-release-matrix: + uses: pytorch/test-infra/.github/workflows/generate_release_matrix.yml@main + with: + version: ${{ inputs.version }} + win: if: 
inputs.os == 'windows' || inputs.os == 'all' + needs: generate-release-matrix uses: ./.github/workflows/validate-windows-binaries.yml with: channel: ${{ inputs.channel }} ref: ${{ inputs.ref || github.ref }} torchonly: ${{ inputs.torchonly }} + version: ${{ inputs.version }} + release-matrix: ${{ needs.generate-release-matrix.outputs.matrix }} linux: if: inputs.os == 'linux' || inputs.os == 'all' + needs: generate-release-matrix uses: ./.github/workflows/validate-linux-binaries.yml with: channel: ${{ inputs.channel }} ref: ${{ inputs.ref || github.ref }} torchonly: ${{ inputs.torchonly }} + version: ${{ inputs.version }} + release-matrix: ${{ needs.generate-release-matrix.outputs.matrix }} linux-aarch64: if: inputs.os == 'linux-aarch64' + needs: generate-release-matrix uses: ./.github/workflows/validate-aarch64-linux-binaries.yml with: channel: ${{ inputs.channel }} ref: ${{ inputs.ref || github.ref }} torchonly: ${{ inputs.torchonly }} - + version: ${{ inputs.version }} + release-matrix: ${{ needs.generate-release-matrix.outputs.matrix }} mac: if: inputs.os == 'macos' || inputs.os == 'all' + needs: generate-release-matrix uses: ./.github/workflows/validate-macos-binaries.yml with: channel: ${{ inputs.channel }} ref: ${{ inputs.ref || github.ref }} torchonly: ${{ inputs.torchonly }} + version: ${{ inputs.version }} + release-matrix: ${{ needs.generate-release-matrix.outputs.matrix }} mac-arm64: if: inputs.os == 'macos' || inputs.os == 'all' + needs: generate-release-matrix uses: ./.github/workflows/validate-macos-arm64-binaries.yml with: channel: ${{ inputs.channel }} ref: ${{ inputs.ref || github.ref }} torchonly: ${{ inputs.torchonly }} + version: ${{ inputs.version }} + release-matrix: ${{ needs.generate-release-matrix.outputs.matrix }} diff --git a/.github/workflows/validate-linux-binaries.yml b/.github/workflows/validate-linux-binaries.yml index 3c5aac2eb..dcbfc93a0 100644 --- a/.github/workflows/validate-linux-binaries.yml +++ 
b/.github/workflows/validate-linux-binaries.yml @@ -17,6 +17,16 @@ on: default: false required: false type: boolean + version: + description: 'Version to validate - optional' + default: "" + required: false + type: string + release-matrix: + description: 'Release matrix - optional' + default: "" + required: false + type: string workflow_dispatch: inputs: channel: @@ -38,6 +48,16 @@ on: default: false required: false type: boolean + version: + description: 'Version to validate - optional' + default: "" + required: false + type: string + release-matrix: + description: 'Release matrix - optional' + default: "" + required: false + type: string jobs: generate-linux-matrix: @@ -64,8 +84,11 @@ jobs: set -ex export ENV_NAME="conda-env-${{ github.run_id }}" export TORCH_ONLY=${{ inputs.torchonly }} + export RELEASE_VERSION=${{ inputs.version }} export TARGET_OS="linux" eval "$(conda shell.bash hook)" + printf '%s\n' ${{ toJson(inputs.release-matrix) }} > release_matrix.json + cat release_matrix.json # Special case PyPi installation package. 
And Install of PyPi package via poetry if [[ ${MATRIX_PACKAGE_TYPE} == "manywheel" && ${MATRIX_GPU_ARCH_VERSION} == "12.1" ]]; then diff --git a/.github/workflows/validate-macos-arm64-binaries.yml b/.github/workflows/validate-macos-arm64-binaries.yml index f23dec3f6..541183b9a 100644 --- a/.github/workflows/validate-macos-arm64-binaries.yml +++ b/.github/workflows/validate-macos-arm64-binaries.yml @@ -17,6 +17,16 @@ on: default: false required: false type: boolean + version: + description: 'Version to validate - optional' + default: "" + required: false + type: string + release-matrix: + description: 'Release matrix - optional' + default: "" + required: false + type: string workflow_dispatch: inputs: channel: @@ -38,6 +48,16 @@ on: default: false required: false type: boolean + version: + description: 'Version to validate - optional' + default: "" + required: false + type: string + release-matrix: + description: 'Release matrix - optional' + default: "" + required: false + type: string jobs: generate-macos-arm64-matrix: @@ -64,4 +84,6 @@ jobs: export ENV_NAME="conda-env-${{ github.run_id }}" export TARGET_OS="macos-arm64" export TORCH_ONLY=${{ inputs.torchonly }} + export RELEASE_VERSION=${{ inputs.version }} + printf '%s\n' ${{ toJson(inputs.release-matrix) }} > release_matrix.json source ./.github/scripts/validate_binaries.sh diff --git a/.github/workflows/validate-macos-binaries.yml b/.github/workflows/validate-macos-binaries.yml index 0926dbe93..9610b36f7 100644 --- a/.github/workflows/validate-macos-binaries.yml +++ b/.github/workflows/validate-macos-binaries.yml @@ -17,6 +17,16 @@ on: default: false required: false type: boolean + version: + description: 'Version to validate - optional' + default: "" + required: false + type: string + release-matrix: + description: 'Release matrix - optional' + default: "" + required: false + type: string workflow_dispatch: inputs: channel: @@ -38,6 +48,16 @@ on: default: false required: false type: boolean + version: + 
description: 'Version to validate - optional' + default: "" + required: false + type: string + release-matrix: + description: 'Release matrix - optional' + default: "" + required: false + type: string jobs: generate-macos-matrix: @@ -64,4 +84,6 @@ jobs: export ENV_NAME="conda-env-${{ github.run_id }}" export TARGET_OS="macos" export TORCH_ONLY=${{ inputs.torchonly }} + export RELEASE_VERSION=${{ inputs.version }} + printf '%s\n' ${{ toJson(inputs.release-matrix) }} > release_matrix.json source ./.github/scripts/validate_binaries.sh diff --git a/.github/workflows/validate-windows-binaries.yml b/.github/workflows/validate-windows-binaries.yml index 96d2b281e..1c501cfb3 100644 --- a/.github/workflows/validate-windows-binaries.yml +++ b/.github/workflows/validate-windows-binaries.yml @@ -17,6 +17,16 @@ on: default: false required: false type: boolean + version: + description: 'Version to validate - optional' + default: "" + required: false + type: string + release-matrix: + description: 'Release matrix - optional' + default: "" + required: false + type: string workflow_dispatch: inputs: channel: @@ -38,6 +48,16 @@ on: default: false required: false type: boolean + version: + description: 'Version to validate - optional' + default: "" + required: false + type: string + release-matrix: + description: 'Release matrix - optional' + default: "" + required: false + type: string jobs: generate-windows-matrix: @@ -46,7 +66,6 @@ jobs: package-type: all os: windows channel: ${{ inputs.channel }} - win: needs: generate-windows-matrix strategy: @@ -66,9 +85,10 @@ jobs: export ENV_NAME="conda-env-${{ github.run_id }}" export TARGET_OS="windows" export TORCH_ONLY=${{ inputs.torchonly }} + export RELEASE_VERSION=${{ inputs.version }} + printf '%s\n' ${{ toJson(inputs.release-matrix) }} > release_matrix.json source /c/Jenkins/Miniconda3/etc/profile.d/conda.sh if [[ ${MATRIX_GPU_ARCH_VERSION} == "12.1" ]]; then ./windows/internal/driver_update.bat fi - source 
./.github/scripts/validate_binaries.sh diff --git a/test/smoke_test/smoke_test.py b/test/smoke_test/smoke_test.py index 3d1b6af64..a4cd6dff2 100644 --- a/test/smoke_test/smoke_test.py +++ b/test/smoke_test/smoke_test.py @@ -3,18 +3,20 @@ import sys import argparse import torch +import json import importlib import subprocess import torch._dynamo import torch.nn as nn import torch.nn.functional as F +from pathlib import Path gpu_arch_ver = os.getenv("MATRIX_GPU_ARCH_VERSION") gpu_arch_type = os.getenv("MATRIX_GPU_ARCH_TYPE") channel = os.getenv("MATRIX_CHANNEL") -stable_version = os.getenv("MATRIX_STABLE_VERSION") package_type = os.getenv("MATRIX_PACKAGE_TYPE") target_os = os.getenv("TARGET_OS") +BASE_DIR = Path(__file__).parent.parent.parent is_cuda_system = gpu_arch_type == "cuda" NIGHTLY_ALLOWED_DELTA = 3 @@ -52,8 +54,27 @@ def forward(self, x): output = self.fc1(x) return output +def load_json_from_basedir(filename: str): + try: + with open(BASE_DIR / filename) as fptr: + return json.load(fptr) + except FileNotFoundError as exc: + raise ImportError(f"File {filename} not found error: {exc.strerror}") from exc + except json.JSONDecodeError as exc: + raise ImportError(f"Invalid JSON {filename}") from exc + +def read_release_matrix(): + return load_json_from_basedir("release_matrix.json") def check_version(package: str) -> None: + release_version = os.getenv("RELEASE_VERSION") + # if release_version is specified, use it to validate the packages + if(release_version): + release_matrix = read_release_matrix() + stable_version = release_matrix["torch"] + else: + stable_version = os.getenv("MATRIX_STABLE_VERSION") + # only makes sense to check nightly package where dates are known if channel == "nightly": check_nightly_binaries_date(package) @@ -62,6 +83,20 @@ def check_version(package: str) -> None: raise RuntimeError( f"Torch version mismatch, expected {stable_version} for channel {channel}. 
But its {torch.__version__}" ) + + if release_version and package == "all": + for module in MODULES: + imported_module = importlib.import_module(module["name"]) + module_version = imported_module.__version__ + if not module_version.startswith(release_matrix[module["name"]]): + raise RuntimeError( + f"{module['name']} version mismatch, expected: \ + {release_matrix[module['name']]} for channel {channel}. But its {module_version}" + ) + else: + print(f"{module['name']} version actual: {module_version} expected: \ + {release_matrix[module['name']]} for channel {channel}.") + else: print(f"Skip version check for channel {channel} as stable version is None") @@ -255,6 +290,7 @@ def main() -> None: ) options = parser.parse_args() print(f"torch: {torch.__version__}") + check_version(options.package) smoke_test_conv2d() smoke_test_linalg() From a91f149b0e4ee62c757d3a11995082617e6f9cbe Mon Sep 17 00:00:00 2001 From: Luo Bo <84075753+0x804d8000@users.noreply.github.com> Date: Fri, 17 Nov 2023 06:15:19 +0800 Subject: [PATCH 125/212] fix: typo (#1581) --- manywheel/build_libtorch.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manywheel/build_libtorch.sh b/manywheel/build_libtorch.sh index 43a3334db..1dfbad031 100644 --- a/manywheel/build_libtorch.sh +++ b/manywheel/build_libtorch.sh @@ -343,7 +343,7 @@ for pkg in /$LIBTORCH_HOUSE_DIR/libtorch*.zip; do fi # zip up the wheel back - zip -rq $(basename $pkg) $PREIX* + zip -rq $(basename $pkg) $PREFIX* # replace original wheel rm -f $pkg From 7e1f31c83cade651c04ee525da16dfdbe78e4bab Mon Sep 17 00:00:00 2001 From: Sergii Dymchenko Date: Thu, 16 Nov 2023 14:39:03 -0800 Subject: [PATCH 126/212] desired_cuda -> DESIRED_CUDA (#1612) * desired_cuda -> DESIRED_CUDA Found with shellcheck * Update manywheel/build_cuda.sh Co-authored-by: Nikita Shulga <2453524+malfet@users.noreply.github.com> --------- Co-authored-by: Nikita Shulga <2453524+malfet@users.noreply.github.com> --- manywheel/build_cuda.sh | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/manywheel/build_cuda.sh b/manywheel/build_cuda.sh index 31f4e263b..768f19cee 100644 --- a/manywheel/build_cuda.sh +++ b/manywheel/build_cuda.sh @@ -47,7 +47,7 @@ if [[ -n "$DESIRED_CUDA" ]]; then # There really has to be a better way to do this - eli # Possibly limiting builds to specific cuda versions be delimiting images would be a choice if [[ "$OS_NAME" == *"Ubuntu"* ]]; then - echo "Switching to CUDA version $desired_cuda" + echo "Switching to CUDA version ${DESIRED_CUDA}" /builder/conda/switch_cuda_version.sh "${DESIRED_CUDA}" fi else From e584c27214c97af9162923c93bf2ac421c0523b2 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Thu, 16 Nov 2023 18:24:43 -0500 Subject: [PATCH 127/212] [BE] Cleanup build unused code (#1613) 1. Upload Scripts are not used anymore. We use Github Action upload workflows 2. M1 Builds are now automated 3. build_all.bat run git grep in pytorch and builder - No result --- build_m1_domains.sh | 45 ----------------- conda/upload.sh | 28 ---------- manywheel/upload.sh | 44 ---------------- windows/build_all.bat | 104 -------------------------------------- windows/build_pytorch.bat | 2 - 5 files changed, 223 deletions(-) delete mode 100755 build_m1_domains.sh delete mode 100755 conda/upload.sh delete mode 100755 manywheel/upload.sh delete mode 100755 windows/build_all.bat diff --git a/build_m1_domains.sh b/build_m1_domains.sh deleted file mode 100755 index e574cb005..000000000 --- a/build_m1_domains.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/zsh -# Script used to build domain libraries wheels for M1 -source ~/miniconda3/etc/profile.d/conda.sh -set -ex -TORCH_VERSION=1.11.0 -TORCHVISION_VERSION=0.12.0 -TORCHAUDIO_VERSION=0.11.0 -TORCHTEXT_VERSION=0.12.0 - -for PYTHON_VERSION in 3.8 3.9 3.10; do - PY_VERSION=${PYTHON_VERSION/.} - conda create -yn whl-py${PY_VERSION}-torch-${TORCH_VERSION} python=${PYTHON_VERSION} numpy libpng openjpeg wheel pkg-config - conda activate 
whl-py${PY_VERSION}-torch-${TORCH_VERSION} - python3 -mpip install torch --extra-index-url=https://download.pytorch.org/whl/test torch==${TORCH_VERSION} - python3 -mpip install delocate - - pushd ~/git/pytorch/vision - git checkout release/${TORCHVISION_VERSION%.*} - rm -rf build - BUILD_VERSION=${TORCHVISION_VERSION} python3 setup.py bdist_wheel - WHL_NAME=torchvision-${TORCHVISION_VERSION}-cp${PY_VERSION}-cp${PY_VERSION}-macosx_11_0_arm64.whl - DYLD_FALLBACK_LIBRARY_PATH="$(dirname $(dirname $(which python)))/lib" delocate-wheel -v --ignore-missing-dependencies dist/${WHL_NAME} - python3 -mpip install dist/${WHL_NAME} - popd - - pushd ~/git/pytorch/audio - git checkout release/${TORCHAUDIO_VERSION%.*} - rm -rf build - BUILD_VERSION=${TORCHAUDIO_VERSION} python3 setup.py bdist_wheel - WHL_NAME=torchaudio-${TORCHAUDIO_VERSION}-cp${PY_VERSION}-cp${PY_VERSION}-macosx_11_0_arm64.whl - python3 -mpip install dist/${WHL_NAME} - popd - - pushd ~/git/pytorch/text - git checkout release/${TORCHTEXT_VERSION%.*} - rm -rf build - BUILD_VERSION=${TORCHTEXT_VERSION} python3 setup.py bdist_wheel - WHL_NAME=torchtext-${TORCHTEXT_VERSION}-cp${PY_VERSION}-cp${PY_VERSION}-macosx_11_0_arm64.whl - python3 -mpip install dist/${WHL_NAME} - popd - - python -c "import torch;import torchvision;print('Is torchvision useable?', all(x is not None for x in [torch.ops.image.decode_png, torch.ops.torchvision.roi_align]))" - python -c "import torch;import torchaudio;torchaudio.set_audio_backend('sox_io')" -done - diff --git a/conda/upload.sh b/conda/upload.sh deleted file mode 100755 index 404ee77e7..000000000 --- a/conda/upload.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash - -set -ex - -# Upload linux conda packages (from inside the docker) -echo "Trying to upload conda packages from $HOST_PACKAGE_DIR" -if [[ -n "$HOST_PACKAGE_DIR" && -d "$HOST_PACKAGE_DIR" ]]; then - ls "$HOST_PACKAGE_DIR" | xargs -I {} anaconda upload "$HOST_PACKAGE_DIR"/{} -u pytorch --label main -else - echo "Couldn't find 
$HOST_PACKAGE_DIR" -fi - -# Upload linux conda packages (from outside the docker) -# This env variable should only be populated if this was called by cron/upload.sh -echo "Trying to upload conda packages from ${today}/conda_pkgs" -if [[ -n "$today" && -d "${today}/conda_pkgs" ]]; then - ls "${today}/conda_pkgs" | xargs -I {} anaconda upload "${today}/conda_pkgs"/{} -u pytorch --label main -else - echo "Couldn't find ${today}/conda_pkgs" -fi - -# Upload mac conda packages -echo "Trying to upload conda packages from $MAC_CONDA_FINAL_FOLDER" -if [[ -n "$MAC_CONDA_FINAL_FOLDER" && -d "$MAC_CONDA_FINAL_FOLDER" ]]; then - ls "$MAC_CONDA_FINAL_FOLDER" | xargs -I {} anaconda upload "$MAC_CONDA_FINAL_FOLDER"/{} -u pytorch --label main -else - echo "Couldn't find $MAC_CONDA_FINAL_FOLDER" -fi diff --git a/manywheel/upload.sh b/manywheel/upload.sh deleted file mode 100755 index a0c7b5b85..000000000 --- a/manywheel/upload.sh +++ /dev/null @@ -1,44 +0,0 @@ -set -ex - -# PIP_UPLOAD_FOLDER should end in a slash. This is to handle it being empty -# (when uploading to e.g. whl/cpu/) and also to handle nightlies (when -# uploading to e.g. /whl/nightly/cpu) - -if [[ -z "$PACKAGE_ROOT_DIR" ]]; then - PACKAGE_ROOT_DIR="$(pwd)" -fi - -# Upload for all CUDA/cpu versions if not given one to use -if [[ -z "$CUDA_VERSIONS" ]]; then - CUDA_VERSIONS=('cpu' 'cu90' 'cu100' 'cu101') -fi - -# Make sure the user specifically refers to an upload folder -if [[ -z "$PIP_UPLOAD_FOLDER" ]]; then - echo 'The upload folder is not set. We refuse to upload.' 
- echo 'Please set PIP_UPLOAD_FOLDER' - exit 1 -fi - -for cuda_ver in "${CUDA_VERSIONS[@]}"; do - s3_wheel_dir="s3://pytorch/whl/${PIP_UPLOAD_FOLDER}${cuda_ver}/" - s3_libtorch_dir="s3://pytorch/libtorch/${PIP_UPLOAD_FOLDER}${cuda_ver}/" - if [[ "$cuda_ver" == cpu ]]; then - wheel_dir="${PACKAGE_ROOT_DIR}/wheelhousecpu/" - libtorch_dir="${PACKAGE_ROOT_DIR}/libtorch_housecpu/" - else - wheel_dir="${PACKAGE_ROOT_DIR}/wheelhouse${cuda_ver:2:2}/" - libtorch_dir="${PACKAGE_ROOT_DIR}/libtorch_house${cuda_ver:2:2}/" - fi - - # Upload the wheels to s3 - if [[ -d "$wheel_dir" ]]; then - echo "Uploading all of: $(ls $wheel_dir) to $s3_wheel_dir" - ls "$wheel_dir" | xargs -I {} aws s3 cp "$wheel_dir"/{} "$s3_wheel_dir" --acl public-read - fi - - if [[ -d "$libtorch_dir" ]]; then - echo "Uploading all of: $(ls $libtorch_dir) to $s3_libtorch_dir" - ls "$libtorch_dir" | xargs -I {} aws s3 cp "$libtorch_dir"/{} "$s3_libtorch_dir" --acl public-read - fi -done diff --git a/windows/build_all.bat b/windows/build_all.bat deleted file mode 100755 index f60da8c76..000000000 --- a/windows/build_all.bat +++ /dev/null @@ -1,104 +0,0 @@ -@echo off - -if "%~1"=="" goto arg_error -if "%~2"=="" goto arg_error -if NOT "%~3"=="" goto arg_error -goto arg_end - -:arg_error - -echo Illegal number of parameters. 
Pass pytorch version, build number -exit /b 1 - -:arg_end - -set PYTORCH_BUILD_VERSION=%~1 -set PYTORCH_BUILD_NUMBER=%~2 - -REM Install Miniconda3 -set "CONDA_HOME=%CD%\conda" -set "tmp_conda=%CONDA_HOME%" -set "miniconda_exe=%CD%\miniconda.exe" -rmdir /s /q conda -del miniconda.exe -curl -k https://repo.continuum.io/miniconda/Miniconda3-latest-Windows-x86_64.exe -o "%miniconda_exe%" -call ..\conda\install_conda.bat - -set "PATH=%CONDA_HOME%;%CONDA_HOME%\scripts;%CONDA_HOME%\Library\bin;%PATH%" -set "ORIG_PATH=%PATH%" - -conda remove -n py36 --all -y || rmdir %CONDA_HOME%\envs\py36 /s -conda remove -n py37 --all -y || rmdir %CONDA_HOME%\envs\py37 /s - -conda create -n py36 -y -q numpy=1.11 mkl=2018 pyyaml boto3 cmake ninja typing_extensions python=3.6 -conda create -n py37 -y -q numpy=1.11 mkl=2018 pyyaml boto3 cmake ninja typing_extensions python=3.7 - -REM Install MKL -rmdir /s /q mkl -del mkl_2018.2.185.7z -curl https://s3.amazonaws.com/ossci-windows/mkl_2018.2.185.7z -k -O -7z x -aoa mkl_2018.2.185.7z -omkl -set CMAKE_INCLUDE_PATH=%cd%\\mkl\\include -set LIB=%cd%\\mkl\\lib;%LIB% - -REM Download MAGMA Files -for %%p in ( - cuda80 - cuda90 - cuda92 - ) do ( - rmdir /s /q magma_%%p_release - del magma_%%p_release.7z - curl -k https://s3.amazonaws.com/ossci-windows/magma_%%p_release_mkl_2018.2.185.7z -o magma_%%p_release.7z - 7z x -aoa magma_%%p_release.7z -omagma_%%p_release - ) - -REM Install sccache -mkdir %CD%\\tmp_bin -curl -k https://s3.amazonaws.com/ossci-windows/sccache.exe --output %CD%\\tmp_bin\\sccache.exe -copy %CD%\\tmp_bin\\sccache.exe %CD%\\tmp_bin\\nvcc.exe - -set CUDA_NVCC_EXECUTABLE=%CD%\\tmp_bin\\nvcc -set "PATH=%CD%\\tmp_bin;%PATH%" - -set PYTORCH_BINARY_BUILD=1 -set TH_BINARY_BUILD=1 - -@setlocal EnableDelayedExpansion -for %%v in ( - py35 - py36 - py37 - ) do ( - REM Activate Python Environment - set "CONDA_LIB_PATH=%CONDA_HOME%\envs\%%v\Library\bin" - set 
"PATH=%CONDA_HOME%\envs\%%v;%CONDA_HOME%\envs\%%v\scripts;%CONDA_HOME%\envs\%%v\Library\bin;%ORIG_PATH%" - pip install ninja - for %%c in ( - cpu - 80 - 90 - 92 - ) do ( - @setlocal - - REM Set Flags - if NOT "%%c"=="cpu" ( - if NOT "%%c"=="92" ( - set MAGMA_HOME=%cd%\\magma_!CUDA_PREFIX!_release - ) else ( - set MAGMA_HOME=%cd%\\magma_!CUDA_PREFIX!_release\magma_cuda92\magma\install - ) - set CUDA_VERSION=%%c - set CUDA_PREFIX=cuda!CUDA_VERSION! - ) else ( - set CUDA_PREFIX=cpu - ) - call !CUDA_PREFIX!.bat - @endlocal - ) - ) - -@endlocal - -set "PATH=%ORIG_PATH%" diff --git a/windows/build_pytorch.bat b/windows/build_pytorch.bat index 635138735..d4a1249ed 100644 --- a/windows/build_pytorch.bat +++ b/windows/build_pytorch.bat @@ -74,8 +74,6 @@ set LIB=%cd%\mkl\lib;%LIB% :: Download MAGMA Files on CUDA builds set MAGMA_VERSION=2.5.4 -if "%CUDA_VERSION%" == "92" set MAGMA_VERSION=2.5.2 -if "%CUDA_VERSION%" == "100" set MAGMA_VERSION=2.5.2 if "%DEBUG%" == "1" ( set BUILD_TYPE=debug From 5014b0f7698fffadd8edcb6011a72ee2395e3e0a Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Fri, 17 Nov 2023 09:25:02 -0500 Subject: [PATCH 128/212] Changes to pypi release promotion scripts introduced for 2.1.0 and 2.1.1 (#1614) * Changes topypi release promotion scripts introduced during 2.1.1 * typo --- release/pypi/promote_pypi_to_production.sh | 1 + release/pypi/promote_pypi_to_staging.sh | 18 +++++++++++++----- release/pypi/upload_pypi_to_staging.sh | 2 +- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/release/pypi/promote_pypi_to_production.sh b/release/pypi/promote_pypi_to_production.sh index b517cbedb..a14fbb637 100644 --- a/release/pypi/promote_pypi_to_production.sh +++ b/release/pypi/promote_pypi_to_production.sh @@ -34,3 +34,4 @@ promote_staging_binaries torch "${PYTORCH_VERSION}" promote_staging_binaries torchvision "${TORCHVISION_VERSION}" promote_staging_binaries torchaudio "${TORCHAUDIO_VERSION}" promote_staging_binaries torchtext "${TORCHTEXT_VERSION}" 
+promote_staging_binaries torchdata "${TORCHDATA_VERSION}" diff --git a/release/pypi/promote_pypi_to_staging.sh b/release/pypi/promote_pypi_to_staging.sh index a9dbe535b..46cd958cd 100644 --- a/release/pypi/promote_pypi_to_staging.sh +++ b/release/pypi/promote_pypi_to_staging.sh @@ -23,26 +23,34 @@ upload_pypi_to_staging() { # Uncomment these to promote to pypi PYTORCH_LINUX_VERSION_SUFFIX="%2Bcu121.with.pypi.cudnn" LINUX_VERSION_SUFFIX="%2Bcu121" -WIN_VERSION_SUFFIX="%2Bcpu" +CPU_VERSION_SUFFIX="%2Bcpu" MACOS_X86_64="macosx_.*_x86_64" MACOS_ARM64="macosx_.*_arm64" PLATFORM="linux_x86_64" VERSION_SUFFIX="${PYTORCH_LINUX_VERSION_SUFFIX}" upload_pypi_to_staging torch "${PYTORCH_VERSION}" PLATFORM="manylinux2014_aarch64" VERSION_SUFFIX="" upload_pypi_to_staging torch "${PYTORCH_VERSION}" -PLATFORM="win_amd64" VERSION_SUFFIX="${WIN_VERSION_SUFFIX}" upload_pypi_to_staging torch "${PYTORCH_VERSION}" +PLATFORM="win_amd64" VERSION_SUFFIX="${CPU_VERSION_SUFFIX}" upload_pypi_to_staging torch "${PYTORCH_VERSION}" PLATFORM="${MACOS_X86_64}" VERSION_SUFFIX="" upload_pypi_to_staging torch "${PYTORCH_VERSION}" # intel mac PLATFORM="${MACOS_ARM64}" VERSION_SUFFIX="" upload_pypi_to_staging torch "${PYTORCH_VERSION}" # m1 mac PLATFORM="linux_x86_64" VERSION_SUFFIX="${LINUX_VERSION_SUFFIX}" upload_pypi_to_staging torchvision "${TORCHVISION_VERSION}" PLATFORM="linux_aarch64" VERSION_SUFFIX="" upload_pypi_to_staging torchvision "${TORCHVISION_VERSION}" -PLATFORM="win_amd64" VERSION_SUFFIX="${WIN_VERSION_SUFFIX}" upload_pypi_to_staging torchvision "${TORCHVISION_VERSION}" +PLATFORM="win_amd64" VERSION_SUFFIX="${CPU_VERSION_SUFFIX}" upload_pypi_to_staging torchvision "${TORCHVISION_VERSION}" PLATFORM="${MACOS_X86_64}" VERSION_SUFFIX="" upload_pypi_to_staging torchvision "${TORCHVISION_VERSION}" PLATFORM="${MACOS_ARM64}" VERSION_SUFFIX="" upload_pypi_to_staging torchvision "${TORCHVISION_VERSION}" PLATFORM="linux_x86_64" VERSION_SUFFIX="${LINUX_VERSION_SUFFIX}" upload_pypi_to_staging 
torchaudio "${TORCHAUDIO_VERSION}" PLATFORM="linux_aarch64" VERSION_SUFFIX="" upload_pypi_to_staging torchaudio "${TORCHAUDIO_VERSION}" -PLATFORM="win_amd64" VERSION_SUFFIX="${WIN_VERSION_SUFFIX}" upload_pypi_to_staging torchaudio "${TORCHAUDIO_VERSION}" +PLATFORM="win_amd64" VERSION_SUFFIX="${CPU_VERSION_SUFFIX}" upload_pypi_to_staging torchaudio "${TORCHAUDIO_VERSION}" PLATFORM="${MACOS_X86_64}" VERSION_SUFFIX="" upload_pypi_to_staging torchaudio "${TORCHAUDIO_VERSION}" PLATFORM="${MACOS_ARM64}" VERSION_SUFFIX="" upload_pypi_to_staging torchaudio "${TORCHAUDIO_VERSION}" -upload_pypi_to_staging torchtext "${TORCHTEXT_VERSION}" +PLATFORM="linux_x86" VERSION_SUFFIX="${CPU_VERSION_SUFFIX}" upload_pypi_to_staging torchtext "${TORCHTEXT_VERSION}" +PLATFORM="win_amd64" VERSION_SUFFIX="${CPU_VERSION_SUFFIX}" upload_pypi_to_staging torchtext "${TORCHTEXT_VERSION}" +PLATFORM="${MACOS_X86_64}" VERSION_SUFFIX="" upload_pypi_to_staging torchtext "${TORCHTEXT_VERSION}" +PLATFORM="${MACOS_ARM64}" VERSION_SUFFIX="" upload_pypi_to_staging torchtext "${TORCHTEXT_VERSION}" + +PLATFORM="manylinux2014_x86_64" VERSION_SUFFIX="" upload_pypi_to_staging torchdata "${TORCHDATA_VERSION}" +PLATFORM="win_amd64" VERSION_SUFFIX="" upload_pypi_to_staging torchdata "${TORCHDATA_VERSION}" +PLATFORM="${MACOS_X86_64}" VERSION_SUFFIX="" upload_pypi_to_staging torchdata "${TORCHDATA_VERSION}" +PLATFORM="${MACOS_ARM64}" VERSION_SUFFIX="" upload_pypi_to_staging torchdata "${TORCHDATA_VERSION}" diff --git a/release/pypi/upload_pypi_to_staging.sh b/release/pypi/upload_pypi_to_staging.sh index 250b231f9..b1a7ddf6d 100644 --- a/release/pypi/upload_pypi_to_staging.sh +++ b/release/pypi/upload_pypi_to_staging.sh @@ -43,7 +43,7 @@ fi for pkg in ${pkgs_to_promote}; do pkg_basename="$(basename "${pkg}")" - if [[ "${pkg}" != *aarch64* ]]; then + if [[ "${pkg}" != *aarch64* && "${pkg}" != *torchdata* ]]; then # sub out linux for manylinux1 pkg_basename="$(basename "${pkg//linux/manylinux1}")" elif [[ "${pkg}" == 
*manylinux_2_17_aarch64* ]]; then From c3e0f559d5eeeea3546ec1357112fffa9d5452b2 Mon Sep 17 00:00:00 2001 From: Nikita Shulga <2453524+malfet@users.noreply.github.com> Date: Fri, 17 Nov 2023 09:21:36 -0800 Subject: [PATCH 129/212] Pin miniconda version for Windows To Miniconda3-py311_23.9.0-0-Windows-x86_64.exe --- windows/build_pytorch.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/windows/build_pytorch.bat b/windows/build_pytorch.bat index d4a1249ed..37e19f933 100644 --- a/windows/build_pytorch.bat +++ b/windows/build_pytorch.bat @@ -44,7 +44,7 @@ set "tmp_conda=%CONDA_HOME%" set "miniconda_exe=%CD%\miniconda.exe" rmdir /s /q conda del miniconda.exe -curl --retry 3 -k https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe -o "%miniconda_exe%" +curl --retry 3 -k https://repo.anaconda.com/miniconda/Miniconda3-py311_23.9.0-0-Windows-x86_64.exe -o "%miniconda_exe%" call ..\conda\install_conda.bat if ERRORLEVEL 1 exit /b 1 set "ORIG_PATH=%PATH%" From 0cd5228907e627658a24c414b3c8d054e77fed99 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Tue, 21 Nov 2023 18:52:37 +0000 Subject: [PATCH 130/212] Fix poetry and pypi validations when version is specified (#1622) * test (#1617) Fix validations (#1618) * test * poetry_fix * test Fix validations (#1619) * test * poetry_fix * test * test * restrict --- .github/scripts/validate_pipy.sh | 12 ++++++--- .github/scripts/validate_poetry.sh | 25 ++++++------------- .github/workflows/validate-binaries.yml | 2 +- .github/workflows/validate-linux-binaries.yml | 5 +++- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/.github/scripts/validate_pipy.sh b/.github/scripts/validate_pipy.sh index ed7915079..d7a930899 100644 --- a/.github/scripts/validate_pipy.sh +++ b/.github/scripts/validate_pipy.sh @@ -2,15 +2,21 @@ conda create -yn ${ENV_NAME}_pypi python=${MATRIX_PYTHON_VERSION} numpy ffmpeg conda activate ${ENV_NAME}_pypi TEST_SUFFIX="" +RELEASE_SUFFIX="" +# if RELESE version is 
passed as parameter - install speific version +if [[ ! -z ${RELEASE_VERSION} ]]; then + RELEASE_SUFFIX="==${RELEASE_VERSION}" +fi + if [[ ${TORCH_ONLY} == 'true' ]]; then TEST_SUFFIX=" --package torchonly" - pip3 install --pre torch --extra-index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}_pypi_cudnn" + pip3 install --pre torch${RELEASE_SUFFIX} --extra-index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}_pypi_cudnn" else if [[ ${MATRIX_CHANNEL} != "release" ]]; then - pip3 install --pre torch --extra-index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}_pypi_cudnn" + pip3 install --pre torch${RELEASE_SUFFIX} --extra-index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}_pypi_cudnn" pip3 install --pre torchvision torchaudio --extra-index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}" else - pip3 install torch torchvision torchaudio + pip3 install torch${RELEASE_SUFFIX} torchvision torchaudio fi fi diff --git a/.github/scripts/validate_poetry.sh b/.github/scripts/validate_poetry.sh index 3c41d5b45..6b7fe2412 100644 --- a/.github/scripts/validate_poetry.sh +++ b/.github/scripts/validate_poetry.sh @@ -13,25 +13,16 @@ if [[ ${TORCH_ONLY} == 'true' ]]; then TEST_SUFFIX=" --package torchonly" fi -if [[ ${MATRIX_CHANNEL} != "release" ]]; then - # Installing poetry from our custom repo. 
We need to configure it before use and disable authentication - export PYTHON_KEYRING_BACKEND=keyring.backends.null.Keyring - poetry source add --priority=explicit domains "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}" - poetry source add --priority=supplemental pytorch-channel "https://download.pytorch.org/whl/${MATRIX_CHANNEL}" - poetry source add --priority=supplemental pytorch "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}_pypi_cudnn" - poetry --quiet add --source pytorch torch +RELEASE_SUFFIX="" +# if RELESE version is passed as parameter - install speific version +if [[ ! -z ${RELEASE_VERSION} ]]; then + RELEASE_SUFFIX="@${RELEASE_VERSION}" +fi - if [[ ${TORCH_ONLY} != 'true' ]]; then - poetry --quiet add --source domains torchvision torchaudio - fi +if [[ ${TORCH_ONLY} == 'true' ]]; then + poetry --quiet add torch${RELEASE_SUFFIX} else - export PYTHON_KEYRING_BACKEND=keyring.backends.null.Keyring - poetry source add --priority=explicit pytorch "https://download.pytorch.org/whl/${MATRIX_DESIRED_CUDA}" - if [[ ${TORCH_ONLY} == 'true' ]]; then - poetry --quiet add torch - else - poetry --quiet add --source pytorch torch torchaudio torchvision - fi + poetry --quiet add torch${RELEASE_SUFFIX} torchaudio torchvision fi python ../test/smoke_test/smoke_test.py ${TEST_SUFFIX} --runtime-error-check disabled diff --git a/.github/workflows/validate-binaries.yml b/.github/workflows/validate-binaries.yml index f6f73b012..fee16dca9 100644 --- a/.github/workflows/validate-binaries.yml +++ b/.github/workflows/validate-binaries.yml @@ -114,7 +114,7 @@ jobs: ref: ${{ inputs.ref || github.ref }} torchonly: ${{ inputs.torchonly }} version: ${{ inputs.version }} - release-matrix: ${{ needs.generate-rlease-matrix.outputs.matrix }} + release-matrix: ${{ needs.generate-release-matrix.outputs.matrix }} mac-arm64: if: inputs.os == 'macos' || inputs.os == 'all' diff --git a/.github/workflows/validate-linux-binaries.yml 
b/.github/workflows/validate-linux-binaries.yml index dcbfc93a0..aedffeef2 100644 --- a/.github/workflows/validate-linux-binaries.yml +++ b/.github/workflows/validate-linux-binaries.yml @@ -93,7 +93,10 @@ jobs: # Special case PyPi installation package. And Install of PyPi package via poetry if [[ ${MATRIX_PACKAGE_TYPE} == "manywheel" && ${MATRIX_GPU_ARCH_VERSION} == "12.1" ]]; then source ./.github/scripts/validate_pipy.sh - source ./.github/scripts/validate_poetry.sh + + if [[ ${MATRIX_CHANNEL} == "release" ]]; then + source ./.github/scripts/validate_poetry.sh + fi fi # Standart case: Validate binaries From 4db3d68b5a4c3b87148ba394d31960ff805c3850 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Tue, 21 Nov 2023 23:42:20 +0000 Subject: [PATCH 131/212] Validate pypi build only for release (#1623) --- .github/scripts/validate_pipy.sh | 9 ++------- .github/workflows/validate-linux-binaries.yml | 7 ++----- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/.github/scripts/validate_pipy.sh b/.github/scripts/validate_pipy.sh index d7a930899..5858e4c28 100644 --- a/.github/scripts/validate_pipy.sh +++ b/.github/scripts/validate_pipy.sh @@ -10,14 +10,9 @@ fi if [[ ${TORCH_ONLY} == 'true' ]]; then TEST_SUFFIX=" --package torchonly" - pip3 install --pre torch${RELEASE_SUFFIX} --extra-index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}_pypi_cudnn" + pip3 install torch${RELEASE_SUFFIX} else - if [[ ${MATRIX_CHANNEL} != "release" ]]; then - pip3 install --pre torch${RELEASE_SUFFIX} --extra-index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}_pypi_cudnn" - pip3 install --pre torchvision torchaudio --extra-index-url "https://download.pytorch.org/whl/${MATRIX_CHANNEL}/${MATRIX_DESIRED_CUDA}" - else - pip3 install torch${RELEASE_SUFFIX} torchvision torchaudio - fi + pip3 install torch${RELEASE_SUFFIX} torchvision torchaudio fi python ./test/smoke_test/smoke_test.py ${TEST_SUFFIX} --runtime-error-check 
disabled diff --git a/.github/workflows/validate-linux-binaries.yml b/.github/workflows/validate-linux-binaries.yml index aedffeef2..1b84eaa31 100644 --- a/.github/workflows/validate-linux-binaries.yml +++ b/.github/workflows/validate-linux-binaries.yml @@ -91,12 +91,9 @@ jobs: cat release_matrix.json # Special case PyPi installation package. And Install of PyPi package via poetry - if [[ ${MATRIX_PACKAGE_TYPE} == "manywheel" && ${MATRIX_GPU_ARCH_VERSION} == "12.1" ]]; then + if [[ ${MATRIX_PACKAGE_TYPE} == "manywheel" && ${MATRIX_GPU_ARCH_VERSION} == "12.1" && ${MATRIX_CHANNEL} == "release"]]; then source ./.github/scripts/validate_pipy.sh - - if [[ ${MATRIX_CHANNEL} == "release" ]]; then - source ./.github/scripts/validate_poetry.sh - fi + source ./.github/scripts/validate_poetry.sh fi # Standart case: Validate binaries From 56556d0aaca4da61c0497608b9136b058573c8d6 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Tue, 21 Nov 2023 23:52:33 +0000 Subject: [PATCH 132/212] Validate pypi build only for release (#1624) --- .github/workflows/validate-linux-binaries.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/validate-linux-binaries.yml b/.github/workflows/validate-linux-binaries.yml index 1b84eaa31..d1c6c29bd 100644 --- a/.github/workflows/validate-linux-binaries.yml +++ b/.github/workflows/validate-linux-binaries.yml @@ -88,10 +88,9 @@ jobs: export TARGET_OS="linux" eval "$(conda shell.bash hook)" printf '%s\n' ${{ toJson(inputs.release-matrix) }} > release_matrix.json - cat release_matrix.json # Special case PyPi installation package. 
And Install of PyPi package via poetry - if [[ ${MATRIX_PACKAGE_TYPE} == "manywheel" && ${MATRIX_GPU_ARCH_VERSION} == "12.1" && ${MATRIX_CHANNEL} == "release"]]; then + if [[ ${MATRIX_PACKAGE_TYPE} == "manywheel" && ${MATRIX_GPU_ARCH_VERSION} == "12.1" && ${MATRIX_CHANNEL} == "release" ]]; then source ./.github/scripts/validate_pipy.sh source ./.github/scripts/validate_poetry.sh fi From 12f6acd63456dafae70b6273659edf557d68c9fa Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Tue, 21 Nov 2023 17:09:20 -0800 Subject: [PATCH 133/212] [Manywheel] Do not hardcode triton version --- manywheel/build_cuda.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/manywheel/build_cuda.sh b/manywheel/build_cuda.sh index 768f19cee..b09abfea4 100644 --- a/manywheel/build_cuda.sh +++ b/manywheel/build_cuda.sh @@ -267,11 +267,12 @@ fi # and torch.compile doesn't work. if [[ $(uname) == "Linux" && "$DESIRED_PYTHON" != "3.12" ]]; then TRITON_SHORTHASH=$(cut -c1-10 $PYTORCH_ROOT/.github/ci_commit_pins/triton.txt) + TRITON_VERSION=$(cat $PYTORCH_ROOT/.ci/docker/triton_version.txt) if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then - export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="pytorch-triton==2.1.0+${TRITON_SHORTHASH}" + export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="pytorch-triton==${TRITON_VERSION}+${TRITON_SHORTHASH}" else - export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${PYTORCH_EXTRA_INSTALL_REQUIREMENTS} | pytorch-triton==2.1.0+${TRITON_SHORTHASH}" + export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${PYTORCH_EXTRA_INSTALL_REQUIREMENTS} | pytorch-triton==${TRITON_VERSION}+${TRITON_SHORTHASH}" fi fi From fb8aae9a0b7badf574b5fd2f436024f6b10d8c14 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Tue, 21 Nov 2023 17:12:46 -0800 Subject: [PATCH 134/212] [Manywheel][BE] Dedup Triton requirement spec --- manywheel/build_cuda.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/manywheel/build_cuda.sh b/manywheel/build_cuda.sh index b09abfea4..4938ffef2 
100644 --- a/manywheel/build_cuda.sh +++ b/manywheel/build_cuda.sh @@ -268,11 +268,12 @@ fi if [[ $(uname) == "Linux" && "$DESIRED_PYTHON" != "3.12" ]]; then TRITON_SHORTHASH=$(cut -c1-10 $PYTORCH_ROOT/.github/ci_commit_pins/triton.txt) TRITON_VERSION=$(cat $PYTORCH_ROOT/.ci/docker/triton_version.txt) + TRITON_REQUIREMENT="pytorch-triton==${TRITON_VERSION}+${TRITON_SHORTHASH}" if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then - export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="pytorch-triton==${TRITON_VERSION}+${TRITON_SHORTHASH}" + export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${TRITON_REQUIREMENT}" else - export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${PYTORCH_EXTRA_INSTALL_REQUIREMENTS} | pytorch-triton==${TRITON_VERSION}+${TRITON_SHORTHASH}" + export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${PYTORCH_EXTRA_INSTALL_REQUIREMENTS} | ${TRITON_REQUIREMENT}" fi fi From 2a750ebb877695947146a307b22cf27b6c0d0302 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Tue, 21 Nov 2023 22:43:00 -0800 Subject: [PATCH 135/212] [Manywheel] Restrict `pytorch-triton` to x86-64 Linux Partially addresses https://github.com/pytorch/pytorch/issues/114042 --- manywheel/build_cuda.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manywheel/build_cuda.sh b/manywheel/build_cuda.sh index 4938ffef2..9919247ed 100644 --- a/manywheel/build_cuda.sh +++ b/manywheel/build_cuda.sh @@ -268,7 +268,7 @@ fi if [[ $(uname) == "Linux" && "$DESIRED_PYTHON" != "3.12" ]]; then TRITON_SHORTHASH=$(cut -c1-10 $PYTORCH_ROOT/.github/ci_commit_pins/triton.txt) TRITON_VERSION=$(cat $PYTORCH_ROOT/.ci/docker/triton_version.txt) - TRITON_REQUIREMENT="pytorch-triton==${TRITON_VERSION}+${TRITON_SHORTHASH}" + TRITON_REQUIREMENT="pytorch-triton==${TRITON_VERSION}+${TRITON_SHORTHASH}; platform_system == 'Linux' and platform_machine == 'x86_64'" if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${TRITON_REQUIREMENT}" From 6b8c73fecb93f06f3c18364cec9d3714f99bc479 Mon 
Sep 17 00:00:00 2001 From: Nikita Shulga <2453524+malfet@users.noreply.github.com> Date: Wed, 29 Nov 2023 18:39:22 -0800 Subject: [PATCH 136/212] Tweak py312 conda requirements --- conda/pytorch-nightly/meta.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/pytorch-nightly/meta.yaml b/conda/pytorch-nightly/meta.yaml index 882f13da0..f51339d53 100644 --- a/conda/pytorch-nightly/meta.yaml +++ b/conda/pytorch-nightly/meta.yaml @@ -31,10 +31,10 @@ requirements: - numpy=1.19 # [py <= 39] - numpy=1.21.5 # [py == 310] - numpy=1.23.5 # [py == 311] - - numpy=1.26.1 # [py >= 312] + - numpy=1.26.0 # [py >= 312] - openssl=1.1.1l # [py >= 38 and py <= 310 and linux] - openssl=1.1.1s # [py == 311 and linux] - - openssl=3.1.4 # [py >= 312 and linux] + - openssl=3.0.12 # [py >= 312 and linux] {{ environ.get('PYTORCH_LLVM_PACKAGE', ' - llvmdev=9') }} {{ environ.get('MAGMA_PACKAGE', '') }} From 3c7404d80c24d2b59f0a15d818eff2806b19f216 Mon Sep 17 00:00:00 2001 From: Nikita Shulga <2453524+malfet@users.noreply.github.com> Date: Wed, 29 Nov 2023 19:41:02 -0800 Subject: [PATCH 137/212] Build PyTorch without TLS for 3.12 Because GLOO still expect OpenSSL-1, but 3.12 is build with OpenSSL-3 --- conda/build_pytorch.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/conda/build_pytorch.sh b/conda/build_pytorch.sh index 029372303..77789a17b 100755 --- a/conda/build_pytorch.sh +++ b/conda/build_pytorch.sh @@ -303,6 +303,11 @@ fi # Loop through all Python versions to build a package for each for py_ver in "${DESIRED_PYTHON[@]}"; do + # TODO: Enable TLS support for 3.12 builds (or disable it for the rest + if [[ "$(uname)" == 'Linux' && "${py_ver}" == '3.12' ]]; then + export USE_GLOO_WITH_OPENSSL=0 + fi + build_string="py${py_ver}_${build_string_suffix}" folder_tag="${build_string}_$(date +'%Y%m%d')" From 88457a11da457ad52c2a8847503cfb39f8071c49 Mon Sep 17 00:00:00 2001 From: Nikita Shulga <2453524+malfet@users.noreply.github.com> Date: Wed, 29 Nov 2023 
20:34:48 -0800 Subject: [PATCH 138/212] [conda] Skip sympy for 3.12 As at the moment it is only available for Windows %) --- conda/pytorch-nightly/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/pytorch-nightly/meta.yaml b/conda/pytorch-nightly/meta.yaml index f51339d53..16cc1bd83 100644 --- a/conda/pytorch-nightly/meta.yaml +++ b/conda/pytorch-nightly/meta.yaml @@ -49,7 +49,7 @@ requirements: # Before a decent fix, force llvm-openmp version <16. - llvm-openmp <16 # [linux] - typing_extensions - - sympy + - sympy # [py < 312] - filelock - networkx - jinja2 From ca378c16f88781e92a7005c65a3473e3ddca5be3 Mon Sep 17 00:00:00 2001 From: Nikita Shulga <2453524+malfet@users.noreply.github.com> Date: Wed, 29 Nov 2023 20:37:57 -0800 Subject: [PATCH 139/212] [conda] Do not depend on triton for 3.12 yet --- conda/build_pytorch.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/build_pytorch.sh b/conda/build_pytorch.sh index 77789a17b..4c2e4836b 100755 --- a/conda/build_pytorch.sh +++ b/conda/build_pytorch.sh @@ -276,7 +276,7 @@ else if [[ "$OSTYPE" != "msys" ]]; then # TODO: Remove me when Triton has a proper release channel TRITON_SHORTHASH=$(cut -c1-10 $pytorch_rootdir/.github/ci_commit_pins/triton.txt) - export CONDA_TRITON_CONSTRAINT=" - torchtriton==2.1.0+${TRITON_SHORTHASH}" + export CONDA_TRITON_CONSTRAINT=" - torchtriton==2.1.0+${TRITON_SHORTHASH} # [py < 312]" fi build_string_suffix="cuda${CUDA_VERSION}_cudnn${CUDNN_VERSION}_${build_string_suffix}" From fc773dde9764b1be1a1a0e8a1b1df581582013ec Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Thu, 30 Nov 2023 14:47:10 -0800 Subject: [PATCH 140/212] Tweak mkl requirements for win+py312 --- conda/pytorch-nightly/meta.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/pytorch-nightly/meta.yaml b/conda/pytorch-nightly/meta.yaml index 16cc1bd83..93ec533a9 100644 --- a/conda/pytorch-nightly/meta.yaml +++ 
b/conda/pytorch-nightly/meta.yaml @@ -22,8 +22,8 @@ requirements: {% if cross_compile_arm64 == 0 %} - mkl-include # [x86_64] - mkl=2020.2 # [py <= 311 and x86_64 and not win] - - mkl=2023.1 # [py >= 312 and x86_64 and not win] - - mkl=2021.4 # [x86_64 and win] + - mkl=2023.1 # [py >= 312 and x86_64] + - mkl=2021.4 # [x86_64 and win and py <= 311] {% endif %} - typing_extensions - ninja From 8e5151cb2beb7c7c8fda1b64eaf468e5ec585fb5 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Fri, 1 Dec 2023 10:23:03 -0800 Subject: [PATCH 141/212] Add aarch64 conda env lib to LD_LIBRARY_PATH (#1628) After the change on https://github.com/pytorch/builder/pull/1586, nightly aarch64 wheel fails to find `libopenblas.so` which is now installed under `/opt/conda/envs/aarch64_env/lib/` instead of the base conda `/opt/conda/lib`. Using CPU nightly wheels on aarch64 from Nov 16 then ends up with the error as described in https://github.com/pytorch/pytorch/issues/114862: `Calling torch.geqrf on a CPU tensor requires compiling PyTorch with LAPACK. Please use PyTorch built with LAPACK support`. The error can be found on night build log https://github.com/pytorch/pytorch/actions/runs/6887666324/job/18735230109#step:15:4933 Fixes https://github.com/pytorch/pytorch/issues/114862 I double check `2.1.[0-1]` and the current RC for 2.1.2, the issue is not there because https://github.com/pytorch/builder/pull/1586 only change builder main, thus impacting nightly. ### Testing Build nightly wheel manually on aarch64 runner and confirm that openblas is detected correctly: ``` -- Found a library with BLAS API (open). Full path: (/opt/conda/envs/aarch64_env/lib/libopenblas.so) ... -- USE_BLAS : 1 -- BLAS : open -- BLAS_HAS_SBGEMM : -- USE_LAPACK : 1 -- LAPACK : open ... 
``` --- aarch64_linux/aarch64_ci_setup.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/aarch64_linux/aarch64_ci_setup.sh b/aarch64_linux/aarch64_ci_setup.sh index f10e49b40..88029c940 100755 --- a/aarch64_linux/aarch64_ci_setup.sh +++ b/aarch64_linux/aarch64_ci_setup.sh @@ -6,8 +6,9 @@ set -eux -o pipefail CONDA_PYTHON_EXE=/opt/conda/bin/python CONDA_EXE=/opt/conda/bin/conda +CONDA_ENV_NAME=aarch64_env PATH=/opt/conda/bin:$PATH -LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH +LD_LIBRARY_PATH=/opt/conda/envs/${CONDA_ENV_NAME}/lib/:/opt/conda/lib:$LD_LIBRARY_PATH ############################################################################### # Install conda @@ -21,8 +22,8 @@ chmod +x /mambaforge.sh rm /mambaforge.sh source /opt/conda/etc/profile.d/conda.sh conda config --set ssl_verify False -conda create -y -c conda-forge -n aarch64_env python=${DESIRED_PYTHON} -conda activate aarch64_env +conda create -y -c conda-forge -n "${CONDA_ENV_NAME}" python=${DESIRED_PYTHON} +conda activate "${CONDA_ENV_NAME}" if [[ "$DESIRED_PYTHON" == "3.8" ]]; then NUMPY_VERSION="1.24.4" From 6ce30be4cb65075cedd69e8460cb4529dd9c7f39 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Fri, 1 Dec 2023 18:46:47 -0800 Subject: [PATCH 142/212] Revert "[conda] Skip sympy for 3.12" This reverts commit 88457a11da457ad52c2a8847503cfb39f8071c49. As sympy has been updated to 1.12 and it now supports Python-3.12 --- conda/pytorch-nightly/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/pytorch-nightly/meta.yaml b/conda/pytorch-nightly/meta.yaml index 93ec533a9..05be9c7d7 100644 --- a/conda/pytorch-nightly/meta.yaml +++ b/conda/pytorch-nightly/meta.yaml @@ -49,7 +49,7 @@ requirements: # Before a decent fix, force llvm-openmp version <16. 
- llvm-openmp <16 # [linux] - typing_extensions - - sympy # [py < 312] + - sympy - filelock - networkx - jinja2 From b92da8cd64c9db81effcb8a48992e69ecb4d6346 Mon Sep 17 00:00:00 2001 From: snadampal <87143774+snadampal@users.noreply.github.com> Date: Fri, 1 Dec 2023 21:00:56 -0600 Subject: [PATCH 143/212] [aarch64] ACL, OpenBLAS and mkldnn updates for PyTorch 2.2 (#1627) Note# ~~This PR has a dependency on updating the oneDNN version to v3.3 (via ideep submodule to v3.3)~~ ideep submodule update is done, so, this PR can be merged anytime now. This PR is for: ACL - build with fixed format kernels OpenBLAS - upgrade the version to 0.3.25 numpy - upgrade version to 1.26.2 and mkldnn - cleanup the patches that are already upstreamed. --- aarch64_linux/aarch64_ci_setup.sh | 4 +-- aarch64_linux/aarch64_wheel_ci_build.py | 6 ++-- aarch64_linux/build_aarch64_wheel.py | 7 ++--- ...4-fix-default-build-flags-to-armv8-a.patch | 29 ------------------- 4 files changed, 7 insertions(+), 39 deletions(-) delete mode 100644 mkldnn_fix/aarch64-fix-default-build-flags-to-armv8-a.patch diff --git a/aarch64_linux/aarch64_ci_setup.sh b/aarch64_linux/aarch64_ci_setup.sh index 88029c940..53c8a5320 100755 --- a/aarch64_linux/aarch64_ci_setup.sh +++ b/aarch64_linux/aarch64_ci_setup.sh @@ -28,9 +28,9 @@ conda activate "${CONDA_ENV_NAME}" if [[ "$DESIRED_PYTHON" == "3.8" ]]; then NUMPY_VERSION="1.24.4" else - NUMPY_VERSION="1.26.0" + NUMPY_VERSION="1.26.2" fi -conda install -y -c conda-forge numpy==${NUMPY_VERSION} pyyaml==6.0.1 patchelf==0.17.2 pygit2==1.13.2 openblas==0.3.24 ninja==1.11.1 scons==4.5.2 +conda install -y -c conda-forge numpy==${NUMPY_VERSION} pyyaml==6.0.1 patchelf==0.17.2 pygit2==1.13.2 openblas==0.3.25 ninja==1.11.1 scons==4.5.2 python --version conda --version diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py index 3fc86053c..3b772847c 100755 --- a/aarch64_linux/aarch64_wheel_ci_build.py +++ 
b/aarch64_linux/aarch64_wheel_ci_build.py @@ -20,9 +20,9 @@ def build_ArmComputeLibrary(git_clone_flags: str = "") -> None: ''' print('Building Arm Compute Library') os.system("cd / && mkdir /acl") - os.system(f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v23.05.1 {git_clone_flags}") + os.system(f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v23.08 {git_clone_flags}") os.system("cd ComputeLibrary; export acl_install_dir=/acl; " - "scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=linux openmp=1 cppthreads=0 arch=armv8a multi_isa=1 build=native build_dir=$acl_install_dir/build; " + "scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=linux openmp=1 cppthreads=0 arch=armv8a multi_isa=1 fixed_format_kernels=1 build=native build_dir=$acl_install_dir/build; " "cp -r arm_compute $acl_install_dir; " "cp -r include $acl_install_dir; " "cp -r utils $acl_install_dir; " @@ -106,8 +106,6 @@ def parse_arguments(): print("build pytorch without mkldnn backend") # work around to fix Raspberry pie crash - print("Applying mkl-dnn patch to fix Raspberry pie crash") - os.system("cd /pytorch/third_party/ideep/mkl-dnn && patch -p1 < /builder/mkldnn_fix/aarch64-fix-default-build-flags-to-armv8-a.patch") print("Applying mkl-dnn patch to fix readdir crash") os.system("cd /pytorch/third_party/ideep/mkl-dnn && patch -p1 < /builder/mkldnn_fix/aarch64-fix-readdir-crash.patch") os.system(f"cd /pytorch; {build_vars} python3 setup.py bdist_wheel") diff --git a/aarch64_linux/build_aarch64_wheel.py b/aarch64_linux/build_aarch64_wheel.py index dd43bf218..9efd2e6ae 100755 --- a/aarch64_linux/build_aarch64_wheel.py +++ b/aarch64_linux/build_aarch64_wheel.py @@ -219,15 +219,15 @@ def install_condaforge_python(host: RemoteHost, python_version="3.8") -> None: def build_OpenBLAS(host: RemoteHost, git_clone_flags: str = "") -> None: print('Building OpenBLAS') - host.run_cmd(f"git clone https://github.com/xianyi/OpenBLAS -b v0.3.20 {git_clone_flags}") + host.run_cmd(f"git clone 
https://github.com/xianyi/OpenBLAS -b v0.3.25 {git_clone_flags}") make_flags = "NUM_THREADS=64 USE_OPENMP=1 NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=ARMV8" host.run_cmd(f"pushd OpenBLAS && make {make_flags} -j8 && sudo make {make_flags} install && popd && rm -rf OpenBLAS") def build_ArmComputeLibrary(host: RemoteHost, git_clone_flags: str = "") -> None: print('Building Arm Compute Library') - acl_build_flags="debug=0 neon=1 opencl=0 os=linux openmp=1 cppthreads=0 arch=armv8a multi_isa=1 build=native" - host.run_cmd(f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v23.05.1 {git_clone_flags}") + acl_build_flags="debug=0 neon=1 opencl=0 os=linux openmp=1 cppthreads=0 arch=armv8a multi_isa=1 fixed_format_kernels=1 build=native" + host.run_cmd(f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v23.08 {git_clone_flags}") host.run_cmd(f"cd ComputeLibrary && scons Werror=1 -j8 {acl_build_flags}") @@ -555,7 +555,6 @@ def start_build(host: RemoteHost, *, print("build pytorch with mkldnn+acl backend") build_vars += " USE_MKLDNN=ON USE_MKLDNN_ACL=ON" host.run_cmd(f"cd $HOME && git clone https://github.com/pytorch/builder.git") - host.run_cmd(f"cd $HOME/pytorch/third_party/ideep/mkl-dnn && patch -p1 < $HOME/builder/mkldnn_fix/aarch64-fix-default-build-flags-to-armv8-a.patch") host.run_cmd(f"cd $HOME/pytorch && export ACL_ROOT_DIR=$HOME/ComputeLibrary && {build_vars} python3 setup.py bdist_wheel{build_opts}") print('Repair the wheel') pytorch_wheel_name = host.list_dir("pytorch/dist")[0] diff --git a/mkldnn_fix/aarch64-fix-default-build-flags-to-armv8-a.patch b/mkldnn_fix/aarch64-fix-default-build-flags-to-armv8-a.patch deleted file mode 100644 index f6e91010a..000000000 --- a/mkldnn_fix/aarch64-fix-default-build-flags-to-armv8-a.patch +++ /dev/null @@ -1,29 +0,0 @@ ---- - cmake/platform.cmake | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/cmake/platform.cmake b/cmake/platform.cmake -index 8630460ce..602eafe8e 100644 ---- 
a/cmake/platform.cmake -+++ b/cmake/platform.cmake -@@ -198,7 +198,7 @@ elseif(UNIX OR MINGW) - endif() - # For native compilation tune for the host processor - if (CMAKE_SYSTEM_PROCESSOR STREQUAL CMAKE_HOST_SYSTEM_PROCESSOR) -- append(DEF_ARCH_OPT_FLAGS "-mcpu=native") -+ append(DEF_ARCH_OPT_FLAGS "-march=armv8-a") - endif() - elseif(DNNL_TARGET_ARCH STREQUAL "PPC64") - if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug") -@@ -295,7 +295,7 @@ elseif(UNIX OR MINGW) - endif() - # For native compilation tune for the host processor - if (CMAKE_SYSTEM_PROCESSOR STREQUAL CMAKE_HOST_SYSTEM_PROCESSOR) -- append(DEF_ARCH_OPT_FLAGS "-mcpu=native") -+ append(DEF_ARCH_OPT_FLAGS "-march=armv8-a") - endif() - elseif(DNNL_TARGET_ARCH STREQUAL "PPC64") - if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug") --- -2.34.1 - From 3723ee76b112d63b6623103876d936e95ad98876 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Wed, 6 Dec 2023 23:03:02 +0000 Subject: [PATCH 144/212] Validation scripts, install using version (#1633) --- .github/scripts/validate_binaries.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/scripts/validate_binaries.sh b/.github/scripts/validate_binaries.sh index 0c01dbca2..acdcef5e5 100755 --- a/.github/scripts/validate_binaries.sh +++ b/.github/scripts/validate_binaries.sh @@ -12,6 +12,13 @@ else TEST_SUFFIX=" --package torchonly" fi + # if RELESE version is passed as parameter - install speific version + if [[ ! -z ${RELEASE_VERSION} ]]; then + INSTALLATION=${INSTALLATION/"torch "/"torch==${RELEASE_VERSION} "} + INSTALLATION=${INSTALLATION/"-y pytorch "/"-y pytorch==${RELEASE_VERSION} "} + INSTALLATION=${INSTALLATION/"::pytorch "/"::pytorch==${RELEASE_VERSION} "} + fi + export OLD_PATH=${PATH} # Workaround macos-arm64 runners. 
Issue: https://github.com/pytorch/test-infra/issues/4342 if [[ ${TARGET_OS} == 'macos-arm64' ]]; then From 60169e3cdd1e3b4555b4386cc62f85b6564c3f01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ionu=C8=9B=20Man=C8=9Ba?= Date: Fri, 8 Dec 2023 07:09:30 +0200 Subject: [PATCH 145/212] Test Windows static lib (#1465) Add support for testing Windows Cuda static lib --- test_example_code/CMakeLists.txt | 26 +++++ windows/internal/static_lib_test.bat | 137 +++++++++++++++++++++++++++ 2 files changed, 163 insertions(+) create mode 100644 test_example_code/CMakeLists.txt create mode 100644 windows/internal/static_lib_test.bat diff --git a/test_example_code/CMakeLists.txt b/test_example_code/CMakeLists.txt new file mode 100644 index 000000000..1724a6ed0 --- /dev/null +++ b/test_example_code/CMakeLists.txt @@ -0,0 +1,26 @@ +cmake_minimum_required(VERSION 3.0 FATAL_ERROR) +project(simple-torch-test) + +find_package(Torch REQUIRED) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") + + +add_executable(simple-torch-test simple-torch-test.cpp) +target_include_directories(simple-torch-test PRIVATE ${TORCH_INCLUDE_DIRS}) +target_link_libraries(simple-torch-test "${TORCH_LIBRARIES}") +set_property(TARGET simple-torch-test PROPERTY CXX_STANDARD 17) + +find_package(CUDAToolkit 11.8) + +target_link_libraries(simple-torch-test CUDA::cudart CUDA::cufft CUDA::cusparse CUDA::cublas CUDA::cusolver) +find_library(CUDNN_LIBRARY NAMES cudnn) +target_link_libraries(simple-torch-test ${CUDNN_LIBRARY} ) +if (MSVC) + file(GLOB TORCH_DLLS "$ENV{CUDA_PATH}/bin/cudnn64_8.dll" "$ENV{NVTOOLSEXT_PATH}/bin/x64/*.dll") + message("dlls to copy " ${TORCH_DLLS}) + add_custom_command(TARGET simple-torch-test + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different + ${TORCH_DLLS} + $) +endif (MSVC) diff --git a/windows/internal/static_lib_test.bat b/windows/internal/static_lib_test.bat new file mode 100644 index 000000000..aa15dd68f --- /dev/null +++ b/windows/internal/static_lib_test.bat @@ -0,0 
+1,137 @@ +set SRC_DIR=%~dp0 + +pushd %SRC_DIR%\.. + +if "%CUDA_VERSION%" == "cpu" call internal\driver_update.bat +if errorlevel 1 exit /b 1 + +call internal\cuda_install.bat +set LIB=%CUDA_PATH%\lib\x64;%LIB% +if errorlevel 1 exit /b 1 +set "ORIG_PATH=%PATH%" + +setlocal EnableDelayedExpansion +set NVIDIA_GPU_EXISTS=0 +for /F "delims=" %%i in ('wmic path win32_VideoController get name') do ( + set GPUS=%%i + if not "x!GPUS:NVIDIA=!" == "x!GPUS!" ( + SET NVIDIA_GPU_EXISTS=1 + goto gpu_check_end + ) +) +:gpu_check_end +endlocal & set NVIDIA_GPU_EXISTS=%NVIDIA_GPU_EXISTS% + +:: Download MAGMA Files on CUDA builds +set MAGMA_VERSION=2.5.4 +set CUDA_PREFIX=cuda%CUDA_VERSION% +if "%CUDA_VERSION%" == "92" set MAGMA_VERSION=2.5.2 +if "%CUDA_VERSION%" == "100" set MAGMA_VERSION=2.5.2 + +if "%DEBUG%" == "1" ( + set BUILD_TYPE=debug +) else ( + set BUILD_TYPE=release +) + +if not "%CUDA_VERSION%" == "cpu" ( + rmdir /s /q magma_%CUDA_PREFIX%_%BUILD_TYPE% + del magma_%CUDA_PREFIX%_%BUILD_TYPE%.7z + curl -k https://s3.amazonaws.com/ossci-windows/magma_%MAGMA_VERSION%_%CUDA_PREFIX%_%BUILD_TYPE%.7z -o magma_%CUDA_PREFIX%_%BUILD_TYPE%.7z + 7z x -aoa magma_%CUDA_PREFIX%_%BUILD_TYPE%.7z -omagma_%CUDA_PREFIX%_%BUILD_TYPE% + set LIB=%CD%\magma_%CUDA_PREFIX%_%BUILD_TYPE%\lib;%LIB% +) + +echo "install conda package" + +:: Install Miniconda3 +set "CONDA_HOME=%CD%\conda" +set "tmp_conda=%CONDA_HOME%" +set "miniconda_exe=%CD%\miniconda.exe" + +rmdir /s /q conda +del miniconda.exe +curl -k https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe -o "%miniconda_exe%" +call ..\conda\install_conda.bat +if ERRORLEVEL 1 exit /b 1 + +set "PATH=%CONDA_HOME%;%CONDA_HOME%\scripts;%CONDA_HOME%\Library\bin;%PATH%" + +conda create -qyn testenv python=%DESIRED_PYTHON% +if errorlevel 1 exit /b 1 + +call %CONDA_HOME%\condabin\activate.bat testenv +if errorlevel 1 exit /b 1 + +call conda install -y -q -c conda-forge libuv=1.39 +call conda install -y -q intel-openmp + +echo "install and 
test libtorch" +pip install cmake +echo "installing cmake" + +curl https://s3.amazonaws.com/ossci-windows/mkl_2020.2.254.7z -k -O +7z x -aoa mkl_2020.2.254.7z -omkl +set LIB=%CD%\mkl\lib;%LIB% + + +if "%VC_YEAR%" == "2019" powershell internal\vs2019_install.ps1 +if "%VC_YEAR%" == "2022" powershell internal\vs2022_install.ps1 + +if ERRORLEVEL 1 exit /b 1 + +for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *-latest.zip') do 7z x "%%i" -otmp +if ERRORLEVEL 1 exit /b 1 + + +pushd tmp\libtorch + +set VC_VERSION_LOWER=17 +set VC_VERSION_UPPER=18 +IF "%VC_YEAR%" == "2019" ( + set VC_VERSION_LOWER=16 + set VC_VERSION_UPPER=17 +) + +for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [%VC_VERSION_LOWER%^,%VC_VERSION_UPPER%^) -property installationPath`) do ( + if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( + set "VS15INSTALLDIR=%%i" + set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" + goto vswhere + ) +) + +:vswhere +IF "%VS15VCVARSALL%"=="" ( + echo Visual Studio %VC_YEAR% C++ BuildTools is required to compile PyTorch test on Windows + exit /b 1 +) +call "%VS15VCVARSALL%" x64 + +set install_root=%CD% +set INCLUDE=%INCLUDE%;%install_root%\include;%install_root%\include\torch\csrc\api\include +set LIB=%LIB%;%install_root%\lib\x64 +set PATH=%PATH%;%install_root%\lib + + +cd %BUILDER_ROOT%\test_example_code\ +mkdir build +cd build + +cmake -DCMAKE_PREFIX_PATH=%install_root% .. + +if ERRORLEVEL 1 exit /b 1 + +cmake --build . 
--config Release + +.\Release\simple-torch-test.exe +if ERRORLEVEL 1 exit /b 1 + +popd + +echo Cleaning temp files +rd /s /q "tmp" || ver > nul + +:end +set "PATH=%ORIG_PATH%" +popd From 2a4c533360a1aa2dec292baa95e69c7ec6ecd166 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Fri, 8 Dec 2023 15:55:51 +0000 Subject: [PATCH 146/212] Pin windows intel-openmp to 2023.2.0 (#1635) (#1636) --- windows/condaenv.bat | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/windows/condaenv.bat b/windows/condaenv.bat index cf1b2c865..4475b0568 100644 --- a/windows/condaenv.bat +++ b/windows/condaenv.bat @@ -12,9 +12,9 @@ FOR %%v IN (%DESIRED_PYTHON%) DO ( if "%%v" == "3.7" call conda create -n py!PYTHON_VERSION_STR! -y -q numpy=1.11 "mkl=2020.2" pyyaml boto3 cmake ninja typing_extensions python=%%v if "%%v" == "3.8" call conda create -n py!PYTHON_VERSION_STR! -y -q numpy=1.11 "mkl=2020.2" pyyaml boto3 cmake ninja typing_extensions python=%%v if "%%v" == "3.9" call conda create -n py!PYTHON_VERSION_STR! -y -q numpy>=1.11 "mkl=2020.2" pyyaml boto3 cmake ninja typing_extensions python=%%v - if "%%v" == "3.10" call conda create -n py!PYTHON_VERSION_STR! -y -q -c=conda-forge numpy=1.21.3 "mkl=2020.2" pyyaml boto3 "cmake=3.19.6" ninja typing_extensions python=%%v - if "%%v" == "3.11" call conda create -n py!PYTHON_VERSION_STR! -y -q -c=conda-forge numpy=1.23.4 "mkl=2020.2" pyyaml boto3 "cmake=3.19.6" ninja typing_extensions python=%%v - if "%%v" == "3.12" call conda create -n py!PYTHON_VERSION_STR! -y -q -c=conda-forge numpy=1.26.0 "mkl=2023.1" pyyaml boto3 "cmake=3.19.6" ninja typing_extensions python=%%v + if "%%v" == "3.10" call conda create -n py!PYTHON_VERSION_STR! -y -q -c=conda-forge numpy=1.21.3 "mkl=2020.2" intel-openmp=2023.2.0 pyyaml boto3 "cmake=3.19.6" ninja typing_extensions python=%%v + if "%%v" == "3.11" call conda create -n py!PYTHON_VERSION_STR! 
-y -q -c=conda-forge numpy=1.23.4 "mkl=2020.2" intel-openmp=2023.2.0 pyyaml boto3 "cmake=3.19.6" ninja typing_extensions python=%%v + if "%%v" == "3.12" call conda create -n py!PYTHON_VERSION_STR! -y -q -c=conda-forge numpy=1.26.0 "mkl=2023.1" intel-openmp=2023.2.0 pyyaml boto3 "cmake=3.19.6" ninja typing_extensions python=%%v if "%%v" == "3" call conda create -n py!PYTHON_VERSION_STR! -y -q numpy=1.11 "mkl=2020.2" pyyaml boto3 cmake ninja typing_extensions python=%%v ) endlocal From c10e2547f19d6cf985d72a6c05389a63705321a1 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Sat, 9 Dec 2023 15:56:17 +0000 Subject: [PATCH 147/212] Torch compile test for python 3.8-3.11 linux only (#1629) This should fix failure on with Python 3.12 validations: https://github.com/pytorch/builder/actions/runs/7064433251/job/19232483984#step:11:4859 --- test/smoke_test/smoke_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/smoke_test/smoke_test.py b/test/smoke_test/smoke_test.py index a4cd6dff2..3b5b18c35 100644 --- a/test/smoke_test/smoke_test.py +++ b/test/smoke_test/smoke_test.py @@ -167,8 +167,8 @@ def smoke_test_cuda(package: str, runtime_error_check: str) -> None: print(f"torch cudnn: {torch.backends.cudnn.version()}") print(f"cuDNN enabled? 
{torch.backends.cudnn.enabled}") - # torch.compile is available only on Linux and python 3.8-3.10 - if sys.platform in ["linux", "linux2"] and (sys.version_info < (3, 11, 0) or channel != "release"): + # torch.compile is available only on Linux and python 3.8-3.11 + if (sys.platform in ["linux", "linux2"]) and sys.version_info < (3, 12, 0): smoke_test_compile() if runtime_error_check == "enabled": From f4144554d893b0bf62694617b9d5dc8977ca72ec Mon Sep 17 00:00:00 2001 From: snadampal <87143774+snadampal@users.noreply.github.com> Date: Sat, 9 Dec 2023 10:20:23 -0600 Subject: [PATCH 148/212] [aarch64] cleanup mkldnn patching (#1630) pytorch is moved to oneDNN v3.3.2 and some of the old patches are not applicable any more. --- aarch64_linux/aarch64_wheel_ci_build.py | 3 --- aarch64_linux/build_aarch64_wheel.py | 1 - mkldnn_fix/aarch64-fix-readdir-crash.patch | 14 -------------- 3 files changed, 18 deletions(-) delete mode 100644 mkldnn_fix/aarch64-fix-readdir-crash.patch diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py index 3b772847c..d3910f227 100755 --- a/aarch64_linux/aarch64_wheel_ci_build.py +++ b/aarch64_linux/aarch64_wheel_ci_build.py @@ -105,9 +105,6 @@ def parse_arguments(): else: print("build pytorch without mkldnn backend") - # work around to fix Raspberry pie crash - print("Applying mkl-dnn patch to fix readdir crash") - os.system("cd /pytorch/third_party/ideep/mkl-dnn && patch -p1 < /builder/mkldnn_fix/aarch64-fix-readdir-crash.patch") os.system(f"cd /pytorch; {build_vars} python3 setup.py bdist_wheel") pytorch_wheel_name = complete_wheel("pytorch") print(f"Build Compelete. 
Created {pytorch_wheel_name}..") diff --git a/aarch64_linux/build_aarch64_wheel.py b/aarch64_linux/build_aarch64_wheel.py index 9efd2e6ae..d4fa6f8ad 100755 --- a/aarch64_linux/build_aarch64_wheel.py +++ b/aarch64_linux/build_aarch64_wheel.py @@ -554,7 +554,6 @@ def start_build(host: RemoteHost, *, build_ArmComputeLibrary(host, git_clone_flags) print("build pytorch with mkldnn+acl backend") build_vars += " USE_MKLDNN=ON USE_MKLDNN_ACL=ON" - host.run_cmd(f"cd $HOME && git clone https://github.com/pytorch/builder.git") host.run_cmd(f"cd $HOME/pytorch && export ACL_ROOT_DIR=$HOME/ComputeLibrary && {build_vars} python3 setup.py bdist_wheel{build_opts}") print('Repair the wheel') pytorch_wheel_name = host.list_dir("pytorch/dist")[0] diff --git a/mkldnn_fix/aarch64-fix-readdir-crash.patch b/mkldnn_fix/aarch64-fix-readdir-crash.patch deleted file mode 100644 index 81d46d406..000000000 --- a/mkldnn_fix/aarch64-fix-readdir-crash.patch +++ /dev/null @@ -1,14 +0,0 @@ -diff --git a/src/cpu/aarch64/xbyak_aarch64/src/util_impl.cpp b/src/cpu/aarch64/xbyak_aarch64/src/util_impl.cpp -index cb800b2509..5516373b90 100644 ---- a/src/cpu/aarch64/xbyak_aarch64/src/util_impl.cpp -+++ b/src/cpu/aarch64/xbyak_aarch64/src/util_impl.cpp -@@ -170,6 +170,8 @@ int Cpu::getFilePathMaxTailNumPlus1(const char *path) { - fflush(stdout); - - DIR *dir = opendir(dir_path); -+ if (dir == NULL) -+ return 0; - struct dirent *dp; - - dp = readdir(dir); - From bafda5103ffe512e914adf6e2fb75b1a9ca943be Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Sat, 9 Dec 2023 08:37:00 -0800 Subject: [PATCH 149/212] Add `aarch64_linux` to the list of linted files --- .lintrunner.toml | 2 +- aarch64_linux/build_aarch64_wheel.py | 2 +- aarch64_linux/embed_library.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.lintrunner.toml b/.lintrunner.toml index b7375092a..7d48258bc 100644 --- a/.lintrunner.toml +++ b/.lintrunner.toml @@ -2,7 +2,7 @@ merge_base_with = "origin/main" [[linter]] code = 'RUFF' 
-include_patterns = ['test/smoke_test/*.py', 's3_management/*.py'] +include_patterns = ['test/smoke_test/*.py', 's3_management/*.py', 'aarch64_linux/*.py'] command = [ 'python3', 'tools/linter/adapters/ruff_linter.py', diff --git a/aarch64_linux/build_aarch64_wheel.py b/aarch64_linux/build_aarch64_wheel.py index d4fa6f8ad..333b8b910 100755 --- a/aarch64_linux/build_aarch64_wheel.py +++ b/aarch64_linux/build_aarch64_wheel.py @@ -301,7 +301,7 @@ def build_torchvision(host: RemoteHost, *, # Remove .so files to force static linking host.run_cmd("rm miniforge3/lib/libpng.so miniforge3/lib/libpng16.so miniforge3/lib/libjpeg.so") # And patch setup.py to include libz dependency for libpng - host.run_cmd(['sed -i -e \'s/image_link_flags\.append("png")/image_link_flags += ["png", "z"]/\' vision/setup.py']) + host.run_cmd(['sed -i -e \'s/image_link_flags\\.append("png")/image_link_flags += ["png", "z"]/\' vision/setup.py']) build_vars = "" if branch == "nightly": diff --git a/aarch64_linux/embed_library.py b/aarch64_linux/embed_library.py index 978970d45..1a3114823 100644 --- a/aarch64_linux/embed_library.py +++ b/aarch64_linux/embed_library.py @@ -13,7 +13,7 @@ def replace_tag(filename): - with open(filename, 'r') as f: + with open(filename) as f: lines = f.read().split("\\n") for i,line in enumerate(lines): if not line.startswith("Tag: "): From bb9b32c00cdb3ddbb0ed7754adeb7220d44d031f Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Sat, 9 Dec 2023 09:00:18 -0800 Subject: [PATCH 150/212] Actually fix lint this type --- aarch64_linux/aarch64_wheel_ci_build.py | 21 ++++++++--------- aarch64_linux/build_aarch64_wheel.py | 30 +++++++++++++------------ aarch64_linux/embed_library.py | 2 +- 3 files changed, 28 insertions(+), 25 deletions(-) diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py index d3910f227..d24b6f2fd 100755 --- a/aarch64_linux/aarch64_wheel_ci_build.py +++ b/aarch64_linux/aarch64_wheel_ci_build.py @@ -2,7 +2,7 @@ # 
encoding: UTF-8 import os -import subprocess +from subprocess import check_output from pygit2 import Repository from typing import List @@ -11,7 +11,7 @@ def list_dir(path: str) -> List[str]: '''' Helper for getting paths for Python ''' - return subprocess.check_output(["ls", "-1", path]).decode().split("\n") + return check_output(["ls", "-1", path]).decode().split("\n") def build_ArmComputeLibrary(git_clone_flags: str = "") -> None: @@ -19,10 +19,12 @@ def build_ArmComputeLibrary(git_clone_flags: str = "") -> None: Using ArmComputeLibrary for aarch64 PyTorch ''' print('Building Arm Compute Library') + acl_build_flags=" ".join(["debug=0", "neon=1", "opencl=0", "os=linux", "openmp=1", "cppthreads=0", + "arch=armv8a", "multi_isa=1", "fixed_format_kernels=1", "build=native"]) os.system("cd / && mkdir /acl") os.system(f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v23.08 {git_clone_flags}") os.system("cd ComputeLibrary; export acl_install_dir=/acl; " - "scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=linux openmp=1 cppthreads=0 arch=armv8a multi_isa=1 fixed_format_kernels=1 build=native build_dir=$acl_install_dir/build; " + f"scons Werror=1 -j8 {acl_build_flags} build_dir=$acl_install_dir/build; " "cp -r arm_compute $acl_install_dir; " "cp -r include $acl_install_dir; " "cp -r utils $acl_install_dir; " @@ -86,13 +88,12 @@ def parse_arguments(): if override_package_version is not None: version = override_package_version build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version} PYTORCH_BUILD_NUMBER=1 " - else: - if branch in ['nightly', 'master']: - build_date = subprocess.check_output(['git', 'log', '--pretty=format:%cs', '-1'], cwd='/pytorch').decode().replace('-', '') - version = subprocess.check_output(['cat', 'version.txt'], cwd='/pytorch').decode().strip()[:-2] - build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1 " - if branch.startswith("v1.") or branch.startswith("v2."): - build_vars += 
f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1 " + elif branch in ['nightly', 'master']: + build_date = check_output(['git', 'log', '--pretty=format:%cs', '-1'], cwd='/pytorch').decode().replace('-', '') + version = check_output(['cat', 'version.txt'], cwd='/pytorch').decode().strip()[:-2] + build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1 " + elif branch.startswith(("v1.", "v2.")): + build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1 " if enable_mkldnn: build_ArmComputeLibrary(git_clone_flags) diff --git a/aarch64_linux/build_aarch64_wheel.py b/aarch64_linux/build_aarch64_wheel.py index 333b8b910..0ff286ad2 100755 --- a/aarch64_linux/build_aarch64_wheel.py +++ b/aarch64_linux/build_aarch64_wheel.py @@ -2,9 +2,10 @@ # This script is for building AARCH64 wheels using AWS EC2 instances. # To generate binaries for the release follow these steps: -# 1. Update mappings for each of the Domain Libraries by adding new row to a table like this: "v1.11.0": ("0.11.0", "rc1"), -# 2. Run script with following arguments for each of the supported python versions and specify required RC tag for example: v1.11.0-rc3: -# build_aarch64_wheel.py --key-name --use-docker --python 3.8 --branch +# 1. Update mappings for each of the Domain Libraries by adding new row to a table like this: +# "v1.11.0": ("0.11.0", "rc1"), +# 2. 
Run script with following arguments for each of the supported python versions and required tag, for example: +# build_aarch64_wheel.py --key-name --use-docker --python 3.8 --branch v1.11.0-rc3 import boto3 @@ -177,7 +178,7 @@ def wait_for_connection(addr, port, timeout=15, attempt_cnt=5): try: with socket.create_connection((addr, port), timeout=timeout): return - except (ConnectionRefusedError, socket.timeout): + except (ConnectionRefusedError, socket.timeout): # noqa: PERF203 if i == attempt_cnt - 1: raise time.sleep(timeout) @@ -203,7 +204,7 @@ def install_condaforge(host: RemoteHost, if host.using_docker(): host.run_cmd("echo 'PATH=$HOME/miniforge3/bin:$PATH'>>.bashrc") else: - host.run_cmd(['sed', '-i', '\'/^# If not running interactively.*/i PATH=$HOME/miniforge3/bin:$PATH\'', '.bashrc']) + host.run_cmd(['sed', '-i', '\'/^# If not running interactively.*/i PATH=$HOME/miniforge3/bin:$PATH\'', '.bashrc']) # noqa: E501 def install_condaforge_python(host: RemoteHost, python_version="3.8") -> None: @@ -221,12 +222,13 @@ def build_OpenBLAS(host: RemoteHost, git_clone_flags: str = "") -> None: print('Building OpenBLAS') host.run_cmd(f"git clone https://github.com/xianyi/OpenBLAS -b v0.3.25 {git_clone_flags}") make_flags = "NUM_THREADS=64 USE_OPENMP=1 NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=ARMV8" - host.run_cmd(f"pushd OpenBLAS && make {make_flags} -j8 && sudo make {make_flags} install && popd && rm -rf OpenBLAS") + host.run_cmd(f"pushd OpenBLAS && make {make_flags} -j8 && sudo make {make_flags} install && popd && rm -rf OpenBLAS") # noqa: E501 def build_ArmComputeLibrary(host: RemoteHost, git_clone_flags: str = "") -> None: print('Building Arm Compute Library') - acl_build_flags="debug=0 neon=1 opencl=0 os=linux openmp=1 cppthreads=0 arch=armv8a multi_isa=1 fixed_format_kernels=1 build=native" + acl_build_flags=" ".join(["debug=0", "neon=1", "opencl=0", "os=linux", "openmp=1", "cppthreads=0", + "arch=armv8a", "multi_isa=1", "fixed_format_kernels=1", "build=native"]) 
host.run_cmd(f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v23.08 {git_clone_flags}") host.run_cmd(f"cd ComputeLibrary && scons Werror=1 -j8 {acl_build_flags}") @@ -301,7 +303,7 @@ def build_torchvision(host: RemoteHost, *, # Remove .so files to force static linking host.run_cmd("rm miniforge3/lib/libpng.so miniforge3/lib/libpng16.so miniforge3/lib/libjpeg.so") # And patch setup.py to include libz dependency for libpng - host.run_cmd(['sed -i -e \'s/image_link_flags\\.append("png")/image_link_flags += ["png", "z"]/\' vision/setup.py']) + host.run_cmd(['sed -i -e \'s/image_link_flags\\.append("png")/image_link_flags += ["png", "z"]/\' vision/setup.py']) # noqa: E501 build_vars = "" if branch == "nightly": @@ -525,7 +527,7 @@ def start_build(host: RemoteHost, *, if host.using_docker(): print("Move libgfortant.a into a standard location") # HACK: pypa gforntran.a is compiled without PIC, which leads to the following error - # libgfortran.a(error.o)(.text._gfortrani_st_printf+0x34): unresolvable R_AARCH64_ADR_PREL_PG_HI21 relocation against symbol `__stack_chk_guard@@GLIBC_2.17' + # libgfortran.a(error.o)(.text._gfortrani_st_printf+0x34): unresolvable R_AARCH64_ADR_PREL_PG_HI21 relocation against symbol `__stack_chk_guard@@GLIBC_2.17' # noqa: E501 # Workaround by copying gfortran library from the host host.run_ssh_cmd("sudo apt-get install -y gfortran-8") host.run_cmd("mkdir -p /usr/lib/gcc/aarch64-linux-gnu/8") @@ -543,10 +545,10 @@ def start_build(host: RemoteHost, *, # Breakpad build fails on aarch64 build_vars = "USE_BREAKPAD=0 " if branch == 'nightly': - build_date = host.check_output("cd pytorch && git log --pretty=format:%s -1").strip().split()[0].replace("-", "") + build_date = host.check_output("cd pytorch && git log --pretty=format:%s -1").strip().split()[0].replace("-", "") # noqa: E501 version = host.check_output("cat pytorch/version.txt").strip()[:-2] build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} 
PYTORCH_BUILD_NUMBER=1" - if branch.startswith("v1.") or branch.startswith("v2."): + if branch.startswith(("v1.", "v2.")): build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1" if host.using_docker(): build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000" @@ -554,10 +556,10 @@ def start_build(host: RemoteHost, *, build_ArmComputeLibrary(host, git_clone_flags) print("build pytorch with mkldnn+acl backend") build_vars += " USE_MKLDNN=ON USE_MKLDNN_ACL=ON" - host.run_cmd(f"cd $HOME/pytorch && export ACL_ROOT_DIR=$HOME/ComputeLibrary && {build_vars} python3 setup.py bdist_wheel{build_opts}") + host.run_cmd(f"cd $HOME/pytorch && export ACL_ROOT_DIR=$HOME/ComputeLibrary && {build_vars} python3 setup.py bdist_wheel{build_opts}") # noqa: E501 print('Repair the wheel') pytorch_wheel_name = host.list_dir("pytorch/dist")[0] - host.run_cmd(f"export LD_LIBRARY_PATH=$HOME/acl/build:$HOME/pytorch/build/lib && auditwheel repair $HOME/pytorch/dist/{pytorch_wheel_name}") + host.run_cmd(f"export LD_LIBRARY_PATH=$HOME/acl/build:$HOME/pytorch/build/lib && auditwheel repair $HOME/pytorch/dist/{pytorch_wheel_name}") # noqa: E501 print('replace the original wheel with the repaired one') pytorch_repaired_wheel_name = host.list_dir("wheelhouse")[0] host.run_cmd(f"cp $HOME/wheelhouse/{pytorch_repaired_wheel_name} $HOME/pytorch/dist/{pytorch_wheel_name}") @@ -705,7 +707,7 @@ def parse_arguments(): parser.add_argument("--build-only", action="store_true") parser.add_argument("--test-only", type=str) parser.add_argument("--os", type=str, choices=list(os_amis.keys()), default='ubuntu20_04') - parser.add_argument("--python-version", type=str, choices=['3.6', '3.7', '3.8', '3.9', '3.10', '3.11'], default=None) + parser.add_argument("--python-version", type=str, choices=[f'3.{d}' for d in range(6, 12)], default=None) parser.add_argument("--alloc-instance", action="store_true") parser.add_argument("--list-instances", 
action="store_true") parser.add_argument("--pytorch-only", action="store_true") diff --git a/aarch64_linux/embed_library.py b/aarch64_linux/embed_library.py index 1a3114823..be6bb048f 100644 --- a/aarch64_linux/embed_library.py +++ b/aarch64_linux/embed_library.py @@ -42,7 +42,7 @@ def embed_library(whl_path, lib_soname, update_tag=False): torchlib_path = os.path.join(ctx._tmpdir.name, 'torch', 'lib') ctx.out_wheel=tmp_whl_name new_lib_path, new_lib_soname = None, None - for filename, elf in elf_file_filter(ctx.iter_files()): + for filename, _ in elf_file_filter(ctx.iter_files()): if not filename.startswith('torch/lib'): continue libtree = lddtree(filename) From 4f298cbaff97caeeaa1b278caf9874d6c367a750 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Sat, 9 Dec 2023 14:28:00 -0800 Subject: [PATCH 151/212] Extend test_linalg from smoke_test.py To take device as an argument and run tests on both cpu and cuda --- test/smoke_test/smoke_test.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/test/smoke_test/smoke_test.py b/test/smoke_test/smoke_test.py index 3b5b18c35..f4c06150e 100644 --- a/test/smoke_test/smoke_test.py +++ b/test/smoke_test/smoke_test.py @@ -206,9 +206,9 @@ def smoke_test_conv2d() -> None: assert output is not None -def smoke_test_linalg() -> None: - print("Testing smoke_test_linalg") - A = torch.randn(5, 3) +def test_linalg(device="cpu") -> None: + print(f"Testing smoke_test_linalg on {device}") + A = torch.randn(5, 3, device=device) U, S, Vh = torch.linalg.svd(A, full_matrices=False) assert U.shape == A.shape and S.shape == torch.Size([3]) and Vh.shape == torch.Size([3, 3]) torch.dist(A, U @ torch.diag(S) @ Vh) @@ -217,15 +217,15 @@ def smoke_test_linalg() -> None: assert U.shape == torch.Size([5, 5]) and S.shape == torch.Size([3]) and Vh.shape == torch.Size([3, 3]) torch.dist(A, U[:, :3] @ torch.diag(S) @ Vh) - A = torch.randn(7, 5, 3) + A = torch.randn(7, 5, 3, device=device) U, S, Vh = torch.linalg.svd(A, 
full_matrices=False) torch.dist(A, U @ torch.diag_embed(S) @ Vh) - if is_cuda_system: + if device == "cuda": supported_dtypes = [torch.float32, torch.float64] for dtype in supported_dtypes: print(f"Testing smoke_test_linalg with cuda for {dtype}") - A = torch.randn(20, 16, 50, 100, device="cuda").type(dtype) + A = torch.randn(20, 16, 50, 100, device=device, dtype=dtype) torch.linalg.svd(A) @@ -293,7 +293,9 @@ def main() -> None: check_version(options.package) smoke_test_conv2d() - smoke_test_linalg() + test_linalg() + if is_cuda_system: + test_linalg("cuda") if options.package == "all": smoke_test_modules() From 70d5c5f7b5dd8005c36843beab7a087d1296066e Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Sat, 9 Dec 2023 16:39:26 -0800 Subject: [PATCH 152/212] Run smoke_test_linalg during check_binary This is a regression test for https://github.com/pytorch/pytorch/issues/114862 --- check_binary.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/check_binary.sh b/check_binary.sh index 30b44b535..e9fde2bc5 100755 --- a/check_binary.sh +++ b/check_binary.sh @@ -408,6 +408,18 @@ if [[ "$DESIRED_CUDA" != 'cpu' && "$DESIRED_CUDA" != 'cpu-cxx11-abi' && "$DESIRE fi # if libtorch fi # if cuda +########################## +# Run parts of smoke tests +########################## +if [[ "$PACKAGE_TYPE" != 'libtorch' ]]; then + pushd test/smoke_test + python -c "from smoke_test import test_linalg; test_linalg()" + if [[ "$DESIRED_CUDA" == *cuda* ]]; then + python -c "from smoke_test import test_linalg; test_linalg('cuda')" + fi + popd +fi + ############################################################################### # Check PyTorch supports TCP_TLS gloo transport ############################################################################### From 5d7e8e1f49f0842a5c6d55be5212390c7bd3b875 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Sat, 9 Dec 2023 17:08:36 -0800 Subject: [PATCH 153/212] Fix linalg testing --- check_binary.sh | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/check_binary.sh b/check_binary.sh index e9fde2bc5..2b5f228d6 100755 --- a/check_binary.sh +++ b/check_binary.sh @@ -412,7 +412,7 @@ fi # if cuda # Run parts of smoke tests ########################## if [[ "$PACKAGE_TYPE" != 'libtorch' ]]; then - pushd test/smoke_test + pushd "$(dirname ${BASH_SOURCE[0]})/test/smoke_test" python -c "from smoke_test import test_linalg; test_linalg()" if [[ "$DESIRED_CUDA" == *cuda* ]]; then python -c "from smoke_test import test_linalg; test_linalg('cuda')" From 2b17d68ca300a603ba03444129171fdb60c5754e Mon Sep 17 00:00:00 2001 From: Nikita Shulga <2453524+malfet@users.noreply.github.com> Date: Sun, 10 Dec 2023 13:38:19 -0800 Subject: [PATCH 154/212] [BE] Add CI for check_binary.sh changes (#1637) Make sure latest nightly passes the testing for: - Linux Wheel CPU - Linux Wheel CUDA Tweak script a bit to work correctly with relative path to executable --- .github/workflows/test-check-binary.yml | 37 +++++++++++++++++++++++++ check_binary.sh | 2 +- 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/test-check-binary.yml diff --git a/.github/workflows/test-check-binary.yml b/.github/workflows/test-check-binary.yml new file mode 100644 index 000000000..e29ee7ba9 --- /dev/null +++ b/.github/workflows/test-check-binary.yml @@ -0,0 +1,37 @@ +name: Test check_binary + +on: + push: + branches: + - main + pull_request: + paths: + - .github/workflows/test-check-binary.yml + - check_binary.sh + - test/smoke_test/smoke_test.py + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} + cancel-in-progress: true + +jobs: + check_binary_linux_cpu: + name: Test check_binary.sh for Linux CPU + uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + with: + repository: "pytorch/builder" + docker-image: python:3.11 + script: | + pip install --pre torch --index-url 
https://download.pytorch.org/whl/nightly/cpu + DESIRED_PYTHON=3.11 DESIRED_CUDA=cpu PACKAGE_TYPE=manywheel ./check_binary.sh + + check_binary_linux_cuda: + name: Test check_binary.sh for Linux CUDA + uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + with: + repository: "pytorch/builder" + runner: linux.4xlarge.nvidia.gpu + docker-image: python:3.11 + script: | + pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121 + DESIRED_PYTHON=3.11 DESIRED_CUDA=cu121 PACKAGE_TYPE=manywheel ./check_binary.sh diff --git a/check_binary.sh b/check_binary.sh index 2b5f228d6..9a2cf065b 100755 --- a/check_binary.sh +++ b/check_binary.sh @@ -261,7 +261,7 @@ setup_link_flags () { fi } -TEST_CODE_DIR="$(dirname ${BASH_SOURCE[0]})/test_example_code" +TEST_CODE_DIR="$(dirname $(realpath ${BASH_SOURCE[0]}))/test_example_code" build_and_run_example_cpp () { if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then GLIBCXX_USE_CXX11_ABI=1 From 6f3cb2ba1753c5ecade8b800c29fc18d18d149db Mon Sep 17 00:00:00 2001 From: Huy Do Date: Thu, 14 Dec 2023 11:00:07 -0800 Subject: [PATCH 155/212] Keep nightly 20231010 for ExecuTorch alpha 0.1 for now (#1642) --- s3_management/manage.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index 47c151f08..851a848c6 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -112,6 +112,12 @@ # How many packages should we keep of a specific package? KEEP_THRESHOLD = 60 +# TODO (huydhn): Clean this up once ExecuTorch has a new stable release that +# match PyTorch stable release cadence. This nightly version is currently +# referred to publicly in ExecuTorch alpha 0.1 release. 
So we want to keep +# nightly binaries around for now +KEEP_NIGHTLY_PACKAGES_FOR_EXECUTORCH = {datetime(2023, 10, 10, 0, 0)} + S3IndexType = TypeVar('S3IndexType', bound='S3Index') @@ -201,7 +207,10 @@ def nightly_packages_to_show(self: S3IndexType) -> List[S3Object]: if package_name not in PACKAGE_ALLOW_LIST: to_hide.add(obj) continue - if packages[package_name] >= KEEP_THRESHOLD or between_bad_dates(package_build_time): + if package_build_time not in KEEP_NIGHTLY_PACKAGES_FOR_EXECUTORCH and ( + packages[package_name] >= KEEP_THRESHOLD + or between_bad_dates(package_build_time) + ): to_hide.add(obj) else: packages[package_name] += 1 From 3daf3bf2cfbe1ddf85fe1726dfc80e36599f37a5 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Thu, 14 Dec 2023 20:23:41 +0000 Subject: [PATCH 156/212] [Validations] do conda update before starting validations (#1643) --- .github/scripts/validate_binaries.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/scripts/validate_binaries.sh b/.github/scripts/validate_binaries.sh index acdcef5e5..22fc9d109 100755 --- a/.github/scripts/validate_binaries.sh +++ b/.github/scripts/validate_binaries.sh @@ -2,6 +2,8 @@ if [[ ${MATRIX_PACKAGE_TYPE} == "libtorch" ]]; then curl ${MATRIX_INSTALLATION} -o libtorch.zip unzip libtorch.zip else + + conda update -y -n base -c defaults conda # Please note ffmpeg is required for torchaudio, see https://github.com/pytorch/pytorch/issues/96159 conda create -y -n ${ENV_NAME} python=${MATRIX_PYTHON_VERSION} numpy ffmpeg conda activate ${ENV_NAME} From df2d4e8fb381c81030ce921378aaafe4c177645c Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Fri, 15 Dec 2023 16:54:07 +0000 Subject: [PATCH 157/212] [Validations] Validate aarch64 if all is slected (#1644) --- .github/workflows/validate-binaries.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/validate-binaries.yml b/.github/workflows/validate-binaries.yml index fee16dca9..8dbe5f27c 100644 --- 
a/.github/workflows/validate-binaries.yml +++ b/.github/workflows/validate-binaries.yml @@ -96,7 +96,7 @@ jobs: release-matrix: ${{ needs.generate-release-matrix.outputs.matrix }} linux-aarch64: - if: inputs.os == 'linux-aarch64' + if: inputs.os == 'linux-aarch64' || inputs.os == 'all' needs: generate-release-matrix uses: ./.github/workflows/validate-aarch64-linux-binaries.yml with: @@ -105,6 +105,7 @@ jobs: torchonly: ${{ inputs.torchonly }} version: ${{ inputs.version }} release-matrix: ${{ needs.generate-release-matrix.outputs.matrix }} + mac: if: inputs.os == 'macos' || inputs.os == 'all' needs: generate-release-matrix From cd257e911b55a8001c3ddb10b078c9122c575984 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Fri, 15 Dec 2023 16:02:53 -0800 Subject: [PATCH 158/212] Fix validation workflow on aarch64 with conda 23.11.0 and GLIBC_2.25 (#1645) * Debug aarch64 clone * Debug * Fix validation workflow with conda 23.11.0 and GLIBC_2.25 * Gate the change on linux-aarch64 and keep the old LD_LIBRARY_PATH * Try to unset LD_LIBRARY_PATH in the workflow instead * Fix copy/paste typo --- .github/workflows/validate-aarch64-linux-binaries.yml | 8 +++++++- test/smoke_test/smoke_test.py | 11 ++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/.github/workflows/validate-aarch64-linux-binaries.yml b/.github/workflows/validate-aarch64-linux-binaries.yml index 14b7b6395..6b1a60d7c 100644 --- a/.github/workflows/validate-aarch64-linux-binaries.yml +++ b/.github/workflows/validate-aarch64-linux-binaries.yml @@ -95,5 +95,11 @@ jobs: printf '%s\n' ${{ toJson(inputs.release-matrix) }} > release_matrix.json eval "$(conda shell.bash hook)" - # Standart case: Validate binaries + # NB: The latest conda 23.11.0 pulls in some dependencies of conda-libmamba-solver that + # require GLIBC_2.25, which is not available in the current aarch64 image causing the + # subsequence git command to fail. 
Basically, they don't work with CentOS 7 which AML 2 + # is based on https://github.com/ContinuumIO/anaconda-issues/issues/12822 + unset LD_LIBRARY_PATH + + # Standard case: Validate binaries source ./.github/scripts/validate_binaries.sh diff --git a/test/smoke_test/smoke_test.py b/test/smoke_test/smoke_test.py index f4c06150e..14e04d366 100644 --- a/test/smoke_test/smoke_test.py +++ b/test/smoke_test/smoke_test.py @@ -258,7 +258,16 @@ def smoke_test_modules(): if module["repo"]: if not os.path.exists(f"{cwd}/{module['repo_name']}"): print(f"Path does not exist: {cwd}/{module['repo_name']}") - subprocess.check_output(f"git clone --depth 1 {module['repo']}", stderr=subprocess.STDOUT, shell=True) + try: + subprocess.check_output( + f"git clone --depth 1 {module['repo']}", + stderr=subprocess.STDOUT, + shell=True, + ) + except subprocess.CalledProcessError as exc: + raise RuntimeError( + f"Cloning {module['repo']} FAIL: {exc.returncode} Output: {exc.output}" + ) from exc try: smoke_test_command = f"python3 {module['smoke_test']}" if target_os == 'windows': From 22d7be551d404455b49caf9df1ebf1b80c18fa04 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Mon, 18 Dec 2023 11:50:12 -0800 Subject: [PATCH 159/212] Do not hardcode triton version in builder code (#1646) * Do not hardcode triton version in builder code * Minor tweak to use pytorch_rootdir --- conda/build_pytorch.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/conda/build_pytorch.sh b/conda/build_pytorch.sh index 4c2e4836b..cf8b2f463 100755 --- a/conda/build_pytorch.sh +++ b/conda/build_pytorch.sh @@ -275,8 +275,9 @@ else fi if [[ "$OSTYPE" != "msys" ]]; then # TODO: Remove me when Triton has a proper release channel + TRITON_VERSION=$(cat $pytorch_rootdir/.ci/docker/triton_version.txt) TRITON_SHORTHASH=$(cut -c1-10 $pytorch_rootdir/.github/ci_commit_pins/triton.txt) - export CONDA_TRITON_CONSTRAINT=" - torchtriton==2.1.0+${TRITON_SHORTHASH} # [py < 312]" + export CONDA_TRITON_CONSTRAINT=" - 
torchtriton==${TRITON_VERSION}+${TRITON_SHORTHASH} # [py < 312]" fi build_string_suffix="cuda${CUDA_VERSION}_cudnn${CUDNN_VERSION}_${build_string_suffix}" From 1b30e261d1d103264076a6f636801abc1905cbca Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Tue, 19 Dec 2023 20:12:40 -0800 Subject: [PATCH 160/212] [Lint] Prohibit tabs in shell scripts Fix current violations --- .lintrunner.toml | 23 +++ common/install_cuda.sh | 8 +- conda/build_pytorch.sh | 4 +- conda/pytorch-nightly/build.sh | 22 +-- ffmpeg/recipe/build.sh | 2 +- manywheel/build_rocm.sh | 8 +- manywheel/conda_build.sh | 8 +- tools/linter/adapters/grep_linter.py | 272 +++++++++++++++++++++++++++ 8 files changed, 321 insertions(+), 26 deletions(-) create mode 100644 tools/linter/adapters/grep_linter.py diff --git a/.lintrunner.toml b/.lintrunner.toml index 7d48258bc..fdfca4cef 100644 --- a/.lintrunner.toml +++ b/.lintrunner.toml @@ -18,3 +18,26 @@ init_command = [ 'ruff==0.1.1', ] is_formatter = true + +[[linter]] +code = 'TABS' +include_patterns = ['**/*.sh'] +exclude_patterns = [ + '**/*Makefile', + 'common/install_rocm_drm.sh', + '.lintrunner.toml', +] +command = [ + 'python3', + 'tools/linter/adapters/grep_linter.py', + # @lint-ignore TXT2 + '--pattern= ', + '--linter-name=TABS', + '--error-name=saw some tabs', + '--replace-pattern=s/\t/ /', + """--error-description=\ + This line has tabs; please replace them with spaces.\ + """, + '--', + '@{{PATHSFILE}}' +] diff --git a/common/install_cuda.sh b/common/install_cuda.sh index f09666e64..35babf576 100644 --- a/common/install_cuda.sh +++ b/common/install_cuda.sh @@ -146,11 +146,11 @@ while test $# -gt 0 do case "$1" in 11.8) install_118; prune_118 - ;; + ;; 12.1) install_121; prune_121 - ;; - *) echo "bad argument $1"; exit 1 - ;; + ;; + *) echo "bad argument $1"; exit 1 + ;; esac shift done diff --git a/conda/build_pytorch.sh b/conda/build_pytorch.sh index cf8b2f463..844d77f32 100755 --- a/conda/build_pytorch.sh +++ b/conda/build_pytorch.sh @@ -264,8 
+264,8 @@ else # TODO, simplify after anaconda fixes their cudatoolkit versioning inconsistency. # see: https://github.com/conda-forge/conda-forge.github.io/issues/687#issuecomment-460086164 if [[ "$desired_cuda" == "12.1" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT=" - pytorch-cuda >=12.1,<12.2 # [not osx]" - export MAGMA_PACKAGE=" - magma-cuda121 # [not osx and not win]" + export CONDA_CUDATOOLKIT_CONSTRAINT=" - pytorch-cuda >=12.1,<12.2 # [not osx]" + export MAGMA_PACKAGE=" - magma-cuda121 # [not osx and not win]" elif [[ "$desired_cuda" == "11.8" ]]; then export CONDA_CUDATOOLKIT_CONSTRAINT=" - pytorch-cuda >=11.8,<11.9 # [not osx]" export MAGMA_PACKAGE=" - magma-cuda118 # [not osx and not win]" diff --git a/conda/pytorch-nightly/build.sh b/conda/pytorch-nightly/build.sh index c649d3251..d9ccb708b 100755 --- a/conda/pytorch-nightly/build.sh +++ b/conda/pytorch-nightly/build.sh @@ -56,18 +56,18 @@ if [[ -n "$build_with_cuda" ]]; then export USE_STATIC_CUDNN=1 # links cudnn statically (driven by tools/setup_helpers/cudnn.py) if [[ $CUDA_VERSION == 11.8* ]]; then - TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;3.7+PTX;9.0" - #for cuda 11.8 we use cudnn 8.7 - #which does not have single static libcudnn_static.a deliverable to link with - export USE_STATIC_CUDNN=0 - #for cuda 11.8 include all dynamic loading libraries - DEPS_LIST=(/usr/local/cuda/lib64/libcudnn*.so.8 /usr/local/cuda-11.8/extras/CUPTI/lib64/libcupti.so.11.8) + TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;3.7+PTX;9.0" + #for cuda 11.8 we use cudnn 8.7 + #which does not have single static libcudnn_static.a deliverable to link with + export USE_STATIC_CUDNN=0 + #for cuda 11.8 include all dynamic loading libraries + DEPS_LIST=(/usr/local/cuda/lib64/libcudnn*.so.8 /usr/local/cuda-11.8/extras/CUPTI/lib64/libcupti.so.11.8) elif [[ $CUDA_VERSION == 12.1* ]]; then - # cuda 12 does not support sm_3x - TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;9.0" - # for cuda 12.1 we use cudnn 8.8 and include all dynamic 
loading libraries - export USE_STATIC_CUDNN=0 - DEPS_LIST=(/usr/local/cuda/lib64/libcudnn*.so.8 /usr/local/cuda-12.1/extras/CUPTI/lib64/libcupti.so.12) + # cuda 12 does not support sm_3x + TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;9.0" + # for cuda 12.1 we use cudnn 8.8 and include all dynamic loading libraries + export USE_STATIC_CUDNN=0 + DEPS_LIST=(/usr/local/cuda/lib64/libcudnn*.so.8 /usr/local/cuda-12.1/extras/CUPTI/lib64/libcupti.so.12) fi if [[ -n "$OVERRIDE_TORCH_CUDA_ARCH_LIST" ]]; then TORCH_CUDA_ARCH_LIST="$OVERRIDE_TORCH_CUDA_ARCH_LIST" diff --git a/ffmpeg/recipe/build.sh b/ffmpeg/recipe/build.sh index b3c5b5967..9b82c6553 100644 --- a/ffmpeg/recipe/build.sh +++ b/ffmpeg/recipe/build.sh @@ -23,7 +23,7 @@ fi --disable-static \ --enable-version3 \ --enable-zlib \ - --enable-libmp3lame + --enable-libmp3lame make -j${CPU_COUNT} make install -j${CPU_COUNT} diff --git a/manywheel/build_rocm.sh b/manywheel/build_rocm.sh index 4fbca7697..a44d6212f 100755 --- a/manywheel/build_rocm.sh +++ b/manywheel/build_rocm.sh @@ -88,7 +88,7 @@ ROCM_SO_FILES=( "librccl.so" "librocblas.so" "librocfft.so" - "librocm_smi64.so" + "librocm_smi64.so" "librocrand.so" "librocsolver.so" "librocsparse.so" @@ -128,7 +128,7 @@ elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then LIBTINFO_PATH="/lib/x86_64-linux-gnu/libtinfo.so.6" else LIBTINFO_PATH="/lib/x86_64-linux-gnu/libtinfo.so.5" - fi + fi LIBDRM_PATH="/usr/lib/x86_64-linux-gnu/libdrm.so.2" LIBDRM_AMDGPU_PATH="/usr/lib/x86_64-linux-gnu/libdrm_amdgpu.so.1" MAYBE_LIB64=lib @@ -170,8 +170,8 @@ do file_path=($(find $ROCM_HOME/ -name "$lib")) # Then search in ROCM_HOME fi if [[ -z $file_path ]]; then - echo "Error: Library file $lib is not found." >&2 - exit 1 + echo "Error: Library file $lib is not found." 
>&2 + exit 1 fi ROCM_SO_PATHS[${#ROCM_SO_PATHS[@]}]="$file_path" # Append lib to array done diff --git a/manywheel/conda_build.sh b/manywheel/conda_build.sh index 99a28768d..407b4e4d4 100755 --- a/manywheel/conda_build.sh +++ b/manywheel/conda_build.sh @@ -8,7 +8,7 @@ if ! ls /usr/local/cuda-7.5 then echo "Downloading CUDA 7.5" wget -c http://developer.download.nvidia.com/compute/cuda/7.5/Prod/local_installers/cuda_7.5.18_linux.run \ - -O /remote/cuda_7.5.18_linux.run + -O /remote/cuda_7.5.18_linux.run echo "Installing CUDA 7.5" chmod +x /remote/cuda_7.5.18_linux.run @@ -23,7 +23,7 @@ if ! ls /usr/local/cuda-8.0 then echo "Downloading CUDA 8.0" wget -c https://developer.nvidia.com/compute/cuda/8.0/Prod2/local_installers/cuda_8.0.61_375.26_linux-run \ - -O /remote/cuda_8.0.61_linux-run + -O /remote/cuda_8.0.61_linux-run echo "Installing CUDA 8.0" chmod +x /remote/cuda_8.0.61_linux-run @@ -39,7 +39,7 @@ if ! ls /usr/local/cuda-7.5/lib64/libcudnn.so.6.0.21 then rm -rf /tmp/cuda wget -c http://developer.download.nvidia.com/compute/redist/cudnn/v6.0/cudnn-7.5-linux-x64-v6.0.tgz \ - -O /remote/cudnn-7.5-linux-x64-v6.0.tgz + -O /remote/cudnn-7.5-linux-x64-v6.0.tgz pushd /tmp tar -xvf /remote/cudnn-7.5-linux-x64-v6.0.tgz cp -P /tmp/cuda/include/* /usr/local/cuda-7.5/include/ @@ -51,7 +51,7 @@ if ! ls /usr/local/cuda-8.0/lib64/libcudnn.so.6.0.21 then rm -rf /tmp/cuda wget -c http://developer.download.nvidia.com/compute/redist/cudnn/v6.0/cudnn-8.0-linux-x64-v6.0.tgz \ - -O /remote/cudnn-8.0-linux-x64-v6.0.tgz + -O /remote/cudnn-8.0-linux-x64-v6.0.tgz pushd /tmp tar -xvf /remote/cudnn-8.0-linux-x64-v6.0.tgz cp -P /tmp/cuda/include/* /usr/local/cuda-8.0/include/ diff --git a/tools/linter/adapters/grep_linter.py b/tools/linter/adapters/grep_linter.py new file mode 100644 index 000000000..168800eb4 --- /dev/null +++ b/tools/linter/adapters/grep_linter.py @@ -0,0 +1,272 @@ +""" +Generic linter that greps for a pattern and optionally suggests replacements. 
+""" + +import argparse +import json +import logging +import os +import subprocess +import sys +import time +from enum import Enum +from typing import Any, List, NamedTuple, Optional + + +IS_WINDOWS: bool = os.name == "nt" + + +def eprint(*args: Any, **kwargs: Any) -> None: + print(*args, file=sys.stderr, flush=True, **kwargs) + + +class LintSeverity(str, Enum): + ERROR = "error" + WARNING = "warning" + ADVICE = "advice" + DISABLED = "disabled" + + +class LintMessage(NamedTuple): + path: Optional[str] + line: Optional[int] + char: Optional[int] + code: str + severity: LintSeverity + name: str + original: Optional[str] + replacement: Optional[str] + description: Optional[str] + + +def as_posix(name: str) -> str: + return name.replace("\\", "/") if IS_WINDOWS else name + + +def run_command( + args: List[str], +) -> "subprocess.CompletedProcess[bytes]": + logging.debug("$ %s", " ".join(args)) + start_time = time.monotonic() + try: + return subprocess.run( + args, + capture_output=True, + ) + finally: + end_time = time.monotonic() + logging.debug("took %dms", (end_time - start_time) * 1000) + + +def lint_file( + matching_line: str, + allowlist_pattern: str, + replace_pattern: str, + linter_name: str, + error_name: str, + error_description: str, +) -> Optional[LintMessage]: + # matching_line looks like: + # tools/linter/clangtidy_linter.py:13:import foo.bar.baz + split = matching_line.split(":") + filename = split[0] + + if allowlist_pattern: + try: + proc = run_command(["grep", "-nEHI", allowlist_pattern, filename]) + except Exception as err: + return LintMessage( + path=None, + line=None, + char=None, + code=linter_name, + severity=LintSeverity.ERROR, + name="command-failed", + original=None, + replacement=None, + description=( + f"Failed due to {err.__class__.__name__}:\n{err}" + if not isinstance(err, subprocess.CalledProcessError) + else ( + "COMMAND (exit code {returncode})\n" + "{command}\n\n" + "STDERR\n{stderr}\n\n" + "STDOUT\n{stdout}" + ).format( + 
returncode=err.returncode, + command=" ".join(as_posix(x) for x in err.cmd), + stderr=err.stderr.decode("utf-8").strip() or "(empty)", + stdout=err.stdout.decode("utf-8").strip() or "(empty)", + ) + ), + ) + + # allowlist pattern was found, abort lint + if proc.returncode == 0: + return None + + original = None + replacement = None + if replace_pattern: + with open(filename) as f: + original = f.read() + + try: + proc = run_command(["sed", "-r", replace_pattern, filename]) + replacement = proc.stdout.decode("utf-8") + except Exception as err: + return LintMessage( + path=None, + line=None, + char=None, + code=linter_name, + severity=LintSeverity.ERROR, + name="command-failed", + original=None, + replacement=None, + description=( + f"Failed due to {err.__class__.__name__}:\n{err}" + if not isinstance(err, subprocess.CalledProcessError) + else ( + "COMMAND (exit code {returncode})\n" + "{command}\n\n" + "STDERR\n{stderr}\n\n" + "STDOUT\n{stdout}" + ).format( + returncode=err.returncode, + command=" ".join(as_posix(x) for x in err.cmd), + stderr=err.stderr.decode("utf-8").strip() or "(empty)", + stdout=err.stdout.decode("utf-8").strip() or "(empty)", + ) + ), + ) + + return LintMessage( + path=split[0], + line=int(split[1]) if len(split) > 1 else None, + char=None, + code=linter_name, + severity=LintSeverity.ERROR, + name=error_name, + original=original, + replacement=replacement, + description=error_description, + ) + + +def main() -> None: + parser = argparse.ArgumentParser( + description="grep wrapper linter.", + fromfile_prefix_chars="@", + ) + parser.add_argument( + "--pattern", + required=True, + help="pattern to grep for", + ) + parser.add_argument( + "--allowlist-pattern", + help="if this pattern is true in the file, we don't grep for pattern", + ) + parser.add_argument( + "--linter-name", + required=True, + help="name of the linter", + ) + parser.add_argument( + "--match-first-only", + action="store_true", + help="only match the first hit in the file", + ) + 
parser.add_argument( + "--error-name", + required=True, + help="human-readable description of what the error is", + ) + parser.add_argument( + "--error-description", + required=True, + help="message to display when the pattern is found", + ) + parser.add_argument( + "--replace-pattern", + help=( + "the form of a pattern passed to `sed -r`. " + "If specified, this will become proposed replacement text." + ), + ) + parser.add_argument( + "--verbose", + action="store_true", + help="verbose logging", + ) + parser.add_argument( + "filenames", + nargs="+", + help="paths to lint", + ) + args = parser.parse_args() + + logging.basicConfig( + format="<%(threadName)s:%(levelname)s> %(message)s", + level=logging.NOTSET + if args.verbose + else logging.DEBUG + if len(args.filenames) < 1000 + else logging.INFO, + stream=sys.stderr, + ) + + files_with_matches = [] + if args.match_first_only: + files_with_matches = ["--files-with-matches"] + + try: + proc = run_command( + ["grep", "-nEHI", *files_with_matches, args.pattern, *args.filenames] + ) + except Exception as err: + err_msg = LintMessage( + path=None, + line=None, + char=None, + code=args.linter_name, + severity=LintSeverity.ERROR, + name="command-failed", + original=None, + replacement=None, + description=( + f"Failed due to {err.__class__.__name__}:\n{err}" + if not isinstance(err, subprocess.CalledProcessError) + else ( + "COMMAND (exit code {returncode})\n" + "{command}\n\n" + "STDERR\n{stderr}\n\n" + "STDOUT\n{stdout}" + ).format( + returncode=err.returncode, + command=" ".join(as_posix(x) for x in err.cmd), + stderr=err.stderr.decode("utf-8").strip() or "(empty)", + stdout=err.stdout.decode("utf-8").strip() or "(empty)", + ) + ), + ) + print(json.dumps(err_msg._asdict()), flush=True) + sys.exit(0) + + lines = proc.stdout.decode().splitlines() + for line in lines: + lint_message = lint_file( + line, + args.allowlist_pattern, + args.replace_pattern, + args.linter_name, + args.error_name, + args.error_description, + ) + 
if lint_message is not None: + print(json.dumps(lint_message._asdict()), flush=True) + + +if __name__ == "__main__": + main() From c55c58b8720868a6d6e82720a37f97a649eda2f5 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Tue, 19 Dec 2023 20:35:07 -0800 Subject: [PATCH 161/212] Link conda packages with cusparselt Fixes https://github.com/pytorch/pytorch/issues/115085 --- conda/build_pytorch.sh | 1 - conda/pytorch-nightly/build.sh | 11 ++++------- conda/pytorch-nightly/meta.yaml | 1 - 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/conda/build_pytorch.sh b/conda/build_pytorch.sh index 844d77f32..06e72da7c 100755 --- a/conda/build_pytorch.sh +++ b/conda/build_pytorch.sh @@ -357,7 +357,6 @@ for py_ver in "${DESIRED_PYTHON[@]}"; do PYTORCH_GITHUB_ROOT_DIR="$pytorch_rootdir" \ PYTORCH_BUILD_STRING="$build_string" \ PYTORCH_MAGMA_CUDA_VERSION="$cuda_nodot" \ - USE_CUSPARSELT=0 \ conda build -c "$ANACONDA_USER" \ ${NO_TEST:-} \ --no-anaconda-upload \ diff --git a/conda/pytorch-nightly/build.sh b/conda/pytorch-nightly/build.sh index d9ccb708b..db2b7b246 100755 --- a/conda/pytorch-nightly/build.sh +++ b/conda/pytorch-nightly/build.sh @@ -53,21 +53,18 @@ fi if [[ -n "$build_with_cuda" ]]; then export TORCH_NVCC_FLAGS="-Xfatbin -compress-all" TORCH_CUDA_ARCH_LIST="5.0;6.0;6.1;7.0;7.5;8.0;8.6" - export USE_STATIC_CUDNN=1 # links cudnn statically (driven by tools/setup_helpers/cudnn.py) + export USE_STATIC_CUDNN=0 # link with cudnn dynamically + export USE_CUSPARSELT=1 # link with cusparselt if [[ $CUDA_VERSION == 11.8* ]]; then TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;3.7+PTX;9.0" - #for cuda 11.8 we use cudnn 8.7 - #which does not have single static libcudnn_static.a deliverable to link with - export USE_STATIC_CUDNN=0 #for cuda 11.8 include all dynamic loading libraries - DEPS_LIST=(/usr/local/cuda/lib64/libcudnn*.so.8 /usr/local/cuda-11.8/extras/CUPTI/lib64/libcupti.so.11.8) + DEPS_LIST=(/usr/local/cuda/lib64/libcudnn*.so.8 
/usr/local/cuda-11.8/extras/CUPTI/lib64/libcupti.so.11.8 /usr/local/cuda/lib64/libcusparseLt.so.0) elif [[ $CUDA_VERSION == 12.1* ]]; then # cuda 12 does not support sm_3x TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;9.0" # for cuda 12.1 we use cudnn 8.8 and include all dynamic loading libraries - export USE_STATIC_CUDNN=0 - DEPS_LIST=(/usr/local/cuda/lib64/libcudnn*.so.8 /usr/local/cuda-12.1/extras/CUPTI/lib64/libcupti.so.12) + DEPS_LIST=(/usr/local/cuda/lib64/libcudnn*.so.8 /usr/local/cuda-12.1/extras/CUPTI/lib64/libcupti.so.12 /usr/local/cuda/lib64/libcusparseLt.so.0) fi if [[ -n "$OVERRIDE_TORCH_CUDA_ARCH_LIST" ]]; then TORCH_CUDA_ARCH_LIST="$OVERRIDE_TORCH_CUDA_ARCH_LIST" diff --git a/conda/pytorch-nightly/meta.yaml b/conda/pytorch-nightly/meta.yaml index 05be9c7d7..e56fe7f68 100644 --- a/conda/pytorch-nightly/meta.yaml +++ b/conda/pytorch-nightly/meta.yaml @@ -98,7 +98,6 @@ build: - _GLIBCXX_USE_CXX11_ABI # [unix] - MAX_JOBS # [unix] - OVERRIDE_TORCH_CUDA_ARCH_LIST - - USE_CUSPARSELT test: imports: From 524a0272df7c6ab2bf004fe74b1fe09c5da6b667 Mon Sep 17 00:00:00 2001 From: snadampal <87143774+snadampal@users.noreply.github.com> Date: Thu, 21 Dec 2023 13:09:05 -0600 Subject: [PATCH 162/212] aarch64: patch mkl-dnn for xbyak crashes due to /sys not accessible (#1648) There are platforms with /sys not mounted. skip handling HW caps for such platforms. 
cherry-pick of: oneapi-src/oneDNN#1773 This fixes the issue# pytorch/pytorch#115482 --- aarch64_linux/aarch64_wheel_ci_build.py | 4 ++ aarch64_linux/build_aarch64_wheel.py | 2 + mkldnn_fix/fix-xbyak-failure.patch | 96 +++++++++++++++++++++++++ 3 files changed, 102 insertions(+) create mode 100644 mkldnn_fix/fix-xbyak-failure.patch diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py index d24b6f2fd..a57dab545 100755 --- a/aarch64_linux/aarch64_wheel_ci_build.py +++ b/aarch64_linux/aarch64_wheel_ci_build.py @@ -106,6 +106,10 @@ def parse_arguments(): else: print("build pytorch without mkldnn backend") + # patch mkldnn to fix aarch64 mac and aws lambda crash + print("Applying mkl-dnn patch to fix crash due to /sys not accesible") + os.system("cd /pytorch/third_party/ideep/mkl-dnn && patch -p1 < /builder/mkldnn_fix/fix-xbyak-failure.patch") + os.system(f"cd /pytorch; {build_vars} python3 setup.py bdist_wheel") pytorch_wheel_name = complete_wheel("pytorch") print(f"Build Compelete. 
Created {pytorch_wheel_name}..") diff --git a/aarch64_linux/build_aarch64_wheel.py b/aarch64_linux/build_aarch64_wheel.py index 0ff286ad2..1615c78a6 100755 --- a/aarch64_linux/build_aarch64_wheel.py +++ b/aarch64_linux/build_aarch64_wheel.py @@ -556,6 +556,8 @@ def start_build(host: RemoteHost, *, build_ArmComputeLibrary(host, git_clone_flags) print("build pytorch with mkldnn+acl backend") build_vars += " USE_MKLDNN=ON USE_MKLDNN_ACL=ON" + host.run_cmd("cd $HOME && git clone https://github.com/pytorch/builder.git") + host.run_cmd("cd $HOME/pytorch/third_party/ideep/mkl-dnn && patch -p1 < $HOME/builder/mkldnn_fix/fix-xbyak-failure.patch") # noqa: E501 host.run_cmd(f"cd $HOME/pytorch && export ACL_ROOT_DIR=$HOME/ComputeLibrary && {build_vars} python3 setup.py bdist_wheel{build_opts}") # noqa: E501 print('Repair the wheel') pytorch_wheel_name = host.list_dir("pytorch/dist")[0] diff --git a/mkldnn_fix/fix-xbyak-failure.patch b/mkldnn_fix/fix-xbyak-failure.patch new file mode 100644 index 000000000..2ad278f0b --- /dev/null +++ b/mkldnn_fix/fix-xbyak-failure.patch @@ -0,0 +1,96 @@ +cpu: aarch64: fix xbyak functions for /sys access failures + +There are platforms with /sys not mounted. skip handling HW caps +for such platforms. + +This fixes the issue# pytorch/pytorch#115482 +--- + .../xbyak_aarch64/src/util_impl_linux.h | 24 ++++++++++++++----- + .../aarch64/xbyak_aarch64/src/util_impl_mac.h | 9 ++++--- + 2 files changed, 24 insertions(+), 9 deletions(-) + +diff --git a/src/cpu/aarch64/xbyak_aarch64/src/util_impl_linux.h b/src/cpu/aarch64/xbyak_aarch64/src/util_impl_linux.h +index 2c7b28e58b..860a05700f 100644 +--- a/src/cpu/aarch64/xbyak_aarch64/src/util_impl_linux.h ++++ b/src/cpu/aarch64/xbyak_aarch64/src/util_impl_linux.h +@@ -144,8 +144,13 @@ private: + regex_t regexBuf; + regmatch_t match[1]; + +- if (regcomp(®exBuf, regex, REG_EXTENDED) != 0) +- throw ERR_INTERNAL; ++ if (regcomp(®exBuf, regex, REG_EXTENDED) != 0) { ++ /* There are platforms with /sys not mounted. 
return empty buffers ++ * in these scenarios ++ */ ++ buf[0] = '\0'; ++ return 0; ++ } + + const int retVal = regexec(®exBuf, path, 1, match, 0); + regfree(®exBuf); +@@ -187,8 +192,12 @@ private: + regex_t regexBuf; + regmatch_t match[2]; + +- if (regcomp(®exBuf, "index[0-9]*$", REG_EXTENDED) != 0) +- throw ERR_INTERNAL; ++ if (regcomp(®exBuf, "index[0-9]*$", REG_EXTENDED) != 0) { ++ /* There are platforms with /sys not mounted. return gracefully ++ * in these scenarios ++ */ ++ goto init_and_return_false; ++ } + + if (regexec(®exBuf, dp->d_name, 1, match, 0) == 0) { // Found index[1-9][0-9]. directory + char *dir_name = buf0; +@@ -438,12 +447,15 @@ private: + + FILE *file = fopen(path_midr_el1, "r"); + if (file == nullptr) { +- throw Error(ERR_INTERNAL); ++ /* There are platforms with /sys not mounted. return empty buffer ++ * in these scenarios ++ */ ++ cacheInfo_.midr_el1 = 0xFE << 24; + return; + } + + if (fread(buf, sizeof(char), 64, file) == 0) { +- throw Error(ERR_INTERNAL); ++ cacheInfo_.midr_el1 = 0xFE << 24; + return; + } + +diff --git a/src/cpu/aarch64/xbyak_aarch64/src/util_impl_mac.h b/src/cpu/aarch64/xbyak_aarch64/src/util_impl_mac.h +index ebd6dba7c0..93bdae1d7a 100644 +--- a/src/cpu/aarch64/xbyak_aarch64/src/util_impl_mac.h ++++ b/src/cpu/aarch64/xbyak_aarch64/src/util_impl_mac.h +@@ -102,18 +102,21 @@ private: + size_t val = 0; + size_t len = sizeof(val); + ++ /* There are platforms with /sys not mounted. skip ++ * handling HW caps for such platforms. ++ */ + if (sysctlbyname(hw_opt_atomics, &val, &len, NULL, 0) != 0) +- throw Error(ERR_INTERNAL); ++ type_ = 0; + else + type_ |= (val == 1) ? (Type)XBYAK_AARCH64_HWCAP_ATOMIC : 0; + + if (sysctlbyname(hw_opt_fp, &val, &len, NULL, 0) != 0) +- throw Error(ERR_INTERNAL); ++ type_ = 0; + else + type_ |= (val == 1) ? (Type)XBYAK_AARCH64_HWCAP_FP : 0; + + if (sysctlbyname(hw_opt_neon, &val, &len, NULL, 0) != 0) +- throw Error(ERR_INTERNAL); ++ type_ = 0; + else + type_ |= (val == 1) ? 
(Type)XBYAK_AARCH64_HWCAP_ADVSIMD : 0; + } +-- +2.34.1 + From b9d2b93c08bd256c42f7b1900a07bd958b3ab8ee Mon Sep 17 00:00:00 2001 From: Jithun Nair <37884920+jithunnair-amd@users.noreply.github.com> Date: Fri, 22 Dec 2023 22:03:36 -0600 Subject: [PATCH 163/212] Update builder images to ROCm6.0 (#1647) * Update ROCm versions for docker images * Don't build MIOpen from source for ROCm6.0 * Temporarily use magma fork with ROCm6.0 patch * Update ROCm versions for docker images * Add gfx942 * Update MIOpen repo * Magma PR 42 is merged, so use upstream repo master branch now * gfx942 target only fully supported for ROCm6.0 and above --- .github/workflows/build-libtorch-images.yml | 2 +- .github/workflows/build-manywheel-images.yml | 2 +- common/install_miopen.sh | 7 +++++-- common/install_rocm_magma.sh | 2 +- libtorch/build_all_docker.sh | 2 +- libtorch/build_docker.sh | 3 +++ manywheel/build_all_docker.sh | 2 +- manywheel/build_docker.sh | 3 +++ 8 files changed, 16 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build-libtorch-images.yml b/.github/workflows/build-libtorch-images.yml index 7968bbb26..7c8e59f36 100644 --- a/.github/workflows/build-libtorch-images.yml +++ b/.github/workflows/build-libtorch-images.yml @@ -52,7 +52,7 @@ jobs: runs-on: linux.12xlarge strategy: matrix: - rocm_version: ["5.6", "5.7"] + rocm_version: ["5.7", "6.0"] env: GPU_ARCH_TYPE: rocm GPU_ARCH_VERSION: ${{ matrix.rocm_version }} diff --git a/.github/workflows/build-manywheel-images.yml b/.github/workflows/build-manywheel-images.yml index d717416f6..46056ba14 100644 --- a/.github/workflows/build-manywheel-images.yml +++ b/.github/workflows/build-manywheel-images.yml @@ -58,7 +58,7 @@ jobs: runs-on: linux.12xlarge strategy: matrix: - rocm_version: ["5.6", "5.7"] + rocm_version: ["5.7", "6.0"] env: GPU_ARCH_TYPE: rocm GPU_ARCH_VERSION: ${{ matrix.rocm_version }} diff --git a/common/install_miopen.sh b/common/install_miopen.sh index 779bc755d..09ab251b7 100644 --- 
a/common/install_miopen.sh +++ b/common/install_miopen.sh @@ -56,7 +56,10 @@ MIOPEN_CMAKE_COMMON_FLAGS=" -DMIOPEN_BUILD_DRIVER=OFF " # Pull MIOpen repo and set DMIOPEN_EMBED_DB based on ROCm version -if [[ $ROCM_INT -ge 50700 ]] && [[ $ROCM_INT -lt 50800 ]]; then +if [[ $ROCM_INT -ge 60000 ]] && [[ $ROCM_INT -lt 60100 ]]; then + echo "ROCm 6.0 MIOpen does not need any patches, do not build from source" + exit 0 +elif [[ $ROCM_INT -ge 50700 ]] && [[ $ROCM_INT -lt 60000 ]]; then echo "ROCm 5.7 MIOpen does not need any patches, do not build from source" exit 0 elif [[ $ROCM_INT -ge 50600 ]] && [[ $ROCM_INT -lt 50700 ]]; then @@ -85,7 +88,7 @@ fi yum remove -y miopen-hip -git clone https://github.com/ROCmSoftwarePlatform/MIOpen -b ${MIOPEN_BRANCH} +git clone https://github.com/ROCm/MIOpen -b ${MIOPEN_BRANCH} pushd MIOpen # remove .git to save disk space since CI runner was running out rm -rf .git diff --git a/common/install_rocm_magma.sh b/common/install_rocm_magma.sh index c37c1e30a..c8e43f675 100644 --- a/common/install_rocm_magma.sh +++ b/common/install_rocm_magma.sh @@ -15,7 +15,7 @@ pushd magma if [[ $PYTORCH_BRANCH == "release/1.10.1" ]]; then git checkout magma_ctrl_launch_bounds else - git checkout 28592a7170e4b3707ed92644bf4a689ed600c27f + git checkout a1625ff4d9bc362906bd01f805dbbe12612953f6 fi cp make.inc-examples/make.inc.hip-gcc-mkl make.inc echo 'LIBDIR += -L$(MKLROOT)/lib' >> make.inc diff --git a/libtorch/build_all_docker.sh b/libtorch/build_all_docker.sh index fb6bd975b..1a3a90d5a 100755 --- a/libtorch/build_all_docker.sh +++ b/libtorch/build_all_docker.sh @@ -8,6 +8,6 @@ for cuda_version in 12.1 11.8; do GPU_ARCH_TYPE=cuda GPU_ARCH_VERSION="${cuda_version}" "${TOPDIR}/libtorch/build_docker.sh" done -for rocm_version in 5.6 5.7; do +for rocm_version in 5.7 6.0; do GPU_ARCH_TYPE=rocm GPU_ARCH_VERSION="${rocm_version}" "${TOPDIR}/libtorch/build_docker.sh" done diff --git a/libtorch/build_docker.sh b/libtorch/build_docker.sh index 8997f69cf..b7ebdd36e 
100755 --- a/libtorch/build_docker.sh +++ b/libtorch/build_docker.sh @@ -36,6 +36,9 @@ case ${GPU_ARCH_TYPE} in echo "ERROR: rocm regex failed" exit 1 fi + if [[ $ROCM_VERSION_INT -ge 60000 ]]; then + PYTORCH_ROCM_ARCH+=";gfx942" + fi DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}" ;; *) diff --git a/manywheel/build_all_docker.sh b/manywheel/build_all_docker.sh index 2995e3be7..8a02361cb 100644 --- a/manywheel/build_all_docker.sh +++ b/manywheel/build_all_docker.sh @@ -16,7 +16,7 @@ for cuda_version in 12.1 11.8; do MANYLINUX_VERSION=2014 GPU_ARCH_TYPE=cuda GPU_ARCH_VERSION="${cuda_version}" "${TOPDIR}/manywheel/build_docker.sh" done -for rocm_version in 5.6 5.7; do +for rocm_version in 5.7 6.0; do GPU_ARCH_TYPE=rocm GPU_ARCH_VERSION="${rocm_version}" "${TOPDIR}/manywheel/build_docker.sh" MANYLINUX_VERSION=2014 GPU_ARCH_TYPE=rocm GPU_ARCH_VERSION="${rocm_version}" "${TOPDIR}/manywheel/build_docker.sh" done diff --git a/manywheel/build_docker.sh b/manywheel/build_docker.sh index e547b4275..63b8e0c3d 100755 --- a/manywheel/build_docker.sh +++ b/manywheel/build_docker.sh @@ -57,6 +57,9 @@ case ${GPU_ARCH_TYPE} in echo "ERROR: rocm regex failed" exit 1 fi + if [[ $ROCM_VERSION_INT -ge 60000 ]]; then + PYTORCH_ROCM_ARCH+=";gfx942" + fi DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=9" ;; *) From 770c8275d6ad237fdc637bb97e5f84018baee08b Mon Sep 17 00:00:00 2001 From: cyyever Date: Wed, 27 Dec 2023 23:35:10 +0800 Subject: [PATCH 164/212] Avoid finding out std::basic_string_view (#1528) As pytorch moving to C++17, the binary can contain both "std::basic_string_view" and "std::__cxx11::basic_string<", change the pattern to avoid finding out std::basic_string_view, causing false positives. 
--- check_binary.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/check_binary.sh b/check_binary.sh index 9a2cf065b..98a5267eb 100755 --- a/check_binary.sh +++ b/check_binary.sh @@ -116,7 +116,7 @@ if [[ "$(uname)" != 'Darwin' ]]; then # # To check whether it is using cxx11 ABI, check non-existence of symbol: PRE_CXX11_SYMBOLS=( - "std::basic_string" + "std::basic_string<" "std::list" ) # To check whether it is using pre-cxx11 ABI, check non-existence of symbol: From 27c47026e4108f8e2f02892ab5062a42f28a3160 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Thu, 28 Dec 2023 17:56:03 +0000 Subject: [PATCH 165/212] Add test ops validation for validation workflows (#1650) * Add test ops validation * include workflows --- .github/scripts/validate_test_ops.sh | 16 ++++++++++++++++ .github/workflows/validate-binaries.yml | 11 +++++++++++ .github/workflows/validate-linux-binaries.yml | 15 +++++++++++++++ 3 files changed, 42 insertions(+) create mode 100644 .github/scripts/validate_test_ops.sh diff --git a/.github/scripts/validate_test_ops.sh b/.github/scripts/validate_test_ops.sh new file mode 100644 index 000000000..7c80b601e --- /dev/null +++ b/.github/scripts/validate_test_ops.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +set -eux -o pipefail + +retry () { + $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) +} + +# Clone the Pytorch branch +retry git clone --depth 1 https://github.com/pytorch/pytorch.git +retry git submodule update --init --recursive +pushd pytorch + +# Run test_ops validation +export CUDA_LAUNCH_BLOCKING=1 +python3 test/test_ops.py diff --git a/.github/workflows/validate-binaries.yml b/.github/workflows/validate-binaries.yml index 8dbe5f27c..558be8e56 100644 --- a/.github/workflows/validate-binaries.yml +++ b/.github/workflows/validate-binaries.yml @@ -27,6 +27,11 @@ on: default: false required: false type: boolean + include-test-ops: + description: 'Include Test Ops tests (only Linux)' + default: false + 
required: false + type: boolean workflow_dispatch: inputs: os: @@ -65,6 +70,11 @@ on: default: "" required: false type: string + include-test-ops: + description: 'Include Test Ops tests (only Linux)' + default: false + required: false + type: boolean jobs: @@ -94,6 +104,7 @@ jobs: torchonly: ${{ inputs.torchonly }} version: ${{ inputs.version }} release-matrix: ${{ needs.generate-release-matrix.outputs.matrix }} + include-test-ops: ${{ inputs.include-test-ops }} linux-aarch64: if: inputs.os == 'linux-aarch64' || inputs.os == 'all' diff --git a/.github/workflows/validate-linux-binaries.yml b/.github/workflows/validate-linux-binaries.yml index d1c6c29bd..3f652eff8 100644 --- a/.github/workflows/validate-linux-binaries.yml +++ b/.github/workflows/validate-linux-binaries.yml @@ -27,6 +27,11 @@ on: default: "" required: false type: string + include-test-ops: + description: 'Include Test Ops tests (only Linux)' + default: false + required: false + type: boolean workflow_dispatch: inputs: channel: @@ -58,6 +63,11 @@ on: default: "" required: false type: string + include-test-ops: + description: 'Include Test Ops tests (only Linux)' + default: false + required: false + type: boolean jobs: generate-linux-matrix: @@ -84,6 +94,7 @@ jobs: set -ex export ENV_NAME="conda-env-${{ github.run_id }}" export TORCH_ONLY=${{ inputs.torchonly }} + export INCLUDE_TEST_OPS=${{ inputs.include-test-ops }} export RELEASE_VERSION=${{ inputs.version }} export TARGET_OS="linux" eval "$(conda shell.bash hook)" @@ -97,3 +108,7 @@ jobs: # Standart case: Validate binaries source ./.github/scripts/validate_binaries.sh + + if [[ ${INCLUDE_TEST_OPS} == 'true' ]]; then + source ./.github/scripts/validate_test_ops.sh + fi From b16ac1fa2e64e98f26afaba4e300fe5843156349 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Thu, 28 Dec 2023 19:18:21 +0000 Subject: [PATCH 166/212] Add test ops validation for validation workflows (#1651) --- .github/scripts/validate_binaries.sh | 4 ++++ 
.github/workflows/validate-linux-binaries.yml | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/scripts/validate_binaries.sh b/.github/scripts/validate_binaries.sh index 22fc9d109..916f04f1a 100755 --- a/.github/scripts/validate_binaries.sh +++ b/.github/scripts/validate_binaries.sh @@ -49,6 +49,10 @@ else export PATH=${OLD_PATH} fi + if [[ ${INCLUDE_TEST_OPS} == 'true' ]]; then + source ./.github/scripts/validate_test_ops.sh + fi + conda deactivate conda env remove -n ${ENV_NAME} fi diff --git a/.github/workflows/validate-linux-binaries.yml b/.github/workflows/validate-linux-binaries.yml index 3f652eff8..74271e3d1 100644 --- a/.github/workflows/validate-linux-binaries.yml +++ b/.github/workflows/validate-linux-binaries.yml @@ -108,7 +108,3 @@ jobs: # Standart case: Validate binaries source ./.github/scripts/validate_binaries.sh - - if [[ ${INCLUDE_TEST_OPS} == 'true' ]]; then - source ./.github/scripts/validate_test_ops.sh - fi From 9c8a8dcce8ede569cada3f05adef755449547ce6 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Thu, 28 Dec 2023 20:33:11 +0000 Subject: [PATCH 167/212] Add test ops validation for validation workflows (#1652) --- .github/scripts/validate_test_ops.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/scripts/validate_test_ops.sh b/.github/scripts/validate_test_ops.sh index 7c80b601e..d8031c071 100644 --- a/.github/scripts/validate_test_ops.sh +++ b/.github/scripts/validate_test_ops.sh @@ -11,6 +11,8 @@ retry git clone --depth 1 https://github.com/pytorch/pytorch.git retry git submodule update --init --recursive pushd pytorch +pip install expecttest pyyaml jinja2 + # Run test_ops validation export CUDA_LAUNCH_BLOCKING=1 python3 test/test_ops.py From 50cb7c5c54556cb1376b5f706de10ea2d8c1ac71 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Thu, 28 Dec 2023 21:24:21 +0000 Subject: [PATCH 168/212] Add test ops validation for validation workflows (#1653) --- .github/scripts/validate_test_ops.sh | 11 
+++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/scripts/validate_test_ops.sh b/.github/scripts/validate_test_ops.sh index d8031c071..00bca8d79 100644 --- a/.github/scripts/validate_test_ops.sh +++ b/.github/scripts/validate_test_ops.sh @@ -6,12 +6,19 @@ retry () { $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) } +BRANCH = "@main" +if [[ ${MATRIX_CHANNEL} == "test" ]] + SHORT_VERSION=${MATRIX_STABLE_VERSION%.*} + BRANCH="@release/${SHORT_VERSION}" +fi + + # Clone the Pytorch branch -retry git clone --depth 1 https://github.com/pytorch/pytorch.git +retry git clone --depth 1 https://github.com/pytorch/pytorch.git${BRANCH} retry git submodule update --init --recursive pushd pytorch -pip install expecttest pyyaml jinja2 +pip install expecttest pyyaml jinja2 packaging # Run test_ops validation export CUDA_LAUNCH_BLOCKING=1 From 3b47169c71361c7b1fe02b349bfc416031f6ba0d Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Thu, 28 Dec 2023 21:39:01 +0000 Subject: [PATCH 169/212] Add test ops validation for validation workflows (#1654) --- .github/scripts/validate_test_ops.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/scripts/validate_test_ops.sh b/.github/scripts/validate_test_ops.sh index 00bca8d79..547b7340b 100644 --- a/.github/scripts/validate_test_ops.sh +++ b/.github/scripts/validate_test_ops.sh @@ -6,8 +6,8 @@ retry () { $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) } -BRANCH = "@main" -if [[ ${MATRIX_CHANNEL} == "test" ]] +BRANCH="@main" +if [[ ${MATRIX_CHANNEL} == "test" ]]; then SHORT_VERSION=${MATRIX_STABLE_VERSION%.*} BRANCH="@release/${SHORT_VERSION}" fi From f2b22ada2f5e028e1683ee18766fa5d3751ad271 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Thu, 28 Dec 2023 22:00:04 +0000 Subject: [PATCH 170/212] Add test ops validation for validation workflows (#1655) --- .github/scripts/validate_test_ops.sh | 6 +++--- 1 file changed, 3 
insertions(+), 3 deletions(-) diff --git a/.github/scripts/validate_test_ops.sh b/.github/scripts/validate_test_ops.sh index 547b7340b..bd724394b 100644 --- a/.github/scripts/validate_test_ops.sh +++ b/.github/scripts/validate_test_ops.sh @@ -6,15 +6,15 @@ retry () { $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) } -BRANCH="@main" +BRANCH="" if [[ ${MATRIX_CHANNEL} == "test" ]]; then SHORT_VERSION=${MATRIX_STABLE_VERSION%.*} - BRANCH="@release/${SHORT_VERSION}" + BRANCH="--branch release/${SHORT_VERSION}" fi # Clone the Pytorch branch -retry git clone --depth 1 https://github.com/pytorch/pytorch.git${BRANCH} +retry git clone ${BRANCH} --depth 1 https://github.com/pytorch/pytorch.git retry git submodule update --init --recursive pushd pytorch From b91724c654c6618883823fcef3c1841637fb3fbc Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Thu, 28 Dec 2023 22:27:59 +0000 Subject: [PATCH 171/212] [validations] Add missing required packages (#1656) --- .github/scripts/validate_test_ops.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/validate_test_ops.sh b/.github/scripts/validate_test_ops.sh index bd724394b..91ef3ffde 100644 --- a/.github/scripts/validate_test_ops.sh +++ b/.github/scripts/validate_test_ops.sh @@ -18,7 +18,7 @@ retry git clone ${BRANCH} --depth 1 https://github.com/pytorch/pytorch.git retry git submodule update --init --recursive pushd pytorch -pip install expecttest pyyaml jinja2 packaging +pip install expecttest numpy pyyaml jinja2 packaging xmlrunner hypothesis unittest-xml-reporting # Run test_ops validation export CUDA_LAUNCH_BLOCKING=1 From f1e19a7ecdd72804e02e7129f3893a87db68bb77 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Fri, 29 Dec 2023 15:15:20 +0000 Subject: [PATCH 172/212] [validations] Perform test_ops only on CUDA binaries (#1657) --- .github/scripts/validate_binaries.sh | 2 +- .github/scripts/validate_test_ops.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 
deletions(-) diff --git a/.github/scripts/validate_binaries.sh b/.github/scripts/validate_binaries.sh index 916f04f1a..23a411d19 100755 --- a/.github/scripts/validate_binaries.sh +++ b/.github/scripts/validate_binaries.sh @@ -49,7 +49,7 @@ else export PATH=${OLD_PATH} fi - if [[ ${INCLUDE_TEST_OPS} == 'true' ]]; then + if [[ ${INCLUDE_TEST_OPS} == 'true' && ${MATRIX_GPU_ARCH_TYPE} == 'cuda' ]]; then source ./.github/scripts/validate_test_ops.sh fi diff --git a/.github/scripts/validate_test_ops.sh b/.github/scripts/validate_test_ops.sh index 91ef3ffde..60686ee09 100644 --- a/.github/scripts/validate_test_ops.sh +++ b/.github/scripts/validate_test_ops.sh @@ -18,8 +18,8 @@ retry git clone ${BRANCH} --depth 1 https://github.com/pytorch/pytorch.git retry git submodule update --init --recursive pushd pytorch -pip install expecttest numpy pyyaml jinja2 packaging xmlrunner hypothesis unittest-xml-reporting +pip install expecttest numpy pyyaml jinja2 packaging hypothesis unittest-xml-reporting # Run test_ops validation export CUDA_LAUNCH_BLOCKING=1 -python3 test/test_ops.py +python3 test/test_ops.py TestCommonCUDA From 52259ba524d1b5b915b057525b1f360dd996cb95 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Fri, 29 Dec 2023 15:50:43 +0000 Subject: [PATCH 173/212] [validations] Adjust timeout for linux jobs (#1658) --- .github/workflows/validate-linux-binaries.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/validate-linux-binaries.yml b/.github/workflows/validate-linux-binaries.yml index 74271e3d1..f674afa04 100644 --- a/.github/workflows/validate-linux-binaries.yml +++ b/.github/workflows/validate-linux-binaries.yml @@ -90,6 +90,7 @@ jobs: ref: ${{ inputs.ref || github.ref }} job-name: ${{ matrix.build_name }} binary-matrix: ${{ toJSON(matrix) }} + timeout: 120 script: | set -ex export ENV_NAME="conda-env-${{ github.run_id }}" From 58b0295915e4f1ffe73a1305b06edeadc48e1230 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Fri, 29 Dec 2023 17:11:02 
+0000 Subject: [PATCH 174/212] [validations] Restrict testing for python 3.8-3.11 (#1659) --- .github/scripts/validate_binaries.sh | 3 ++- .github/scripts/validate_test_ops.sh | 2 +- .github/workflows/validate-linux-binaries.yml | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/scripts/validate_binaries.sh b/.github/scripts/validate_binaries.sh index 23a411d19..503947556 100755 --- a/.github/scripts/validate_binaries.sh +++ b/.github/scripts/validate_binaries.sh @@ -49,7 +49,8 @@ else export PATH=${OLD_PATH} fi - if [[ ${INCLUDE_TEST_OPS} == 'true' && ${MATRIX_GPU_ARCH_TYPE} == 'cuda' ]]; then + # We are only interested in CUDA tests and Python 3.8-3.11. Not all requirement libraries are available for 3.12 yet. + if [[ ${INCLUDE_TEST_OPS} == 'true' && ${MATRIX_GPU_ARCH_TYPE} == 'cuda' && ${MATRIX_PYTHON_VERSION} != "3.12" ]]; then source ./.github/scripts/validate_test_ops.sh fi diff --git a/.github/scripts/validate_test_ops.sh b/.github/scripts/validate_test_ops.sh index 60686ee09..e93f28918 100644 --- a/.github/scripts/validate_test_ops.sh +++ b/.github/scripts/validate_test_ops.sh @@ -18,7 +18,7 @@ retry git clone ${BRANCH} --depth 1 https://github.com/pytorch/pytorch.git retry git submodule update --init --recursive pushd pytorch -pip install expecttest numpy pyyaml jinja2 packaging hypothesis unittest-xml-reporting +pip install expecttest numpy pyyaml jinja2 packaging hypothesis unittest-xml-reporting scipy # Run test_ops validation export CUDA_LAUNCH_BLOCKING=1 diff --git a/.github/workflows/validate-linux-binaries.yml b/.github/workflows/validate-linux-binaries.yml index f674afa04..de5bda999 100644 --- a/.github/workflows/validate-linux-binaries.yml +++ b/.github/workflows/validate-linux-binaries.yml @@ -90,7 +90,7 @@ jobs: ref: ${{ inputs.ref || github.ref }} job-name: ${{ matrix.build_name }} binary-matrix: ${{ toJSON(matrix) }} - timeout: 120 + timeout: 180 script: | set -ex export ENV_NAME="conda-env-${{ github.run_id }}" From 
f2efe21571d9c83b891be121c4f9d50cd5ecdc38 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Fri, 29 Dec 2023 20:20:59 +0000 Subject: [PATCH 175/212] [validations] Fix use case if INCLUDE_TEST_OPS is not set (#1660) --- .github/scripts/validate_binaries.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/validate_binaries.sh b/.github/scripts/validate_binaries.sh index 503947556..042b22767 100755 --- a/.github/scripts/validate_binaries.sh +++ b/.github/scripts/validate_binaries.sh @@ -50,7 +50,7 @@ else fi # We are only interested in CUDA tests and Python 3.8-3.11. Not all requirement libraries are available for 3.12 yet. - if [[ ${INCLUDE_TEST_OPS} == 'true' && ${MATRIX_GPU_ARCH_TYPE} == 'cuda' && ${MATRIX_PYTHON_VERSION} != "3.12" ]]; then + if [[ ${INCLUDE_TEST_OPS:-} == 'true' && ${MATRIX_GPU_ARCH_TYPE} == 'cuda' && ${MATRIX_PYTHON_VERSION} != "3.12" ]]; then source ./.github/scripts/validate_test_ops.sh fi From ca784208feabb8a3ed3a50a1d66afa49ee855186 Mon Sep 17 00:00:00 2001 From: Wei Wang <143543872+nWEIdia@users.noreply.github.com> Date: Fri, 5 Jan 2024 06:24:11 -0800 Subject: [PATCH 176/212] Add unit tests and one line reproducers to detect bad pytorch cuda wheels (#1663) * Add one line reproducers and unit tests that would fail when bad wheels were generated by the compiler(s). nextafter reproducer thanks to @malfet! 
* cosmetic fixes * fix comments --- .github/scripts/validate_test_ops.sh | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/.github/scripts/validate_test_ops.sh b/.github/scripts/validate_test_ops.sh index e93f28918..e874c75a8 100644 --- a/.github/scripts/validate_test_ops.sh +++ b/.github/scripts/validate_test_ops.sh @@ -20,6 +20,16 @@ pushd pytorch pip install expecttest numpy pyyaml jinja2 packaging hypothesis unittest-xml-reporting scipy -# Run test_ops validation -export CUDA_LAUNCH_BLOCKING=1 -python3 test/test_ops.py TestCommonCUDA +# Run pytorch cuda wheels validation +# Detect ReduceLogicKernel (ReduceOp and kernel) IMA +python test/test_ops.py -k test_dtypes_all_cuda +# Detect BinaryMulKernel (elementwise binary functor internal mul) IMA +python test/test_torch.py -k test_index_reduce_reduce_prod_cuda_int32 +# Detect BinaryBitwiseOpsKernels (at::native::BitwiseAndFunctor) IMA +python test/test_binary_ufuncs.py -k test_contig_vs_every_other___rand___cuda_int32 +# Detect MaxMinElementwiseKernel (maximum) IMA +python test/test_schema_check.py -k test_schema_correctness_clamp_cuda_int8 +# Detect StepKernel (nextafter) IMA +python -c "import torch; print(torch.nextafter(torch.tensor([-4.5149, -5.9053, -0.9516, -2.3615, 1.5591], device='cuda:0'), torch.tensor(3.8075, device='cuda:0')))" +# Detect BinaryGeometricKernels (atan2) IMA +python -c "import torch; x = (torch.randn((2,1,1), dtype=torch.float, device="cuda")*5).to(torch.float32); y=(torch.randn((), dtype=torch.float, device="cuda")*5).to(torch.float32); print(torch.atan2(x,y))" From fe83c2156946c7f7f2b37b358de55ae8b01b1b6e Mon Sep 17 00:00:00 2001 From: Wei Wang <143543872+nWEIdia@users.noreply.github.com> Date: Fri, 5 Jan 2024 16:34:58 -0800 Subject: [PATCH 177/212] Fix quotation issues when migrating from python file to one line format (#1664) Sorry, looks like the last line had an issue while porting it from multi-line python file to one-line. 
Side question: when does this file get used? Is it only used during release binary generation/testing? --- .github/scripts/validate_test_ops.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/validate_test_ops.sh b/.github/scripts/validate_test_ops.sh index e874c75a8..0578d5ed9 100644 --- a/.github/scripts/validate_test_ops.sh +++ b/.github/scripts/validate_test_ops.sh @@ -32,4 +32,4 @@ python test/test_schema_check.py -k test_schema_correctness_clamp_cuda_int8 # Detect StepKernel (nextafter) IMA python -c "import torch; print(torch.nextafter(torch.tensor([-4.5149, -5.9053, -0.9516, -2.3615, 1.5591], device='cuda:0'), torch.tensor(3.8075, device='cuda:0')))" # Detect BinaryGeometricKernels (atan2) IMA -python -c "import torch; x = (torch.randn((2,1,1), dtype=torch.float, device="cuda")*5).to(torch.float32); y=(torch.randn((), dtype=torch.float, device="cuda")*5).to(torch.float32); print(torch.atan2(x,y))" +python -c "import torch; x = (torch.randn((2,1,1), dtype=torch.float, device='cuda')*5).to(torch.float32); y=(torch.randn((), dtype=torch.float, device='cuda')*5).to(torch.float32); print(torch.atan2(x,y))" From 04ef1bf031cc8b75d5d7f8a5e7a0832d5a0a46da Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Mon, 8 Jan 2024 16:53:56 +0000 Subject: [PATCH 178/212] Add nccl version print for cuda related smoke test (#1667) --- test/smoke_test/smoke_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/smoke_test/smoke_test.py b/test/smoke_test/smoke_test.py index 14e04d366..c518f15c8 100644 --- a/test/smoke_test/smoke_test.py +++ b/test/smoke_test/smoke_test.py @@ -163,6 +163,7 @@ def smoke_test_cuda(package: str, runtime_error_check: str) -> None: f"Wrong CUDA version. 
Loaded: {torch.version.cuda} Expected: {gpu_arch_ver}" ) print(f"torch cuda: {torch.version.cuda}") + print(f"torch nccl version: {torch.cuda.nccl.version()}" ) # todo add cudnn version validation print(f"torch cudnn: {torch.backends.cudnn.version()}") print(f"cuDNN enabled? {torch.backends.cudnn.enabled}") From 588ab91d6b344fab54c83f1c34e0021e8772c6fc Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Mon, 8 Jan 2024 21:37:08 +0000 Subject: [PATCH 179/212] Apply nccl test to linux only (#1669) --- test/smoke_test/smoke_test.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/test/smoke_test/smoke_test.py b/test/smoke_test/smoke_test.py index c518f15c8..cf78eb9e1 100644 --- a/test/smoke_test/smoke_test.py +++ b/test/smoke_test/smoke_test.py @@ -163,14 +163,17 @@ def smoke_test_cuda(package: str, runtime_error_check: str) -> None: f"Wrong CUDA version. Loaded: {torch.version.cuda} Expected: {gpu_arch_ver}" ) print(f"torch cuda: {torch.version.cuda}") - print(f"torch nccl version: {torch.cuda.nccl.version()}" ) # todo add cudnn version validation print(f"torch cudnn: {torch.backends.cudnn.version()}") print(f"cuDNN enabled? {torch.backends.cudnn.enabled}") # torch.compile is available only on Linux and python 3.8-3.11 - if (sys.platform in ["linux", "linux2"]) and sys.version_info < (3, 12, 0): - smoke_test_compile() + # nccl is availbale only on Linux + if (sys.platform in ["linux", "linux2"]): + print(f"torch nccl version: {torch.cuda.nccl.version()}") + + if(sys.version_info < (3, 12, 0)): + smoke_test_compile() if runtime_error_check == "enabled": test_cuda_runtime_errors_captured() From 4c758b39db39b80c4b0ab43a83e3e519bdce4565 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Tue, 9 Jan 2024 01:54:08 +0000 Subject: [PATCH 180/212] Build nccl after installing cuda (#1670) Fix: https://github.com/pytorch/pytorch/issues/116977 Nccl 2.19.3 don't exist for cuda 11.8 and cuda 12.1. 
Refer to https://docs.nvidia.com/deeplearning/nccl/release-notes/rel_2-19-3.html#rel_2-19-3 CUDA 12.0, 12.2, 12.3 are supported. Hence we do manual build. Follow this build process: https://github.com/NVIDIA/nccl/tree/v2.19.3-1?tab=readme-ov-file#build We want nccl version be exactly the same as installed here: https://github.com/pytorch/pytorch/blob/main/.github/scripts/generate_binary_build_matrix.py#L45 --- common/install_cuda.sh | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/common/install_cuda.sh b/common/install_cuda.sh index 35babf576..01ed13def 100644 --- a/common/install_cuda.sh +++ b/common/install_cuda.sh @@ -33,13 +33,13 @@ function install_118 { rm -rf tmp_cudnn # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses - mkdir tmp_nccl && cd tmp_nccl - wget -q https://developer.download.nvidia.com/compute/redist/nccl/v2.15.5/nccl_2.15.5-1+cuda11.8_x86_64.txz - tar xf nccl_2.15.5-1+cuda11.8_x86_64.txz - cp -a nccl_2.15.5-1+cuda11.8_x86_64/include/* /usr/local/cuda/include/ - cp -a nccl_2.15.5-1+cuda11.8_x86_64/lib/* /usr/local/cuda/lib64/ + # Follow build: https://github.com/NVIDIA/nccl/tree/v2.19.3-1?tab=readme-ov-file#build + git clone -b v2.19.3-1 --depth 1 https://github.com/NVIDIA/nccl.git + cd nccl && make -j src.build + cp -a build/include/* /usr/local/cuda/include/ + cp -a build/lib/* /usr/local/cuda/lib64/ cd .. 
- rm -rf tmp_nccl + rm -rf nccl install_cusparselt_040 @@ -66,13 +66,13 @@ function install_121 { rm -rf tmp_cudnn # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses - mkdir tmp_nccl && cd tmp_nccl - wget -q https://developer.download.nvidia.com/compute/redist/nccl/v2.18.1/nccl_2.18.1-1+cuda12.1_x86_64.txz - tar xf nccl_2.18.1-1+cuda12.1_x86_64.txz - cp -a nccl_2.18.1-1+cuda12.1_x86_64/include/* /usr/local/cuda/include/ - cp -a nccl_2.18.1-1+cuda12.1_x86_64/lib/* /usr/local/cuda/lib64/ + # Follow build: https://github.com/NVIDIA/nccl/tree/v2.19.3-1?tab=readme-ov-file#build + git clone -b v2.19.3-1 --depth 1 https://github.com/NVIDIA/nccl.git + cd nccl && make -j src.build + cp -a build/include/* /usr/local/cuda/include/ + cp -a build/lib/* /usr/local/cuda/lib64/ cd .. - rm -rf tmp_nccl + rm -rf nccl install_cusparselt_040 From 53b5b02311b3a736e30e17a656c63de9c78c45c5 Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Tue, 9 Jan 2024 18:06:23 -0500 Subject: [PATCH 181/212] Update cusparselt to v0.5.2 (#1672) This PR adds in support for cuSPARSELt v0.5.2 and updates the cuda 12.1 build step to use it instead of 0.4.0 Also fixes a typo when deleting the cusparselt folder after installing. 
--- common/install_cuda.sh | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/common/install_cuda.sh b/common/install_cuda.sh index 01ed13def..a62831f7d 100644 --- a/common/install_cuda.sh +++ b/common/install_cuda.sh @@ -10,11 +10,22 @@ function install_cusparselt_040 { cp -a libcusparse_lt-linux-x86_64-0.4.0.7-archive/include/* /usr/local/cuda/include/ cp -a libcusparse_lt-linux-x86_64-0.4.0.7-archive/lib/* /usr/local/cuda/lib64/ popd - rm -rf tmp_custparselt + rm -rf tmp_cusparselt +} + +function install_cusparselt_052 { + # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html + mkdir tmp_cusparselt && pushd tmp_cusparselt + wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/libcusparse_lt-linux-x86_64-0.5.2.1-archive.tar.xz + tar xf libcusparse_lt-linux-x86_64-0.5.2.1-archive.tar.xz + cp -a libcusparse_lt-linux-x86_64-0.5.2.1-archive/include/* /usr/local/cuda/include/ + cp -a libcusparse_lt-linux-x86_64-0.5.2.1-archive/lib/* /usr/local/cuda/lib64/ + popd + rm -rf tmp_cusparselt } function install_118 { - echo "Installing CUDA 11.8 and cuDNN 8.7 and NCCL 2.15 and cuSparseLt-0.5.0" + echo "Installing CUDA 11.8 and cuDNN 8.7 and NCCL 2.15 and cuSparseLt-0.4.0" rm -rf /usr/local/cuda-11.8 /usr/local/cuda # install CUDA 11.8.0 in the same container wget -q https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run @@ -47,7 +58,7 @@ function install_118 { } function install_121 { - echo "Installing CUDA 12.1 and cuDNN 8.9 and NCCL 2.18.1 and cuSparseLt-0.5.0" + echo "Installing CUDA 12.1 and cuDNN 8.9 and NCCL 2.18.1 and cuSparseLt-0.5.2" rm -rf /usr/local/cuda-12.1 /usr/local/cuda # install CUDA 12.1.0 in the same container wget -q https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda_12.1.1_530.30.02_linux.run @@ -74,7 +85,7 @@ function install_121 { cd .. 
rm -rf nccl - install_cusparselt_040 + install_cusparselt_052 ldconfig } From 1d1f352b852253ae880c129ce2d019904f113bf6 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Fri, 12 Jan 2024 21:11:39 +0000 Subject: [PATCH 182/212] Run test ops tests from outside of pytorch root folder (#1676) --- .github/scripts/validate_test_ops.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/scripts/validate_test_ops.sh b/.github/scripts/validate_test_ops.sh index 0578d5ed9..12963f289 100644 --- a/.github/scripts/validate_test_ops.sh +++ b/.github/scripts/validate_test_ops.sh @@ -20,7 +20,7 @@ pushd pytorch pip install expecttest numpy pyyaml jinja2 packaging hypothesis unittest-xml-reporting scipy -# Run pytorch cuda wheels validation +# Run pytorch cuda wheels validation # Detect ReduceLogicKernel (ReduceOp and kernel) IMA python test/test_ops.py -k test_dtypes_all_cuda # Detect BinaryMulKernel (elementwise binary functor internal mul) IMA @@ -29,7 +29,10 @@ python test/test_torch.py -k test_index_reduce_reduce_prod_cuda_int32 python test/test_binary_ufuncs.py -k test_contig_vs_every_other___rand___cuda_int32 # Detect MaxMinElementwiseKernel (maximum) IMA python test/test_schema_check.py -k test_schema_correctness_clamp_cuda_int8 + +pushd /tmp # Detect StepKernel (nextafter) IMA python -c "import torch; print(torch.nextafter(torch.tensor([-4.5149, -5.9053, -0.9516, -2.3615, 1.5591], device='cuda:0'), torch.tensor(3.8075, device='cuda:0')))" # Detect BinaryGeometricKernels (atan2) IMA python -c "import torch; x = (torch.randn((2,1,1), dtype=torch.float, device='cuda')*5).to(torch.float32); y=(torch.randn((), dtype=torch.float, device='cuda')*5).to(torch.float32); print(torch.atan2(x,y))" +popd From 9870b250419df4fc8267ded3746cf03fcfa62674 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Mon, 15 Jan 2024 21:50:38 +0000 Subject: [PATCH 183/212] Remove s3 update html job and scripts (#1677) --- .github/workflows/update-s3-html.yml | 35 -- 
.lintrunner.toml | 2 +- s3_management/Dockerfile | 6 - s3_management/Makefile | 9 - s3_management/README.md | 21 -- s3_management/backup_conda.py | 73 ---- s3_management/manage.py | 517 --------------------------- s3_management/requirements.txt | 2 - 8 files changed, 1 insertion(+), 664 deletions(-) delete mode 100644 .github/workflows/update-s3-html.yml delete mode 100644 s3_management/Dockerfile delete mode 100644 s3_management/Makefile delete mode 100644 s3_management/README.md delete mode 100644 s3_management/backup_conda.py delete mode 100644 s3_management/manage.py delete mode 100644 s3_management/requirements.txt diff --git a/.github/workflows/update-s3-html.yml b/.github/workflows/update-s3-html.yml deleted file mode 100644 index 7c285418e..000000000 --- a/.github/workflows/update-s3-html.yml +++ /dev/null @@ -1,35 +0,0 @@ -name: Update S3 HTML indices for download.pytorch.org - -on: - schedule: - # Update the indices every 30 minutes - - cron: "*/30 * * * *" - workflow_dispatch: - -jobs: - update: - strategy: - matrix: - prefix: ["whl", "whl/test", "whl/nightly", "whl/lts/1.8"] - fail-fast: False - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main - secrets: inherit - with: - repository: pytorch/builder - timeout: 60 - secrets-env: AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY - script: | - set -ex - - # Create Conda Environment - git config --global --add safe.directory /__w/builder/builder - conda create --quiet -y --prefix run_env python="3.8" - conda activate ./run_env - - # Set Envs - export AWS_ACCESS_KEY_ID="${SECRET_AWS_ACCESS_KEY_ID}" - export AWS_SECRET_ACCESS_KEY="${SECRET_AWS_SECRET_ACCESS_KEY}" - - # Install requirements - pip install -r s3_management/requirements.txt - python s3_management/manage.py --generate-pep503 ${{ matrix.prefix }} diff --git a/.lintrunner.toml b/.lintrunner.toml index fdfca4cef..b22997683 100644 --- a/.lintrunner.toml +++ b/.lintrunner.toml @@ -2,7 +2,7 @@ merge_base_with = "origin/main" [[linter]] code = 'RUFF' 
-include_patterns = ['test/smoke_test/*.py', 's3_management/*.py', 'aarch64_linux/*.py'] +include_patterns = ['test/smoke_test/*.py', 'aarch64_linux/*.py'] command = [ 'python3', 'tools/linter/adapters/ruff_linter.py', diff --git a/s3_management/Dockerfile b/s3_management/Dockerfile deleted file mode 100644 index def716f75..000000000 --- a/s3_management/Dockerfile +++ /dev/null @@ -1,6 +0,0 @@ -FROM python:3.8 -WORKDIR /work -ADD requirements.txt . -RUN pip install -r requirements.txt -ADD manage.py . -ENTRYPOINT ["python", "/work/manage.py"] diff --git a/s3_management/Makefile b/s3_management/Makefile deleted file mode 100644 index e9e4699d1..000000000 --- a/s3_management/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -DIR:=$(strip $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))) - -.PHONY: build-image -build-image: requirements.txt manage.py - docker build -t pytorch/manage_s3_html "$(DIR)" - -.PHONY: push-image -push-image: build-image - docker push pytorch/manage_s3_html diff --git a/s3_management/README.md b/s3_management/README.md deleted file mode 100644 index edc87691b..000000000 --- a/s3_management/README.md +++ /dev/null @@ -1,21 +0,0 @@ -# s3_management - -This directory houses scripts to maintain the s3 HTML indices for https://download.pytorch.org/whl - -## Building the image - -``` -make build-image -``` - -## Pushing the image - -``` -make push-image -``` - -## Running the image - -``` -docker run --rm -it -e AWS_SECRET_ACCESS_KEY -e AWS_ACCESS_KEY_ID pytorch/manage_s3_html all -``` diff --git a/s3_management/backup_conda.py b/s3_management/backup_conda.py deleted file mode 100644 index 7dafa32b4..000000000 --- a/s3_management/backup_conda.py +++ /dev/null @@ -1,73 +0,0 @@ -#!/usr/bin/env python3 -# Downloads domain pytorch and library packages from channel -# And backs them up to S3 -# Do not use unless you know what you are doing -# Usage: python backup_conda.py --version 1.6.0 - -import boto3 -from typing import List, Optional -import conda.api 
-import urllib -import os -import hashlib -import argparse - -S3 = boto3.resource('s3') -BUCKET = S3.Bucket('pytorch-backup') -_known_subdirs = ["linux-64", "osx-64", "osx-arm64", "win-64"] - - -def compute_md5(path:str) -> str: - with open(path, "rb") as f: - return hashlib.md5(f.read()).hexdigest() - - -def download_conda_package(package:str, version:Optional[str] = None, - depends:Optional[str] = None, channel:Optional[str] = None) -> List[str]: - packages = conda.api.SubdirData.query_all(package, - channels = [channel] if channel is not None else None, - subdirs = _known_subdirs) - rc = [] - - for pkg in packages: - if version is not None and pkg.version != version: - continue - if depends is not None and depends not in pkg.depends: - continue - - print(f"Downloading {pkg.url}...") - os.makedirs(pkg.subdir, exist_ok = True) - fname = f"{pkg.subdir}/{pkg.fn}" - if not os.path.exists(fname): - with open(fname, "wb") as f, urllib.request.urlopen(pkg.url) as url: - f.write(url.read()) - if compute_md5(fname) != pkg.md5: - print(f"md5 of {fname} is {compute_md5(fname)} does not match {pkg.md5}") - continue - rc.append(fname) - - return rc - -def upload_to_s3(prefix: str, fnames: List[str]) -> None: - for fname in fnames: - BUCKET.upload_file(fname, f"{prefix}/{fname}") - print(fname) - - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "--version", - help="PyTorch Version to backup", - type=str, - required = True - ) - options = parser.parse_args() - rc = download_conda_package("pytorch", channel = "pytorch", version = options.version) - upload_to_s3(f"v{options.version}/conda", rc) - - for libname in ["torchvision", "torchaudio", "torchtext"]: - print(f"processing {libname}") - rc = download_conda_package(libname, channel = "pytorch", depends = f"pytorch {options.version}") - upload_to_s3(f"v{options.version}/conda", rc) diff --git a/s3_management/manage.py b/s3_management/manage.py deleted file mode 100644 index 
851a848c6..000000000 --- a/s3_management/manage.py +++ /dev/null @@ -1,517 +0,0 @@ -#!/usr/bin/env python - -import argparse -import base64 -import concurrent.futures -import dataclasses -import functools -import time - -from os import path, makedirs -from datetime import datetime -from collections import defaultdict -from typing import Iterable, List, Type, Dict, Set, TypeVar, Optional -from re import sub, match, search -from packaging.version import parse as _parse_version, Version, InvalidVersion - -import boto3 - - -S3 = boto3.resource('s3') -CLIENT = boto3.client('s3') -BUCKET = S3.Bucket('pytorch') - -ACCEPTED_FILE_EXTENSIONS = ("whl", "zip", "tar.gz") -ACCEPTED_SUBDIR_PATTERNS = [ - r"cu[0-9]+", # for cuda - r"rocm[0-9]+\.[0-9]+", # for rocm - "cpu", -] -PREFIXES_WITH_HTML = { - "whl": "torch_stable.html", - "whl/lts/1.8": "torch_lts.html", - "whl/nightly": "torch_nightly.html", - "whl/test": "torch_test.html", - "libtorch": "index.html", - "libtorch/nightly": "index.html", -} - -# NOTE: This refers to the name on the wheels themselves and not the name of -# package as specified by setuptools, for packages with "-" (hyphens) in their -# names you need to convert them to "_" (underscores) in order for them to be -# allowed here since the name of the wheels is compared here -PACKAGE_ALLOW_LIST = { - "Pillow", - "certifi", - "charset_normalizer", - "cmake", - "colorama", - "fbgemm_gpu", - "filelock", - "fsspec", - "idna", - "Jinja2", - "lit", - "MarkupSafe", - "mpmath", - "nestedtensor", - "networkx", - "numpy", - "nvidia_cublas_cu11", - "nvidia_cuda_cupti_cu11", - "nvidia_cuda_nvrtc_cu11", - "nvidia_cuda_runtime_cu11", - "nvidia_cudnn_cu11", - "nvidia_cufft_cu11", - "nvidia_curand_cu11", - "nvidia_cusolver_cu11", - "nvidia_cusparse_cu11", - "nvidia_nccl_cu11", - "nvidia_nvtx_cu11", - "nvidia_cublas_cu12", - "nvidia_cuda_cupti_cu12", - "nvidia_cuda_nvrtc_cu12", - "nvidia_cuda_runtime_cu12", - "nvidia_cudnn_cu12", - "nvidia_cufft_cu12", - "nvidia_curand_cu12", - 
"nvidia_cusolver_cu12", - "nvidia_cusparse_cu12", - "nvidia_nccl_cu12", - "nvidia_nvtx_cu12", - "nvidia_nvjitlink_cu12", - "packaging", - "portalocker", - "pytorch_triton", - "pytorch_triton_rocm", - "requests", - "sympy", - "torch", - "torch_tensorrt", - "torcharrow", - "torchaudio", - "torchcsprng", - "torchdata", - "torchdistx", - "torchmetrics", - "torchrec", - "torchtext", - "torchvision", - "triton", - "tqdm", - "typing_extensions", - "urllib3", - "xformers", -} - -# Should match torch-2.0.0.dev20221221+cu118-cp310-cp310-linux_x86_64.whl as: -# Group 1: torch-2.0.0.dev -# Group 2: 20221221 -PACKAGE_DATE_REGEX = r"([a-zA-z]*-[0-9.]*.dev)([0-9]*)" - -# How many packages should we keep of a specific package? -KEEP_THRESHOLD = 60 - -# TODO (huydhn): Clean this up once ExecuTorch has a new stable release that -# match PyTorch stable release cadence. This nightly version is currently -# referred to publicly in ExecuTorch alpha 0.1 release. So we want to keep -# nightly binaries around for now -KEEP_NIGHTLY_PACKAGES_FOR_EXECUTORCH = {datetime(2023, 10, 10, 0, 0)} - -S3IndexType = TypeVar('S3IndexType', bound='S3Index') - - -@dataclasses.dataclass(frozen=False) -@functools.total_ordering -class S3Object: - key: str - orig_key: str - checksum: Optional[str] - size: Optional[int] - - def __hash__(self): - return hash(self.key) - - def __str__(self): - return self.key - - def __eq__(self, other): - return self.key == other.key - - def __lt__(self, other): - return self.key < other.key - - -def extract_package_build_time(full_package_name: str) -> datetime: - result = search(PACKAGE_DATE_REGEX, full_package_name) - if result is not None: - try: - return datetime.strptime(result.group(2), "%Y%m%d") - except ValueError: - # Ignore any value errors since they probably shouldn't be hidden anyways - pass - return datetime.now() - - -def between_bad_dates(package_build_time: datetime): - start_bad = datetime(year=2022, month=8, day=17) - end_bad = datetime(year=2022, month=12, 
day=30) - return start_bad <= package_build_time <= end_bad - - -def safe_parse_version(ver_str: str) -> Version: - try: - return _parse_version(ver_str) - except InvalidVersion: - return Version("0.0.0") - - - -class S3Index: - def __init__(self: S3IndexType, objects: List[S3Object], prefix: str) -> None: - self.objects = objects - self.prefix = prefix.rstrip("/") - self.html_name = PREFIXES_WITH_HTML[self.prefix] - # should dynamically grab subdirectories like whl/test/cu101 - # so we don't need to add them manually anymore - self.subdirs = { - path.dirname(obj.key) for obj in objects if path.dirname != prefix - } - - def nightly_packages_to_show(self: S3IndexType) -> List[S3Object]: - """Finding packages to show based on a threshold we specify - - Basically takes our S3 packages, normalizes the version for easier - comparisons, then iterates over normalized versions until we reach a - threshold and then starts adding package to delete after that threshold - has been reached - - After figuring out what versions we'd like to hide we iterate over - our original object list again and pick out the full paths to the - packages that are included in the list of versions to delete - """ - # also includes versions without GPU specifier (i.e. 
cu102) for easier - # sorting, sorts in reverse to put the most recent versions first - all_sorted_packages = sorted( - {self.normalize_package_version(obj) for obj in self.objects}, - key=lambda name_ver: safe_parse_version(name_ver.split('-', 1)[-1]), - reverse=True, - ) - packages: Dict[str, int] = defaultdict(int) - to_hide: Set[str] = set() - for obj in all_sorted_packages: - full_package_name = path.basename(obj) - package_name = full_package_name.split('-')[0] - package_build_time = extract_package_build_time(full_package_name) - # Hard pass on packages that are included in our allow list - if package_name not in PACKAGE_ALLOW_LIST: - to_hide.add(obj) - continue - if package_build_time not in KEEP_NIGHTLY_PACKAGES_FOR_EXECUTORCH and ( - packages[package_name] >= KEEP_THRESHOLD - or between_bad_dates(package_build_time) - ): - to_hide.add(obj) - else: - packages[package_name] += 1 - return list(set(self.objects).difference({ - obj for obj in self.objects - if self.normalize_package_version(obj) in to_hide - })) - - def is_obj_at_root(self, obj: S3Object) -> bool: - return path.dirname(obj.key) == self.prefix - - def _resolve_subdir(self, subdir: Optional[str] = None) -> str: - if not subdir: - subdir = self.prefix - # make sure we strip any trailing slashes - return subdir.rstrip("/") - - def gen_file_list( - self, - subdir: Optional[str] = None, - package_name: Optional[str] = None - ) -> Iterable[S3Object]: - objects = self.objects - subdir = self._resolve_subdir(subdir) + '/' - for obj in objects: - if package_name is not None and self.obj_to_package_name(obj) != package_name: - continue - if self.is_obj_at_root(obj) or obj.key.startswith(subdir): - yield obj - - def get_package_names(self, subdir: Optional[str] = None) -> List[str]: - return sorted({self.obj_to_package_name(obj) for obj in self.gen_file_list(subdir)}) - - def normalize_package_version(self: S3IndexType, obj: S3Object) -> str: - # removes the GPU specifier from the package name as well as 
- # unnecessary things like the file extension, architecture name, etc. - return sub( - r"%2B.*", - "", - "-".join(path.basename(obj.key).split("-")[:2]) - ) - - def obj_to_package_name(self, obj: S3Object) -> str: - return path.basename(obj.key).split('-', 1)[0] - - def to_legacy_html( - self, - subdir: Optional[str] = None - ) -> str: - """Generates a string that can be used as the HTML index - - Takes our objects and transforms them into HTML that have historically - been used by pip for installing pytorch. - - NOTE: These are not PEP 503 compliant but are here for legacy purposes - """ - out: List[str] = [] - subdir = self._resolve_subdir(subdir) - is_root = subdir == self.prefix - for obj in self.gen_file_list(subdir): - # Strip our prefix - sanitized_obj = obj.key.replace(subdir, "", 1) - if sanitized_obj.startswith('/'): - sanitized_obj = sanitized_obj.lstrip("/") - # we include objects at our root prefix so that users can still - # install packages like torchaudio / torchtext even if they want - # to install a specific GPU arch of torch / torchvision - if not is_root and self.is_obj_at_root(obj): - # strip root prefix - sanitized_obj = obj.key.replace(self.prefix, "", 1).lstrip("/") - sanitized_obj = f"../{sanitized_obj}" - out.append(f'{sanitized_obj}
') - return "\n".join(sorted(out)) - - def to_simple_package_html( - self, - subdir: Optional[str], - package_name: str - ) -> str: - """Generates a string that can be used as the package simple HTML index - """ - out: List[str] = [] - # Adding html header - out.append('') - out.append('') - out.append(' ') - out.append('

Links for {}

'.format(package_name.lower().replace("_", "-"))) - for obj in sorted(self.gen_file_list(subdir, package_name)): - maybe_fragment = f"#sha256={obj.checksum}" if obj.checksum else "" - out.append(f' {path.basename(obj.key).replace("%2B","+")}
') - # Adding html footer - out.append(' ') - out.append('') - out.append(f'') - return '\n'.join(out) - - def to_simple_packages_html( - self, - subdir: Optional[str], - ) -> str: - """Generates a string that can be used as the simple HTML index - """ - out: List[str] = [] - # Adding html header - out.append('') - out.append('') - out.append(' ') - for pkg_name in sorted(self.get_package_names(subdir)): - out.append(f' {pkg_name.replace("_","-")}
') - # Adding html footer - out.append(' ') - out.append('') - out.append(f'') - return '\n'.join(out) - - def upload_legacy_html(self) -> None: - for subdir in self.subdirs: - print(f"INFO Uploading {subdir}/{self.html_name}") - BUCKET.Object( - key=f"{subdir}/{self.html_name}" - ).put( - ACL='public-read', - CacheControl='no-cache,no-store,must-revalidate', - ContentType='text/html', - Body=self.to_legacy_html(subdir=subdir) - ) - - def upload_pep503_htmls(self) -> None: - for subdir in self.subdirs: - print(f"INFO Uploading {subdir}/index.html") - BUCKET.Object( - key=f"{subdir}/index.html" - ).put( - ACL='public-read', - CacheControl='no-cache,no-store,must-revalidate', - ContentType='text/html', - Body=self.to_simple_packages_html(subdir=subdir) - ) - for pkg_name in self.get_package_names(subdir=subdir): - compat_pkg_name = pkg_name.lower().replace("_", "-") - print(f"INFO Uploading {subdir}/{compat_pkg_name}/index.html") - BUCKET.Object( - key=f"{subdir}/{compat_pkg_name}/index.html" - ).put( - ACL='public-read', - CacheControl='no-cache,no-store,must-revalidate', - ContentType='text/html', - Body=self.to_simple_package_html(subdir=subdir, package_name=pkg_name) - ) - - def save_legacy_html(self) -> None: - for subdir in self.subdirs: - print(f"INFO Saving {subdir}/{self.html_name}") - makedirs(subdir, exist_ok=True) - with open(path.join(subdir, self.html_name), mode="w", encoding="utf-8") as f: - f.write(self.to_legacy_html(subdir=subdir)) - - def save_pep503_htmls(self) -> None: - for subdir in self.subdirs: - print(f"INFO Saving {subdir}/index.html") - makedirs(subdir, exist_ok=True) - with open(path.join(subdir, "index.html"), mode="w", encoding="utf-8") as f: - f.write(self.to_simple_packages_html(subdir=subdir)) - for pkg_name in self.get_package_names(subdir=subdir): - makedirs(path.join(subdir, pkg_name), exist_ok=True) - with open(path.join(subdir, pkg_name, "index.html"), mode="w", encoding="utf-8") as f: - 
f.write(self.to_simple_package_html(subdir=subdir, package_name=pkg_name)) - - def compute_sha256(self) -> None: - for obj in self.objects: - if obj.checksum is not None: - continue - print(f"Updating {obj.orig_key} of size {obj.size} with SHA256 checksum") - s3_obj = BUCKET.Object(key=obj.orig_key) - s3_obj.copy_from(CopySource={"Bucket": BUCKET.name, "Key": obj.orig_key}, - Metadata=s3_obj.metadata, MetadataDirective="REPLACE", - ACL="public-read", - ChecksumAlgorithm="SHA256") - - @classmethod - def has_public_read(cls: Type[S3IndexType], key: str) -> bool: - def is_all_users_group(o) -> bool: - return o.get("Grantee", {}).get("URI") == "http://acs.amazonaws.com/groups/global/AllUsers" - - def can_read(o) -> bool: - return o.get("Permission") in ["READ", "FULL_CONTROL"] - - acl_grants = CLIENT.get_object_acl(Bucket=BUCKET.name, Key=key)["Grants"] - return any(is_all_users_group(x) and can_read(x) for x in acl_grants) - - @classmethod - def grant_public_read(cls: Type[S3IndexType], key: str) -> None: - CLIENT.put_object_acl(Bucket=BUCKET.name, Key=key, ACL="public-read") - - @classmethod - def fetch_object_names(cls: Type[S3IndexType], prefix: str) -> List[str]: - obj_names = [] - for obj in BUCKET.objects.filter(Prefix=prefix): - is_acceptable = any([path.dirname(obj.key) == prefix] + [ - match( - f"{prefix}/{pattern}", - path.dirname(obj.key) - ) - for pattern in ACCEPTED_SUBDIR_PATTERNS - ]) and obj.key.endswith(ACCEPTED_FILE_EXTENSIONS) - if not is_acceptable: - continue - obj_names.append(obj.key) - return obj_names - - def fetch_metadata(self: S3IndexType) -> None: - # Add PEP 503-compatible hashes to URLs to allow clients to avoid spurious downloads, if possible. 
- with concurrent.futures.ThreadPoolExecutor(max_workers=6) as executor: - for idx, future in { - idx: executor.submit( - lambda key: CLIENT.head_object( - Bucket=BUCKET.name, Key=key, ChecksumMode="Enabled" - ), - obj.orig_key, - ) - for (idx, obj) in enumerate(self.objects) - if obj.size is None - }.items(): - response = future.result() - sha256 = (_b64 := response.get("ChecksumSHA256")) and base64.b64decode(_b64).hex() - # For older files, rely on checksum-sha256 metadata that can be added to the file later - if sha256 is None: - sha256 = response.get("Metadata", {}).get("checksum-sha256") - self.objects[idx].checksum = sha256 - if size := response.get("ContentLength"): - self.objects[idx].size = int(size) - - @classmethod - def from_S3(cls: Type[S3IndexType], prefix: str, with_metadata: bool = True) -> S3IndexType: - prefix = prefix.rstrip("/") - obj_names = cls.fetch_object_names(prefix) - - def sanitize_key(key: str) -> str: - return key.replace("+", "%2B") - - rc = cls([S3Object(key=sanitize_key(key), - orig_key=key, - checksum=None, - size=None) for key in obj_names], prefix) - if prefix == "whl/nightly": - rc.objects = rc.nightly_packages_to_show() - if with_metadata: - rc.fetch_metadata() - return rc - - @classmethod - def undelete_prefix(cls: Type[S3IndexType], prefix: str) -> None: - paginator = CLIENT.get_paginator("list_object_versions") - for page in paginator.paginate(Bucket=BUCKET.name, Prefix=prefix): - for obj in page.get("DeleteMarkers", []): - if not obj.get("IsLatest"): - continue - obj_key, obj_version_id = obj["Key"], obj["VersionId"] - obj_ver = S3.ObjectVersion(BUCKET.name, obj_key, obj_version_id) - print(f"Undeleting {obj_key} deleted on {obj['LastModified']}") - obj_ver.delete() - - -def create_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser("Manage S3 HTML indices for PyTorch") - parser.add_argument( - "prefix", - type=str, - choices=list(PREFIXES_WITH_HTML.keys()) + ["all"] - ) - 
parser.add_argument("--do-not-upload", action="store_true") - parser.add_argument("--generate-pep503", action="store_true") - parser.add_argument("--compute-sha256", action="store_true") - return parser - - -def main() -> None: - parser = create_parser() - args = parser.parse_args() - action = "Saving indices" if args.do_not_upload else "Uploading indices" - if args.compute_sha256: - action = "Computing checksums" - - prefixes = PREFIXES_WITH_HTML if args.prefix == 'all' else [args.prefix] - for prefix in prefixes: - print(f"INFO: {action} for '{prefix}'") - stime = time.time() - idx = S3Index.from_S3(prefix=prefix, with_metadata=args.generate_pep503 or args.compute_sha256) - etime = time.time() - print(f"DEBUG: Fetched {len(idx.objects)} objects for '{prefix}' in {etime-stime:.2f} seconds") - if args.compute_sha256: - idx.compute_sha256() - elif args.do_not_upload: - idx.save_legacy_html() - if args.generate_pep503: - idx.save_pep503_htmls() - else: - idx.upload_legacy_html() - if args.generate_pep503: - idx.upload_pep503_htmls() - - -if __name__ == "__main__": - main() diff --git a/s3_management/requirements.txt b/s3_management/requirements.txt deleted file mode 100644 index fa23e39b1..000000000 --- a/s3_management/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -boto3==1.28.53 -packaging==21.3 From f7d8ebd106818e4c36368204700edb8c8d15e42f Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Mon, 15 Jan 2024 22:44:03 +0000 Subject: [PATCH 184/212] [BE] Remove unused nightly_defaults.bat (#1678) --- windows/internal/auth.bat | 46 ------ windows/internal/nightly_defaults.bat | 201 -------------------------- windows/internal/publish.bat | 97 ------------- windows/internal/upload.bat | 96 ------------ 4 files changed, 440 deletions(-) delete mode 100644 windows/internal/auth.bat delete mode 100644 windows/internal/nightly_defaults.bat delete mode 100644 windows/internal/publish.bat delete mode 100644 windows/internal/upload.bat diff --git a/windows/internal/auth.bat 
b/windows/internal/auth.bat deleted file mode 100644 index c874bce49..000000000 --- a/windows/internal/auth.bat +++ /dev/null @@ -1,46 +0,0 @@ -@echo off - -: From the following doc, the build won't be triggered if the users don't sign in daily. -: https://docs.microsoft.com/en-us/azure/devops/pipelines/build/triggers?tabs=yaml&view=vsts#my-build-didnt-run-what-happened -: To avoid this problem, we can just go through the sign in process using the following command. - -:auth_start - -if "%RETRY_TIMES%" == "" ( - set /a RETRY_TIMES=10 - set /a SLEEP_TIME=2 -) else ( - set /a RETRY_TIMES=%RETRY_TIMES%-1 - set /a SLEEP_TIME=%SLEEP_TIME%*2 -) - -for /f "usebackq tokens=*" %%i in (`curl -so NUL -w "%%{http_code}" -u %VSTS_AUTH% https://dev.azure.com/pytorch`) do ( - set STATUS_CODE=%%i -) - -IF NOT "%STATUS_CODE%" == "200" ( - echo Auth retry times remaining: %RETRY_TIMES% - echo Sleep time: %SLEEP_TIME% seconds - IF %RETRY_TIMES% EQU 0 ( - echo Auth failed - goto err - ) - waitfor SomethingThatIsNeverHappening /t %SLEEP_TIME% 2>nul || ver >nul - goto auth_start -) ELSE ( - echo Login Attempt Succeeded - goto auth_end -) - -:err - -: Throw a warning if it fails -powershell -c "Write-Warning 'Login Attempt Failed'" - -:auth_end - -set RETRY_TIMES= -set SLEEP_TIME= -set STATUS_CODE= - -exit /b 0 diff --git a/windows/internal/nightly_defaults.bat b/windows/internal/nightly_defaults.bat deleted file mode 100644 index e74d55e0b..000000000 --- a/windows/internal/nightly_defaults.bat +++ /dev/null @@ -1,201 +0,0 @@ -@echo off - -if "%~1"=="" goto arg_error -if NOT "%~2"=="" goto arg_error -goto arg_end - -:arg_error - -echo Illegal number of parameters. Pass packge type `Conda` or `Wheels`. -exit /b 1 - -:arg_end - -echo "nightly_defaults.bat at %CD% starting at %DATE%" - -set SRC_DIR=%~dp0\.. - -:: NIGHTLIES_FOLDER -:: N.B. 
this is also defined in cron_start.sh -:: An arbitrary root folder to store all nightlies folders, each of which is a -:: parent level date folder with separate subdirs for logs, wheels, conda -:: packages, etc. This should be kept the same across all scripts called in a -:: cron job, so it only has a default value in the top-most script -:: build_cron.sh to avoid the default values from diverging. -if "%NIGHTLIES_FOLDER%" == "" set "NIGHTLIES_FOLDER=%SRC_DIR%" - -:: NIGHTLIES_DATE -:: N.B. this is also defined in cron_start.sh -:: The date in YYYY_mm_dd format that we are building for. If this is not -:: already set, then this will first try to find the date of the nightlies -:: folder that this builder repo exists in; e.g. if this script exists in -:: some_dir/2019_09_04/builder/cron/ then this will be set to 2019_09_04 (must -:: match YYYY_mm_dd). This is for convenience when debugging/uploading past -:: dates, so that you don't have to set NIGHTLIES_DATE yourself. If a date -:: folder cannot be found in that exact location, then this will default to -:: the current date. 
- - -if NOT "%NIGHTLIES_DATE%" == "" goto date_end - -:date_start - -set "DATE_CMD=Get-Date ([System.TimeZoneInfo]::ConvertTimeFromUtc((Get-Date).ToUniversalTime(), [System.TimeZoneInfo]::FindSystemTimeZoneById('Pacific Standard Time'))) -f 'yyyy_MM_dd'" -set "DATE_COMPACT_CMD=Get-Date ([System.TimeZoneInfo]::ConvertTimeFromUtc((Get-Date).ToUniversalTime(), [System.TimeZoneInfo]::FindSystemTimeZoneById('Pacific Standard Time'))) -f 'yyyyMMdd'" - -FOR /F "delims=" %%i IN ('powershell -c "%DATE_CMD%"') DO set NIGHTLIES_DATE=%%i -FOR /F "delims=" %%i IN ('powershell -c "%DATE_COMPACT_CMD%"') DO set NIGHTLIES_DATE_COMPACT=%%i - -:date_end - -if "%NIGHTLIES_DATE_COMPACT%" == "" set NIGHTLIES_DATE_COMPACT=%NIGHTLIES_DATE:~0,4%%NIGHTLIES_DATE:~5,2%%NIGHTLIES_DATE:~8,2% - -:: Used in lots of places as the root dir to store all conda/wheel/manywheel -:: packages as well as logs for the day -set today=%NIGHTLIES_FOLDER%\%NIGHTLIES_DATE% -mkdir "%today%" || ver >nul - - -::############################################################################# -:: Add new configuration variables below this line. 'today' should always be -:: defined ASAP to avoid weird errors -::############################################################################# - - -:: List of people to email when things go wrong. This is passed directly to -:: `mail -t` -:: TODO: Not supported yet -if "%NIGHTLIES_EMAIL_LIST%" == "" set NIGHTLIES_EMAIL_LIST=peterghost86@gmail.com - -:: PYTORCH_CREDENTIALS_FILE -:: A bash file that exports credentials needed to upload to aws and anaconda. -:: Needed variables are PYTORCH_ANACONDA_USERNAME, PYTORCH_ANACONDA_PASSWORD, -:: AWS_ACCESS_KEY_ID, and AWS_SECRET_ACCESS_KEY. Or it can just export the AWS -:: keys and then prepend a logged-in conda installation to the path. 
-:: TODO: Not supported yet -if "%PYTORCH_CREDENTIALS_FILE%" == "" set PYTORCH_CREDENTIALS_FILE=/c/Users/administrator/nightlies/credentials.sh - -:: Location of the temporary miniconda that is downloaded to install conda-build -:: and aws to upload finished packages TODO this is messy to install this in -:: upload.sh and later use it in upload_logs.sh -if "%CONDA_UPLOADER_INSTALLATION%" == "" set "CONDA_UPLOADER_INSTALLATION=%today%\miniconda" - -:: N.B. BUILDER_REPO and BUILDER_BRANCH are both set in cron_start.sh, as that -:: is the script that actually clones the builder repo that /this/ script is -:: running from. -pushd "%SRC_DIR%\.." -set NIGHTLIES_BUILDER_ROOT=%CD% -popd - -:: The shared pytorch repo to be used by all builds -if "%NIGHTLIES_PYTORCH_ROOT%" == "" set "NIGHTLIES_PYTORCH_ROOT=%today%\pytorch" - -:: PYTORCH_REPO -:: The Github org/user whose fork of Pytorch to check out (git clone -:: https://github.com//pytorch.git). This will always be cloned -:: fresh to build with. Default is 'pytorch' -if "%PYTORCH_REPO%" == "" set PYTORCH_REPO=pytorch - -:: PYTORCH_BRANCH -:: The branch of Pytorch to checkout for building (git checkout ). -:: This can either be the name of the branch (e.g. git checkout -:: my_branch_name) or can be a git commit (git checkout 4b2674n...). 
Default -:: is 'latest', which is a special term that signals to pull the last commit -:: before 0:00 midnight on the NIGHTLIES_DATE -if "%PYTORCH_BRANCH%" == "" set PYTORCH_BRANCH=nightly - -:: Clone the requested pytorch checkout -if exist "%NIGHTLIES_PYTORCH_ROOT%" goto clone_end - -:clone_start - -git clone --recursive "https://github.com/%PYTORCH_REPO%/pytorch.git" "%NIGHTLIES_PYTORCH_ROOT%" -pushd "%NIGHTLIES_PYTORCH_ROOT%" - -if NOT "%PYTORCH_BRANCH%" == "latest" goto latest_end - -:latest_start - -:: Switch to the latest commit by 11:59 yesterday -echo PYTORCH_BRANCH is set to latest so I will find the last commit -echo before 0:00 midnight on %NIGHTLIES_DATE% -set git_date=%NIGHTLIES_DATE:_=-% -FOR /F "delims=" %%i IN ('git log --before %git_date% -n 1 "--pretty=%%H"') DO set last_commit=%%i -echo Setting PYTORCH_BRANCH to %last_commit% since that was the last -echo commit before %NIGHTLIES_DATE% -set PYTORCH_BRANCH=%last_commit% - -:latest_end - -git checkout "%PYTORCH_BRANCH%" -git submodule update -popd - -:clone_end - -if "%CUDA_VERSION%" == "cpu" ( - set _DESIRED_CUDA=cpu -) else ( - set _DESIRED_CUDA=cu%CUDA_VERSION% -) - -:: PYTORCH_BUILD_VERSION -:: The actual version string. Used in conda like -:: pytorch-nightly==1.0.0.dev20180908 -:: or in manylinux like -:: torch_nightly-1.0.0.dev20180908-cp27-cp27m-linux_x86_64.whl -if "%PYTORCH_BUILD_VERSION%" == "" set PYTORCH_BUILD_VERSION=1.5.0.dev%NIGHTLIES_DATE_COMPACT% - -if "%~1" == "Wheels" ( - if "%BUILD_PYTHONLESS%" == "" ( - if not "%CUDA_VERSION%" == "102" ( - set PYTORCH_BUILD_VERSION=%PYTORCH_BUILD_VERSION%+%_DESIRED_CUDA% - ) - ) -) - -:: PYTORCH_BUILD_NUMBER -:: This is usually the number 1. If more than one build is uploaded for the -:: same version/date, then this can be incremented to 2,3 etc in which case -:: '.post2' will be appended to the version string of the package. 
This can -:: be set to '0' only if OVERRIDE_PACKAGE_VERSION is being used to bypass -:: all the version string logic in downstream scripts. Since we use the -:: override below, exporting this shouldn't actually matter. -if "%PYTORCH_BUILD_NUMBER%" == "" set /a PYTORCH_BUILD_NUMBER=1 -if %PYTORCH_BUILD_NUMBER% GTR 1 set PYTORCH_BUILD_VERSION=%PYTORCH_BUILD_VERSION%%PYTORCH_BUILD_NUMBER% - -:: The nightly builds use their own versioning logic, so we override whatever -:: logic is in setup.py or other scripts -:: TODO: Not supported yet -set OVERRIDE_PACKAGE_VERSION=%PYTORCH_BUILD_VERSION% - -:: Build folder for conda builds to use -if "%TORCH_CONDA_BUILD_FOLDER%" == "" set TORCH_CONDA_BUILD_FOLDER=pytorch-nightly - -:: TORCH_PACKAGE_NAME -:: The name of the package to upload. This should probably be pytorch or -:: pytorch-nightly. N.B. that pip will change all '-' to '_' but conda will -:: not. This is dealt with in downstream scripts. -:: TODO: Not supported yet -if "%TORCH_PACKAGE_NAME%" == "" set TORCH_PACKAGE_NAME=torch - -:: PIP_UPLOAD_FOLDER should end in a slash. This is to handle it being empty -:: (when uploading to e.g. whl/cpu/) and also to handle nightlies (when -:: uploading to e.g. /whl/nightly/cpu) -:: TODO: Not supported yet -if "%PIP_UPLOAD_FOLDER%" == "" set "PIP_UPLOAD_FOLDER=nightly\" - -:: The location of the binary_sizes dir in s3 is hardcoded into -:: upload_binary_sizes.sh - -:: DAYS_TO_KEEP -:: How many days to keep around for clean.sh. Build folders older than this -:: will be purged at the end of cron jobs. '1' means to keep only the current -:: day. Values less than 1 are not allowed. The default is 5. -:: TODO: Not supported yet -if "%DAYS_TO_KEEP%" == "" set /a DAYS_TO_KEEP=5 -if %DAYS_TO_KEEP% LSS 1 ( - echo DAYS_TO_KEEP cannot be less than 1. 
- echo A value of 1 means to only keep the build for today - exit /b 1 -) diff --git a/windows/internal/publish.bat b/windows/internal/publish.bat deleted file mode 100644 index 765fb39fe..000000000 --- a/windows/internal/publish.bat +++ /dev/null @@ -1,97 +0,0 @@ -@echo off - -set SRC_DIR=%~dp0 -pushd %SRC_DIR% - -if not "%CUDA_VERSION%" == "cpu" ( - set PACKAGE_SUFFIX=_cuda%CUDA_VERSION% -) else ( - set PACKAGE_SUFFIX= -) - -if "%PACKAGEFULLNAME%" == "Conda" ( - set PACKAGE=conda -) else ( - set PACKAGE=wheels -) - -if "%DEBUG%" == "1" ( - if not defined PACKAGE_SUFFIX ( - set PACKAGE_SUFFIX=_debug - ) else ( - set PACKAGE_SUFFIX=%PACKAGE_SUFFIX%_debug - ) -) - -if not defined PACKAGE_SUFFIX ( - set PUBLISH_BRANCH=%PACKAGE%_%DESIRED_PYTHON% -) else ( - set PUBLISH_BRANCH=%PACKAGE%_%DESIRED_PYTHON%%PACKAGE_SUFFIX% -) - -git clone %ARTIFACT_REPO_URL% -b %PUBLISH_BRANCH% --single-branch >nul 2>&1 - -IF ERRORLEVEL 1 ( - echo Branch %PUBLISH_BRANCH% not exist, falling back to master - set NO_BRANCH=1 - git clone %ARTIFACT_REPO_URL% -b master --single-branch >nul 2>&1 -) - -IF ERRORLEVEL 1 ( - echo Clone failed - goto err -) - -cd pytorch_builder -attrib -s -h -r . /s /d - -:: Empty repo -rd /s /q . || ver >nul - -IF NOT EXIST %PACKAGE% mkdir %PACKAGE% - -xcopy /S /E /Y ..\..\output\*.* %PACKAGE%\ - -git config --global user.name "Azure DevOps" -git config --global user.email peterghost86@gmail.com -git init -git checkout --orphan %PUBLISH_BRANCH% -git remote add origin %ARTIFACT_REPO_URL% -git add . 
-git commit -m "Update artifacts" - -:push - -if "%RETRY_TIMES%" == "" ( - set /a RETRY_TIMES=10 - set /a SLEEP_TIME=2 -) else ( - set /a RETRY_TIMES=%RETRY_TIMES%-1 - set /a SLEEP_TIME=%SLEEP_TIME%*2 -) - -git push origin %PUBLISH_BRANCH% -f > nul 2>&1 - -IF ERRORLEVEL 1 ( - echo Git push retry times remaining: %RETRY_TIMES% - echo Sleep time: %SLEEP_TIME% seconds - IF %RETRY_TIMES% EQU 0 ( - echo Push failed - goto err - ) - waitfor SomethingThatIsNeverHappening /t %SLEEP_TIME% 2>nul || ver >nul - goto push -) ELSE ( - set RETRY_TIMES= - set SLEEP_TIME= -) - -popd - -exit /b 0 - -:err - -popd - -exit /b 1 diff --git a/windows/internal/upload.bat b/windows/internal/upload.bat deleted file mode 100644 index 8be04d841..000000000 --- a/windows/internal/upload.bat +++ /dev/null @@ -1,96 +0,0 @@ -@echo off - -IF "%CONDA_UPLOADER_INSTALLATION%" == "" goto precheck_fail -IF "%PYTORCH_FINAL_PACKAGE_DIR%" == "" goto precheck_fail -IF "%today%" == "" goto precheck_fail -IF "%PYTORCH_ANACONDA_USERNAME%" == "" goto precheck_fail -IF "%PYTORCH_ANACONDA_PASSWORD%" == "" goto precheck_fail - -goto precheck_pass - -:precheck_fail - -echo Please run nightly_defaults.bat first. 
-echo And remember to set `PYTORCH_FINAL_PACKAGE_DIR` -echo Finally, don't forget to set anaconda tokens -exit /b 1 - -:precheck_pass - -pushd %today% - -:: Install anaconda client -set "CONDA_HOME=%CONDA_UPLOADER_INSTALLATION%" -set "tmp_conda=%CONDA_HOME%" -set "miniconda_exe=%CD%\miniconda.exe" -rmdir /s /q "%CONDA_HOME%" -del miniconda.exe -curl -k https://repo.continuum.io/miniconda/Miniconda3-latest-Windows-x86_64.exe -o "%miniconda_exe%" -popd - -IF ERRORLEVEL 1 ( - echo Conda download failed - exit /b 1 -) - -call %~dp0\..\..\conda\install_conda.bat - -IF ERRORLEVEL 1 ( - echo Conda installation failed - exit /b 1 -) - -set "ORIG_PATH=%PATH%" -set "PATH=%CONDA_HOME%;%CONDA_HOME%\scripts;%CONDA_HOME%\Library\bin;%PATH%" - -REM conda install -y anaconda-client -pip install git+https://github.com/peterjc123/anaconda-client.git@log_more_meaningfull_errors -IF ERRORLEVEL 1 ( - echo Anaconda client installation failed - exit /b 1 -) - -REM bash -c "yes | anaconda login --username "%PYTORCH_ANACONDA_USERNAME%" --password "%PYTORCH_ANACONDA_PASSWORD%"" -anaconda login --username "%PYTORCH_ANACONDA_USERNAME%" --password "%PYTORCH_ANACONDA_PASSWORD%" -IF ERRORLEVEL 1 ( - echo Anaconda client login failed - exit /b 1 -) - -set PYTORCH_FINAL_PACKAGE= -:: Upload all the packages under `PYTORCH_FINAL_PACKAGE_DIR` -FOR /F "delims=" %%i IN ('where /R %PYTORCH_FINAL_PACKAGE_DIR% *pytorch*.tar.bz2') DO ( - set "PYTORCH_FINAL_PACKAGE=%%i" -) - -IF "%PYTORCH_FINAL_PACKAGE%" == "" ( - echo No package to upload - exit /b 0 -) - -:upload - -if "%RETRY_TIMES%" == "" ( - set /a RETRY_TIMES=10 - set /a SLEEP_TIME=2 -) else ( - set /a RETRY_TIMES=%RETRY_TIMES%-1 - set /a SLEEP_TIME=%SLEEP_TIME%*2 -) - -echo Uploading %PYTORCH_FINAL_PACKAGE% to Anaconda Cloud -anaconda upload "%PYTORCH_FINAL_PACKAGE%" -u pytorch-nightly --label main --force --no-progress - -IF ERRORLEVEL 1 ( - echo Anaconda upload retry times remaining: %RETRY_TIMES% - echo Sleep time: %SLEEP_TIME% seconds - IF 
%RETRY_TIMES% EQU 0 ( - echo Upload failed - exit /b 1 - ) - waitfor SomethingThatIsNeverHappening /t %SLEEP_TIME% 2>nul || ver >nul - goto upload -) ELSE ( - set RETRY_TIMES= - set SLEEP_TIME= -) From 8b67d32929b950c4851066800f5ef57c7646994c Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Thu, 18 Jan 2024 16:15:06 -0800 Subject: [PATCH 185/212] [Conda] Mark `blas * mkl` as x86 only dependency --- conda/pytorch-nightly/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/pytorch-nightly/meta.yaml b/conda/pytorch-nightly/meta.yaml index e56fe7f68..9e8f90a94 100644 --- a/conda/pytorch-nightly/meta.yaml +++ b/conda/pytorch-nightly/meta.yaml @@ -55,7 +55,7 @@ requirements: - jinja2 - pyyaml {% if cross_compile_arm64 == 0 %} - - blas * mkl + - blas * mkl # [x86_64] {% endif %} - pytorch-mutex 1.0 {{ build_variant }} # [not osx ] {{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT', '') }} From eb78393f1e4bd68134d87e4059b9b25194af7dbb Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Thu, 18 Jan 2024 16:31:04 -0800 Subject: [PATCH 186/212] [Conda] Download arch appropriate Miniconda By using `$(uname -m)` as suffix, which is arm64 on Apple Silicon and x86 on Intel Macs --- conda/build_pytorch.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/build_pytorch.sh b/conda/build_pytorch.sh index 06e72da7c..40b4a64a7 100755 --- a/conda/build_pytorch.sh +++ b/conda/build_pytorch.sh @@ -199,7 +199,7 @@ if [[ "$(uname)" == 'Darwin' ]]; then miniconda_sh="${MAC_PACKAGE_WORK_DIR}/miniconda.sh" rm -rf "$tmp_conda" rm -f "$miniconda_sh" - retry curl -sS https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-MacOSX-x86_64.sh -o "$miniconda_sh" + retry curl -sS https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-MacOSX-$(uname -m).sh -o "$miniconda_sh" chmod +x "$miniconda_sh" && \ "$miniconda_sh" -b -p "$tmp_conda" && \ rm "$miniconda_sh" From 0d3aea4ee08e00b76fc263ce58e4c10df9f58e44 Mon Sep 17 00:00:00 2001 From: 
Nikita Shulga Date: Thu, 18 Jan 2024 16:40:39 -0800 Subject: [PATCH 187/212] [Conda] Do not depend on llvmdev-9 on ARM As earliest available for the platform is llvmdev-11 --- conda/build_pytorch.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/build_pytorch.sh b/conda/build_pytorch.sh index 40b4a64a7..09d4aca8a 100755 --- a/conda/build_pytorch.sh +++ b/conda/build_pytorch.sh @@ -67,7 +67,7 @@ if [[ -n "$OVERRIDE_PACKAGE_VERSION" ]]; then fi # differentiate package name for cross compilation to avoid collision -if [[ -n "$CROSS_COMPILE_ARM64" ]]; then +if [[ -n "$CROSS_COMPILE_ARM64" || "$(uname -m)" == "arm64" ]]; then export PYTORCH_LLVM_PACKAGE="" fi From 6c6a33b2712bdb4be4406a10f75e3a404541ccd7 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Thu, 18 Jan 2024 17:03:46 -0800 Subject: [PATCH 188/212] [Conda] Set correct developer dir for MacOS runners --- conda/build_pytorch.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/conda/build_pytorch.sh b/conda/build_pytorch.sh index 09d4aca8a..39aab7ee8 100755 --- a/conda/build_pytorch.sh +++ b/conda/build_pytorch.sh @@ -104,7 +104,11 @@ if [[ -z "$DESIRED_PYTHON" ]]; then fi if [[ "$OSTYPE" == "darwin"* ]]; then - DEVELOPER_DIR=/Applications/Xcode_13.3.1.app/Contents/Developer + if [[ "$(uname -m)" == "arm64" ]]; then + DEVELOPER_DIR=/Applications/Xcode_14.3.1.app/Contents/Developer + else + DEVELOPER_DIR=/Applications/Xcode_13.3.1.app/Contents/Developer + fi fi if [[ "$desired_cuda" == 'cpu' ]]; then cpu_only=1 From 74b04f302afede5c25275d8026f34a06330cc515 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Fri, 19 Jan 2024 06:18:52 -0800 Subject: [PATCH 189/212] [Conda] Add llvm-openmp dependency for ARM64 PyTorch for M1 is finally built with OpenMP, so it needs to depend on it --- conda/pytorch-nightly/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/pytorch-nightly/meta.yaml b/conda/pytorch-nightly/meta.yaml index 
9e8f90a94..d29b87018 100644 --- a/conda/pytorch-nightly/meta.yaml +++ b/conda/pytorch-nightly/meta.yaml @@ -47,7 +47,7 @@ requirements: - intel-openmp # [win] # llvm-openmp 16 leads to wrong processor affinity for fork child, see #99625. # Before a decent fix, force llvm-openmp version <16. - - llvm-openmp <16 # [linux] + - llvm-openmp <16 # [linux or arm64] - typing_extensions - sympy - filelock From 896b6df5f0ce23431bf760ac3090f26bd6c44ee9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ionu=C8=9B=20Man=C8=9Ba?= Date: Mon, 22 Jan 2024 19:48:28 +0200 Subject: [PATCH 190/212] Use dynamic MKL on Windows (#1467) Use dynamic MKL on Windows and updated MKL to 2021.4.0 On conda python 3.12 use mkl 2023.1 --- conda/pytorch-nightly/bld.bat | 5 ----- conda/pytorch-nightly/meta.yaml | 7 +++++-- windows/build_pytorch.bat | 6 +----- windows/internal/copy.bat | 4 ++++ windows/internal/copy_cpu.bat | 5 +++++ windows/internal/smoke_test.bat | 19 +++++++++++-------- 6 files changed, 26 insertions(+), 20 deletions(-) diff --git a/conda/pytorch-nightly/bld.bat b/conda/pytorch-nightly/bld.bat index 972df7e9c..775256ea7 100644 --- a/conda/pytorch-nightly/bld.bat +++ b/conda/pytorch-nightly/bld.bat @@ -34,11 +34,6 @@ if "%desired_cuda%" == "12.1" ( set DISTUTILS_USE_SDK=1 -curl https://s3.amazonaws.com/ossci-windows/mkl_2020.2.254.7z -k -O -7z x -aoa mkl_2020.2.254.7z -omkl -set CMAKE_INCLUDE_PATH=%SRC_DIR%\mkl\include -set LIB=%SRC_DIR%\mkl\lib;%LIB% - set libuv_ROOT=%PREFIX%\Library echo libuv_ROOT=%libuv_ROOT% diff --git a/conda/pytorch-nightly/meta.yaml b/conda/pytorch-nightly/meta.yaml index d29b87018..3da1625c4 100644 --- a/conda/pytorch-nightly/meta.yaml +++ b/conda/pytorch-nightly/meta.yaml @@ -23,7 +23,8 @@ requirements: - mkl-include # [x86_64] - mkl=2020.2 # [py <= 311 and x86_64 and not win] - mkl=2023.1 # [py >= 312 and x86_64] - - mkl=2021.4 # [x86_64 and win and py <= 311] + - mkl-devel=2021.4.0 # [x86_64 and win and py<=311] + - mkl-devel=2023.1 # [x86_64 and win and py>=312] 
{% endif %} - typing_extensions - ninja @@ -41,7 +42,9 @@ requirements: run: - python {% if cross_compile_arm64 == 0 %} - - mkl >=2018 # [x86_64] + - mkl >=2018 # [x86_64 and not win] + - mkl=2021.4 # [x86_64 and win and py <= 311] + - mkl=2023.1 # [x86_64 and win and py >= 312] {% endif %} - libuv # [win] - intel-openmp # [win] diff --git a/windows/build_pytorch.bat b/windows/build_pytorch.bat index 37e19f933..750d3c5e3 100644 --- a/windows/build_pytorch.bat +++ b/windows/build_pytorch.bat @@ -67,10 +67,6 @@ exit /B 1 :: Install MKL rmdir /s /q mkl del mkl_2020.2.254.7z -curl https://s3.amazonaws.com/ossci-windows/mkl_2020.2.254.7z -k -O -7z x -aoa mkl_2020.2.254.7z -omkl -set CMAKE_INCLUDE_PATH=%cd%\mkl\include -set LIB=%cd%\mkl\lib;%LIB% :: Download MAGMA Files on CUDA builds set MAGMA_VERSION=2.5.4 @@ -126,7 +122,7 @@ for %%v in (%DESIRED_PYTHON_PREFIX%) do ( ) else ( set "PATH=%CONDA_HOME%\envs\%%v;%CONDA_HOME%\envs\%%v\scripts;%CONDA_HOME%\envs\%%v\Library\bin;%ORIG_PATH%" ) - pip install ninja + pip install ninja mkl-include==2021.4.0 mkl-devel==2021.4.0 @setlocal :: Set Flags if not "%CUDA_VERSION%"=="cpu" ( diff --git a/windows/internal/copy.bat b/windows/internal/copy.bat index 490d9593a..9893fc7c5 100755 --- a/windows/internal/copy.bat +++ b/windows/internal/copy.bat @@ -11,6 +11,10 @@ copy "%CUDA_PATH%\extras\CUPTI\lib64\cupti64_*.dll*" pytorch\torch\lib copy "C:\Program Files\NVIDIA Corporation\NvToolsExt\bin\x64\nvToolsExt64_1.dll*" pytorch\torch\lib copy "%CONDA_LIB_PATH%\libiomp*5md.dll" pytorch\torch\lib +IF "%PACKAGE_TYPE%"=="libtorch" ( + copy "%CONDA_LIB_PATH%\mkl_intel_thread.1.dll" pytorch\torch\lib + copy "%CONDA_LIB_PATH%\mkl_core.1.dll" pytorch\torch\lib +) :: Should be set in build_pytorch.bat copy "%libuv_ROOT%\bin\uv.dll" pytorch\torch\lib diff --git a/windows/internal/copy_cpu.bat b/windows/internal/copy_cpu.bat index 2dae4613e..0a4c0dabb 100755 --- a/windows/internal/copy_cpu.bat +++ b/windows/internal/copy_cpu.bat @@ -1,3 +1,8 @@ copy 
"%CONDA_LIB_PATH%\libiomp*5md.dll" pytorch\torch\lib :: Should be set in build_pytorch.bat copy "%libuv_ROOT%\bin\uv.dll" pytorch\torch\lib + +IF "%PACKAGE_TYPE%"=="libtorch" ( + copy "%CONDA_LIB_PATH%\mkl_intel_thread.1.dll" pytorch\torch\lib + copy "%CONDA_LIB_PATH%\mkl_core.1.dll" pytorch\torch\lib +) diff --git a/windows/internal/smoke_test.bat b/windows/internal/smoke_test.bat index 1ade2cbda..ce097f6a2 100644 --- a/windows/internal/smoke_test.bat +++ b/windows/internal/smoke_test.bat @@ -54,7 +54,7 @@ if errorlevel 1 exit /b 1 set "PATH=%CD%\Python%PYTHON_VERSION%\Scripts;%CD%\Python;%PATH%" -pip install -q numpy protobuf "mkl>=2019" +pip install -q numpy protobuf if errorlevel 1 exit /b 1 for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *.whl') do pip install "%%i" @@ -87,14 +87,18 @@ set "PATH=%CONDA_HOME%;%CONDA_HOME%\scripts;%CONDA_HOME%\Library\bin;%PATH%" conda create -qyn testenv python=%DESIRED_PYTHON% if errorlevel 1 exit /b 1 - +call conda install -yq conda-build +if errorlevel 1 exit /b 1 call %CONDA_HOME%\condabin\activate.bat testenv if errorlevel 1 exit /b 1 +set "NO_ARCH_PATH=%PYTORCH_FINAL_PACKAGE_DIR:/=\%\noarch" +mkdir %NO_ARCH_PATH% +for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *') do xcopy "%%i" %NO_ARCH_PATH% /Y +if ERRORLEVEL 1 exit /b 1 +call conda index %PYTORCH_FINAL_PACKAGE_DIR% +if errorlevel 1 exit /b 1 +call conda install -yq -c "file:///%PYTORCH_FINAL_PACKAGE_DIR%" pytorch==%PYTORCH_BUILD_VERSION% -c pytorch -c numba/label/dev -c nvidia -:: do conda install to make sure all the dependencies are installed -:: Install numpy see: https://github.com/pytorch/pytorch/issues/107228 -:: todo: Remove numpy install once the issue above is resolved -call conda install -yq numpy pytorch %CONDA_EXTRA_ARGS% if ERRORLEVEL 1 exit /b 1 set /a CUDA_VER=%CUDA_VERSION% @@ -103,8 +107,7 @@ set CUDA_VER_MINOR=%CUDA_VERSION:~-1,1% set CUDA_VERSION_STR=%CUDA_VER_MAJOR%.%CUDA_VER_MINOR% :: Install package we 
just build -for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *.tar.bz2') do call conda install -yq "%%i" --offline -if ERRORLEVEL 1 exit /b 1 + :smoke_test python -c "import torch" From 122ff0d0af5b283512e022ea92a94c272c8ce335 Mon Sep 17 00:00:00 2001 From: henrylhtsang <91030427+henrylhtsang@users.noreply.github.com> Date: Mon, 22 Jan 2024 17:55:03 -0800 Subject: [PATCH 191/212] Add torchrec to promote s3 script (#1680) * Add torchrec to promote s3 script * Add torchrec version to release_version.sh --- release/promote.sh | 2 ++ release/release_versions.sh | 1 + 2 files changed, 3 insertions(+) diff --git a/release/promote.sh b/release/promote.sh index a7f273bc1..54e21ce7e 100644 --- a/release/promote.sh +++ b/release/promote.sh @@ -11,6 +11,7 @@ TORCHVISION_VERSION=${TORCHVISION_VERSION:-0.16.1} TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION:-2.1.1} TORCHTEXT_VERSION=${TORCHTEXT_VERSION:-0.16.1} TORCHDATA_VERSION=${TORCHDATA_VERSION:-0.7.1} +TORCHREC_VERSION=${TORCHREC_VERSION:-0.6.0} DRY_RUN=${DRY_RUN:-enabled} @@ -102,6 +103,7 @@ promote_pypi() { # promote_s3 torchaudio whl "${TORCHAUDIO_VERSION}" # promote_s3 torchtext whl "${TORCHTEXT_VERSION}" # promote_s3 torchdata whl "${TORCHDATA_VERSION}" +# promote_s3 torchrec whl "${TORCHREC_VERSION}" # promote_s3 "libtorch-*" libtorch "${PYTORCH_VERSION}" # promote_conda torchtriton conda "2.1.0" diff --git a/release/release_versions.sh b/release/release_versions.sh index d362cb1ca..53dbe435b 100644 --- a/release/release_versions.sh +++ b/release/release_versions.sh @@ -6,3 +6,4 @@ TORCHVISION_VERSION=${TORCHVISION_VERSION:-0.16.1} TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION:-2.1.1} TORCHTEXT_VERSION=${TORCHTEXT_VERSION:-0.16.1} TORCHDATA_VERSION=${TORCHDATA_VERSION:-0.7.1} +TORCHREC_VERSION=${TORCHREC_VERSION:-0.6.0} \ No newline at end of file From 7d704653442c5a84048bfe4b6cd7e619f157cada Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Tue, 23 Jan 2024 22:12:16 +0000 Subject: [PATCH 192/212] Revert 
"Dynamic MKL windows" (#1682) --- conda/pytorch-nightly/bld.bat | 5 +++++ conda/pytorch-nightly/meta.yaml | 7 ++----- windows/build_pytorch.bat | 6 +++++- windows/internal/copy.bat | 4 ---- windows/internal/copy_cpu.bat | 5 ----- windows/internal/smoke_test.bat | 19 ++++++++----------- 6 files changed, 20 insertions(+), 26 deletions(-) diff --git a/conda/pytorch-nightly/bld.bat b/conda/pytorch-nightly/bld.bat index 775256ea7..972df7e9c 100644 --- a/conda/pytorch-nightly/bld.bat +++ b/conda/pytorch-nightly/bld.bat @@ -34,6 +34,11 @@ if "%desired_cuda%" == "12.1" ( set DISTUTILS_USE_SDK=1 +curl https://s3.amazonaws.com/ossci-windows/mkl_2020.2.254.7z -k -O +7z x -aoa mkl_2020.2.254.7z -omkl +set CMAKE_INCLUDE_PATH=%SRC_DIR%\mkl\include +set LIB=%SRC_DIR%\mkl\lib;%LIB% + set libuv_ROOT=%PREFIX%\Library echo libuv_ROOT=%libuv_ROOT% diff --git a/conda/pytorch-nightly/meta.yaml b/conda/pytorch-nightly/meta.yaml index 3da1625c4..d29b87018 100644 --- a/conda/pytorch-nightly/meta.yaml +++ b/conda/pytorch-nightly/meta.yaml @@ -23,8 +23,7 @@ requirements: - mkl-include # [x86_64] - mkl=2020.2 # [py <= 311 and x86_64 and not win] - mkl=2023.1 # [py >= 312 and x86_64] - - mkl-devel=2021.4.0 # [x86_64 and win and py<=311] - - mkl-devel=2023.1 # [x86_64 and win and py>=312] + - mkl=2021.4 # [x86_64 and win and py <= 311] {% endif %} - typing_extensions - ninja @@ -42,9 +41,7 @@ requirements: run: - python {% if cross_compile_arm64 == 0 %} - - mkl >=2018 # [x86_64 and not win] - - mkl=2021.4 # [x86_64 and win and py <= 311] - - mkl=2023.1 # [x86_64 and win and py >= 312] + - mkl >=2018 # [x86_64] {% endif %} - libuv # [win] - intel-openmp # [win] diff --git a/windows/build_pytorch.bat b/windows/build_pytorch.bat index 750d3c5e3..37e19f933 100644 --- a/windows/build_pytorch.bat +++ b/windows/build_pytorch.bat @@ -67,6 +67,10 @@ exit /B 1 :: Install MKL rmdir /s /q mkl del mkl_2020.2.254.7z +curl https://s3.amazonaws.com/ossci-windows/mkl_2020.2.254.7z -k -O +7z x -aoa 
mkl_2020.2.254.7z -omkl +set CMAKE_INCLUDE_PATH=%cd%\mkl\include +set LIB=%cd%\mkl\lib;%LIB% :: Download MAGMA Files on CUDA builds set MAGMA_VERSION=2.5.4 @@ -122,7 +126,7 @@ for %%v in (%DESIRED_PYTHON_PREFIX%) do ( ) else ( set "PATH=%CONDA_HOME%\envs\%%v;%CONDA_HOME%\envs\%%v\scripts;%CONDA_HOME%\envs\%%v\Library\bin;%ORIG_PATH%" ) - pip install ninja mkl-include==2021.4.0 mkl-devel==2021.4.0 + pip install ninja @setlocal :: Set Flags if not "%CUDA_VERSION%"=="cpu" ( diff --git a/windows/internal/copy.bat b/windows/internal/copy.bat index 9893fc7c5..490d9593a 100755 --- a/windows/internal/copy.bat +++ b/windows/internal/copy.bat @@ -11,10 +11,6 @@ copy "%CUDA_PATH%\extras\CUPTI\lib64\cupti64_*.dll*" pytorch\torch\lib copy "C:\Program Files\NVIDIA Corporation\NvToolsExt\bin\x64\nvToolsExt64_1.dll*" pytorch\torch\lib copy "%CONDA_LIB_PATH%\libiomp*5md.dll" pytorch\torch\lib -IF "%PACKAGE_TYPE%"=="libtorch" ( - copy "%CONDA_LIB_PATH%\mkl_intel_thread.1.dll" pytorch\torch\lib - copy "%CONDA_LIB_PATH%\mkl_core.1.dll" pytorch\torch\lib -) :: Should be set in build_pytorch.bat copy "%libuv_ROOT%\bin\uv.dll" pytorch\torch\lib diff --git a/windows/internal/copy_cpu.bat b/windows/internal/copy_cpu.bat index 0a4c0dabb..2dae4613e 100755 --- a/windows/internal/copy_cpu.bat +++ b/windows/internal/copy_cpu.bat @@ -1,8 +1,3 @@ copy "%CONDA_LIB_PATH%\libiomp*5md.dll" pytorch\torch\lib :: Should be set in build_pytorch.bat copy "%libuv_ROOT%\bin\uv.dll" pytorch\torch\lib - -IF "%PACKAGE_TYPE%"=="libtorch" ( - copy "%CONDA_LIB_PATH%\mkl_intel_thread.1.dll" pytorch\torch\lib - copy "%CONDA_LIB_PATH%\mkl_core.1.dll" pytorch\torch\lib -) diff --git a/windows/internal/smoke_test.bat b/windows/internal/smoke_test.bat index ce097f6a2..1ade2cbda 100644 --- a/windows/internal/smoke_test.bat +++ b/windows/internal/smoke_test.bat @@ -54,7 +54,7 @@ if errorlevel 1 exit /b 1 set "PATH=%CD%\Python%PYTHON_VERSION%\Scripts;%CD%\Python;%PATH%" -pip install -q numpy protobuf +pip install -q numpy 
protobuf "mkl>=2019" if errorlevel 1 exit /b 1 for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *.whl') do pip install "%%i" @@ -87,18 +87,14 @@ set "PATH=%CONDA_HOME%;%CONDA_HOME%\scripts;%CONDA_HOME%\Library\bin;%PATH%" conda create -qyn testenv python=%DESIRED_PYTHON% if errorlevel 1 exit /b 1 -call conda install -yq conda-build -if errorlevel 1 exit /b 1 + call %CONDA_HOME%\condabin\activate.bat testenv if errorlevel 1 exit /b 1 -set "NO_ARCH_PATH=%PYTORCH_FINAL_PACKAGE_DIR:/=\%\noarch" -mkdir %NO_ARCH_PATH% -for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *') do xcopy "%%i" %NO_ARCH_PATH% /Y -if ERRORLEVEL 1 exit /b 1 -call conda index %PYTORCH_FINAL_PACKAGE_DIR% -if errorlevel 1 exit /b 1 -call conda install -yq -c "file:///%PYTORCH_FINAL_PACKAGE_DIR%" pytorch==%PYTORCH_BUILD_VERSION% -c pytorch -c numba/label/dev -c nvidia +:: do conda install to make sure all the dependencies are installed +:: Install numpy see: https://github.com/pytorch/pytorch/issues/107228 +:: todo: Remove numpy install once the issue above is resolved +call conda install -yq numpy pytorch %CONDA_EXTRA_ARGS% if ERRORLEVEL 1 exit /b 1 set /a CUDA_VER=%CUDA_VERSION% @@ -107,7 +103,8 @@ set CUDA_VER_MINOR=%CUDA_VERSION:~-1,1% set CUDA_VERSION_STR=%CUDA_VER_MAJOR%.%CUDA_VER_MINOR% :: Install package we just build - +for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *.tar.bz2') do call conda install -yq "%%i" --offline +if ERRORLEVEL 1 exit /b 1 :smoke_test python -c "import torch" From 0816ae7d1acdbcdfd7d51418db22f72fdec8030d Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Tue, 23 Jan 2024 22:19:40 +0000 Subject: [PATCH 193/212] Revert "Revert "Dynamic MKL windows"" (#1683) --- conda/pytorch-nightly/bld.bat | 5 ----- conda/pytorch-nightly/meta.yaml | 7 +++++-- windows/build_pytorch.bat | 6 +----- windows/internal/copy.bat | 4 ++++ windows/internal/copy_cpu.bat | 5 +++++ windows/internal/smoke_test.bat | 19 +++++++++++-------- 6 
files changed, 26 insertions(+), 20 deletions(-) diff --git a/conda/pytorch-nightly/bld.bat b/conda/pytorch-nightly/bld.bat index 972df7e9c..775256ea7 100644 --- a/conda/pytorch-nightly/bld.bat +++ b/conda/pytorch-nightly/bld.bat @@ -34,11 +34,6 @@ if "%desired_cuda%" == "12.1" ( set DISTUTILS_USE_SDK=1 -curl https://s3.amazonaws.com/ossci-windows/mkl_2020.2.254.7z -k -O -7z x -aoa mkl_2020.2.254.7z -omkl -set CMAKE_INCLUDE_PATH=%SRC_DIR%\mkl\include -set LIB=%SRC_DIR%\mkl\lib;%LIB% - set libuv_ROOT=%PREFIX%\Library echo libuv_ROOT=%libuv_ROOT% diff --git a/conda/pytorch-nightly/meta.yaml b/conda/pytorch-nightly/meta.yaml index d29b87018..3da1625c4 100644 --- a/conda/pytorch-nightly/meta.yaml +++ b/conda/pytorch-nightly/meta.yaml @@ -23,7 +23,8 @@ requirements: - mkl-include # [x86_64] - mkl=2020.2 # [py <= 311 and x86_64 and not win] - mkl=2023.1 # [py >= 312 and x86_64] - - mkl=2021.4 # [x86_64 and win and py <= 311] + - mkl-devel=2021.4.0 # [x86_64 and win and py<=311] + - mkl-devel=2023.1 # [x86_64 and win and py>=312] {% endif %} - typing_extensions - ninja @@ -41,7 +42,9 @@ requirements: run: - python {% if cross_compile_arm64 == 0 %} - - mkl >=2018 # [x86_64] + - mkl >=2018 # [x86_64 and not win] + - mkl=2021.4 # [x86_64 and win and py <= 311] + - mkl=2023.1 # [x86_64 and win and py >= 312] {% endif %} - libuv # [win] - intel-openmp # [win] diff --git a/windows/build_pytorch.bat b/windows/build_pytorch.bat index 37e19f933..750d3c5e3 100644 --- a/windows/build_pytorch.bat +++ b/windows/build_pytorch.bat @@ -67,10 +67,6 @@ exit /B 1 :: Install MKL rmdir /s /q mkl del mkl_2020.2.254.7z -curl https://s3.amazonaws.com/ossci-windows/mkl_2020.2.254.7z -k -O -7z x -aoa mkl_2020.2.254.7z -omkl -set CMAKE_INCLUDE_PATH=%cd%\mkl\include -set LIB=%cd%\mkl\lib;%LIB% :: Download MAGMA Files on CUDA builds set MAGMA_VERSION=2.5.4 @@ -126,7 +122,7 @@ for %%v in (%DESIRED_PYTHON_PREFIX%) do ( ) else ( set 
"PATH=%CONDA_HOME%\envs\%%v;%CONDA_HOME%\envs\%%v\scripts;%CONDA_HOME%\envs\%%v\Library\bin;%ORIG_PATH%" ) - pip install ninja + pip install ninja mkl-include==2021.4.0 mkl-devel==2021.4.0 @setlocal :: Set Flags if not "%CUDA_VERSION%"=="cpu" ( diff --git a/windows/internal/copy.bat b/windows/internal/copy.bat index 490d9593a..9893fc7c5 100755 --- a/windows/internal/copy.bat +++ b/windows/internal/copy.bat @@ -11,6 +11,10 @@ copy "%CUDA_PATH%\extras\CUPTI\lib64\cupti64_*.dll*" pytorch\torch\lib copy "C:\Program Files\NVIDIA Corporation\NvToolsExt\bin\x64\nvToolsExt64_1.dll*" pytorch\torch\lib copy "%CONDA_LIB_PATH%\libiomp*5md.dll" pytorch\torch\lib +IF "%PACKAGE_TYPE%"=="libtorch" ( + copy "%CONDA_LIB_PATH%\mkl_intel_thread.1.dll" pytorch\torch\lib + copy "%CONDA_LIB_PATH%\mkl_core.1.dll" pytorch\torch\lib +) :: Should be set in build_pytorch.bat copy "%libuv_ROOT%\bin\uv.dll" pytorch\torch\lib diff --git a/windows/internal/copy_cpu.bat b/windows/internal/copy_cpu.bat index 2dae4613e..0a4c0dabb 100755 --- a/windows/internal/copy_cpu.bat +++ b/windows/internal/copy_cpu.bat @@ -1,3 +1,8 @@ copy "%CONDA_LIB_PATH%\libiomp*5md.dll" pytorch\torch\lib :: Should be set in build_pytorch.bat copy "%libuv_ROOT%\bin\uv.dll" pytorch\torch\lib + +IF "%PACKAGE_TYPE%"=="libtorch" ( + copy "%CONDA_LIB_PATH%\mkl_intel_thread.1.dll" pytorch\torch\lib + copy "%CONDA_LIB_PATH%\mkl_core.1.dll" pytorch\torch\lib +) diff --git a/windows/internal/smoke_test.bat b/windows/internal/smoke_test.bat index 1ade2cbda..ce097f6a2 100644 --- a/windows/internal/smoke_test.bat +++ b/windows/internal/smoke_test.bat @@ -54,7 +54,7 @@ if errorlevel 1 exit /b 1 set "PATH=%CD%\Python%PYTHON_VERSION%\Scripts;%CD%\Python;%PATH%" -pip install -q numpy protobuf "mkl>=2019" +pip install -q numpy protobuf if errorlevel 1 exit /b 1 for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *.whl') do pip install "%%i" @@ -87,14 +87,18 @@ set 
"PATH=%CONDA_HOME%;%CONDA_HOME%\scripts;%CONDA_HOME%\Library\bin;%PATH%" conda create -qyn testenv python=%DESIRED_PYTHON% if errorlevel 1 exit /b 1 - +call conda install -yq conda-build +if errorlevel 1 exit /b 1 call %CONDA_HOME%\condabin\activate.bat testenv if errorlevel 1 exit /b 1 +set "NO_ARCH_PATH=%PYTORCH_FINAL_PACKAGE_DIR:/=\%\noarch" +mkdir %NO_ARCH_PATH% +for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *') do xcopy "%%i" %NO_ARCH_PATH% /Y +if ERRORLEVEL 1 exit /b 1 +call conda index %PYTORCH_FINAL_PACKAGE_DIR% +if errorlevel 1 exit /b 1 +call conda install -yq -c "file:///%PYTORCH_FINAL_PACKAGE_DIR%" pytorch==%PYTORCH_BUILD_VERSION% -c pytorch -c numba/label/dev -c nvidia -:: do conda install to make sure all the dependencies are installed -:: Install numpy see: https://github.com/pytorch/pytorch/issues/107228 -:: todo: Remove numpy install once the issue above is resolved -call conda install -yq numpy pytorch %CONDA_EXTRA_ARGS% if ERRORLEVEL 1 exit /b 1 set /a CUDA_VER=%CUDA_VERSION% @@ -103,8 +107,7 @@ set CUDA_VER_MINOR=%CUDA_VERSION:~-1,1% set CUDA_VERSION_STR=%CUDA_VER_MAJOR%.%CUDA_VER_MINOR% :: Install package we just build -for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *.tar.bz2') do call conda install -yq "%%i" --offline -if ERRORLEVEL 1 exit /b 1 + :smoke_test python -c "import torch" From e6c514248ad026a2c6494fcd402bb5976f461ce1 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Wed, 24 Jan 2024 12:46:10 -0500 Subject: [PATCH 194/212] Add numpy install to windows conda tests (#1684) --- windows/internal/smoke_test.bat | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/windows/internal/smoke_test.bat b/windows/internal/smoke_test.bat index ce097f6a2..e1980fed3 100644 --- a/windows/internal/smoke_test.bat +++ b/windows/internal/smoke_test.bat @@ -54,7 +54,7 @@ if errorlevel 1 exit /b 1 set "PATH=%CD%\Python%PYTHON_VERSION%\Scripts;%CD%\Python;%PATH%" -pip install -q numpy protobuf 
+pip install -q numpy protobuf if errorlevel 1 exit /b 1 for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *.whl') do pip install "%%i" @@ -87,7 +87,7 @@ set "PATH=%CONDA_HOME%;%CONDA_HOME%\scripts;%CONDA_HOME%\Library\bin;%PATH%" conda create -qyn testenv python=%DESIRED_PYTHON% if errorlevel 1 exit /b 1 -call conda install -yq conda-build +call conda install -yq conda-build numpy if errorlevel 1 exit /b 1 call %CONDA_HOME%\condabin\activate.bat testenv if errorlevel 1 exit /b 1 From c162c7579a3b49c4b701f9d6d38f8af8cb2bdd2e Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Wed, 24 Jan 2024 14:05:55 -0500 Subject: [PATCH 195/212] Windows conda test. Install numpy in conda testenv (#1685) --- windows/internal/smoke_test.bat | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/windows/internal/smoke_test.bat b/windows/internal/smoke_test.bat index e1980fed3..8c5aed2ef 100644 --- a/windows/internal/smoke_test.bat +++ b/windows/internal/smoke_test.bat @@ -87,7 +87,7 @@ set "PATH=%CONDA_HOME%;%CONDA_HOME%\scripts;%CONDA_HOME%\Library\bin;%PATH%" conda create -qyn testenv python=%DESIRED_PYTHON% if errorlevel 1 exit /b 1 -call conda install -yq conda-build numpy +call conda install -yq conda-build if errorlevel 1 exit /b 1 call %CONDA_HOME%\condabin\activate.bat testenv if errorlevel 1 exit /b 1 @@ -98,7 +98,8 @@ if ERRORLEVEL 1 exit /b 1 call conda index %PYTORCH_FINAL_PACKAGE_DIR% if errorlevel 1 exit /b 1 call conda install -yq -c "file:///%PYTORCH_FINAL_PACKAGE_DIR%" pytorch==%PYTORCH_BUILD_VERSION% -c pytorch -c numba/label/dev -c nvidia - +if ERRORLEVEL 1 exit /b 1 +call conda install -yq numpy if ERRORLEVEL 1 exit /b 1 set /a CUDA_VER=%CUDA_VERSION% From 55b339d2a9f21ec1c5c5ead7b4762e675929bd07 Mon Sep 17 00:00:00 2001 From: Supadchaya <138070207+spcyppt@users.noreply.github.com> Date: Wed, 24 Jan 2024 14:38:06 -0800 Subject: [PATCH 196/212] Add fbgemm to promote s3 script (#1681) --- release/promote.sh | 2 ++ 
release/release_versions.sh | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/release/promote.sh b/release/promote.sh index 54e21ce7e..5bf7fe0b5 100644 --- a/release/promote.sh +++ b/release/promote.sh @@ -12,6 +12,7 @@ TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION:-2.1.1} TORCHTEXT_VERSION=${TORCHTEXT_VERSION:-0.16.1} TORCHDATA_VERSION=${TORCHDATA_VERSION:-0.7.1} TORCHREC_VERSION=${TORCHREC_VERSION:-0.6.0} +FBGEMMGPU_VERSION=${FBGEMMGPU_VERSION:-0.6.0} DRY_RUN=${DRY_RUN:-enabled} @@ -104,6 +105,7 @@ promote_pypi() { # promote_s3 torchtext whl "${TORCHTEXT_VERSION}" # promote_s3 torchdata whl "${TORCHDATA_VERSION}" # promote_s3 torchrec whl "${TORCHREC_VERSION}" +# promote_s3 fbgemm-gpu whl "${FBGEMMGPU_VERSION}" # promote_s3 "libtorch-*" libtorch "${PYTORCH_VERSION}" # promote_conda torchtriton conda "2.1.0" diff --git a/release/release_versions.sh b/release/release_versions.sh index 53dbe435b..981a18ea0 100644 --- a/release/release_versions.sh +++ b/release/release_versions.sh @@ -6,4 +6,5 @@ TORCHVISION_VERSION=${TORCHVISION_VERSION:-0.16.1} TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION:-2.1.1} TORCHTEXT_VERSION=${TORCHTEXT_VERSION:-0.16.1} TORCHDATA_VERSION=${TORCHDATA_VERSION:-0.7.1} -TORCHREC_VERSION=${TORCHREC_VERSION:-0.6.0} \ No newline at end of file +TORCHREC_VERSION=${TORCHREC_VERSION:-0.6.0} +FBGEMMGPU_VERSION=${FBGEMMGPU_VERSION:-0.6.0} From b8539eb5202b27eabc6ca3dc632663bf9f13f71f Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Wed, 24 Jan 2024 20:23:44 -0500 Subject: [PATCH 197/212] Release 2.2.0 pypi prep script modifications (#1686) --- .../validate-repackaged-binary-sizes.yml | 54 +++++++++---------- release/pypi/prep_binary_for_pypi.sh | 8 --- release/pypi/promote_pypi_to_staging.sh | 3 +- release/pypi/upload_pypi_to_staging.sh | 2 - release/release_versions.sh | 8 +-- 5 files changed, 31 insertions(+), 44 deletions(-) diff --git a/.github/workflows/validate-repackaged-binary-sizes.yml 
b/.github/workflows/validate-repackaged-binary-sizes.yml index 695c68d3a..cb1a6a73e 100644 --- a/.github/workflows/validate-repackaged-binary-sizes.yml +++ b/.github/workflows/validate-repackaged-binary-sizes.yml @@ -23,66 +23,64 @@ jobs: fail-fast: false matrix: whl: - - url: https://download.pytorch.org/whl/test/cu117_pypi_cudnn/torch-1.13.1%2Bcu117.with.pypi.cudnn-cp310-cp310-linux_x86_64.whl - python: "3.10" # python version to use for smoke tests - upload_artifact: false # upload the repackaged binary as an artifact - - url: https://download.pytorch.org/whl/test/cu117_pypi_cudnn/torch-1.13.1%2Bcu117.with.pypi.cudnn-cp37-cp37m-linux_x86_64.whl - python: "3.7" + - url: https://download.pytorch.org/whl/test/cu121/torch-2.2.0%2Bcu121-cp312-cp312-linux_x86_64.whl + python: "3.12" artifact: false - - url: https://download.pytorch.org/whl/test/cu117_pypi_cudnn/torch-1.13.1%2Bcu117.with.pypi.cudnn-cp38-cp38-linux_x86_64.whl - python: "3.8" + - url: https://download.pytorch.org/whl/test/cu121/torch-2.2.0%2Bcu121-cp311-cp311-linux_x86_64.whl + python: "3.11" # python version to use for smoke tests + upload_artifact: false # upload the repackaged binary as an artifact + - url: https://download.pytorch.org/whl/test/cu121/torch-2.2.0%2Bcu121-cp310-cp310-linux_x86_64.whl + python: "3.10" artifact: false - - url: https://download.pytorch.org/whl/test/cu117_pypi_cudnn/torch-1.13.1%2Bcu117.with.pypi.cudnn-cp39-cp39-linux_x86_64.whl + - url: https://download.pytorch.org/whl/test/cu121/torch-2.2.0%2Bcu121-cp39-cp39-linux_x86_64.whl python: "3.9" artifact: false - # - url: https://download.pytorch.org/whl/test/cu117_pypi_cudnn/torch-1.13.1%2Bcu117.with.pypi.cudnn-cp311-cp311-linux_x86_64.whl - # python: "3.11" - # artifact: false + - url: https://download.pytorch.org/whl/test/cu121/torch-2.2.0%2Bcu121-cp38-cp38-linux_x86_64.whl + python: "3.8" + artifact: false uses: pytorch/test-infra/.github/workflows/linux_job.yml@main with: - runner: linux.4xlarge.nvidia.gpu + runner: 
linux.g5.4xlarge.nvidia.gpu job-name: "Validate binary size" upload-artifact: ${{ matrix.whl.upload_artifact == 'true' && 'repackaged-binary' || '' }} script: | set -ex export ENV_NAME="conda-env-${{ github.run_id }}" - export GPU_ARCH_VER="11.7" - export GPU_ARCH_TYPE="cuda" - export CUDA_VER="11.7" + export MATRIX_GPU_ARCH_VERSION="12.1" + export MATRIX_GPU_ARCH_TYPE="cuda" + export MATRIX_CUDA_VER="12.1" export DESIRED_PYTHON="${{ matrix.whl.python }}" - export DESIRED_CUDA="cu117" - export PACKAGE_TYPE="wheel" + export MATRIX_PACKAGE_TYPE="wheel" export TARGET_OS="linux" - export INSTALLATION="" - + # install zip sudo yum install zip -y - + # install patchelf chmod a+x common/install_patchelf.sh sudo common/install_patchelf.sh - + # download torch whl wget ${{ matrix.whl.url }} FILENAME=$(ls -1 *.whl | head -n 1) SIZE_BEFORE=$(du -h $FILENAME | cut -f1) - + # repackage into manywheel release/pypi/prep_binary_for_pypi.sh $FILENAME - + NEW_FILENAME=$(ls -1 *.whl | head -n 1) echo "::notice:: $FILENAME before: $SIZE_BEFORE after: $(du -h $NEW_FILENAME | cut -f1)" - + # cp to ${RUNNER_ARTIFACT_DIR} cp $NEW_FILENAME ${RUNNER_ARTIFACT_DIR}/ - + # create conda env conda create -y -n $ENV_NAME python=$DESIRED_PYTHON conda activate $ENV_NAME - + # install torch pip install numpy pillow $NEW_FILENAME - + # run smoke test - python ./test/smoke_test/smoke_test.py --package=torchonly \ No newline at end of file + python ./test/smoke_test/smoke_test.py --package=torchonly diff --git a/release/pypi/prep_binary_for_pypi.sh b/release/pypi/prep_binary_for_pypi.sh index fdd9bf4a0..154b22852 100755 --- a/release/pypi/prep_binary_for_pypi.sh +++ b/release/pypi/prep_binary_for_pypi.sh @@ -52,14 +52,6 @@ for whl_file in "$@"; do ( set -x - # Special build with pypi cudnn remove it from version - if [[ $whl_file == *"with.pypi.cudnn"* ]]; then - rm -rf "${whl_dir}/caffe2" - rm -rf "${whl_dir}"/torch/lib/libnvrtc* - - sed -i -e "s/-with-pypi-cudnn//g" "${whl_dir}/torch/version.py" - fi 
- find "${dist_info_folder}" -type f -exec sed -i "s!${version_with_suffix}!${version_no_suffix}!" {} \; # Moves distinfo from one with a version suffix to one without # Example: torch-1.8.0+cpu.dist-info => torch-1.8.0.dist-info diff --git a/release/pypi/promote_pypi_to_staging.sh b/release/pypi/promote_pypi_to_staging.sh index 46cd958cd..dbc00e24a 100644 --- a/release/pypi/promote_pypi_to_staging.sh +++ b/release/pypi/promote_pypi_to_staging.sh @@ -21,13 +21,12 @@ upload_pypi_to_staging() { } # Uncomment these to promote to pypi -PYTORCH_LINUX_VERSION_SUFFIX="%2Bcu121.with.pypi.cudnn" LINUX_VERSION_SUFFIX="%2Bcu121" CPU_VERSION_SUFFIX="%2Bcpu" MACOS_X86_64="macosx_.*_x86_64" MACOS_ARM64="macosx_.*_arm64" -PLATFORM="linux_x86_64" VERSION_SUFFIX="${PYTORCH_LINUX_VERSION_SUFFIX}" upload_pypi_to_staging torch "${PYTORCH_VERSION}" +PLATFORM="linux_x86_64" VERSION_SUFFIX="${LINUX_VERSION_SUFFIX}" upload_pypi_to_staging torch "${PYTORCH_VERSION}" PLATFORM="manylinux2014_aarch64" VERSION_SUFFIX="" upload_pypi_to_staging torch "${PYTORCH_VERSION}" PLATFORM="win_amd64" VERSION_SUFFIX="${CPU_VERSION_SUFFIX}" upload_pypi_to_staging torch "${PYTORCH_VERSION}" PLATFORM="${MACOS_X86_64}" VERSION_SUFFIX="" upload_pypi_to_staging torch "${PYTORCH_VERSION}" # intel mac diff --git a/release/pypi/upload_pypi_to_staging.sh b/release/pypi/upload_pypi_to_staging.sh index b1a7ddf6d..f00271715 100644 --- a/release/pypi/upload_pypi_to_staging.sh +++ b/release/pypi/upload_pypi_to_staging.sh @@ -33,10 +33,8 @@ pushd "${output_tmp_dir}" # Dry run by default DRY_RUN=${DRY_RUN:-enabled} # On dry run just echo the commands that are meant to be run -TWINE_UPLOAD="echo twine upload" DRY_RUN_FLAG="--dryrun" if [[ $DRY_RUN = "disabled" ]]; then - TWINE_UPLOAD="twine upload" DRY_RUN_FLAG="" fi diff --git a/release/release_versions.sh b/release/release_versions.sh index 981a18ea0..88b6cbcd3 100644 --- a/release/release_versions.sh +++ b/release/release_versions.sh @@ -1,10 +1,10 @@ #!/usr/bin/env 
bash # Make sure to update these versions when doing a release first -PYTORCH_VERSION=${PYTORCH_VERSION:-2.1.1} -TORCHVISION_VERSION=${TORCHVISION_VERSION:-0.16.1} -TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION:-2.1.1} -TORCHTEXT_VERSION=${TORCHTEXT_VERSION:-0.16.1} +PYTORCH_VERSION=${PYTORCH_VERSION:-2.2.0} +TORCHVISION_VERSION=${TORCHVISION_VERSION:-0.17.0} +TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION:-2.2.0} +TORCHTEXT_VERSION=${TORCHTEXT_VERSION:-0.17.0} TORCHDATA_VERSION=${TORCHDATA_VERSION:-0.7.1} TORCHREC_VERSION=${TORCHREC_VERSION:-0.6.0} FBGEMMGPU_VERSION=${FBGEMMGPU_VERSION:-0.6.0} From 42852bb99ef7b61068c541c146ce202f22692019 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Fri, 26 Jan 2024 12:10:09 -0500 Subject: [PATCH 198/212] [Analytics] add pypi staging validations, remove circleci script (#1688) --- analytics/circleci_analyze.py | 596 ------------------------ analytics/validate_pypi_staging.py | 124 +++++ release/README.md | 7 + release/pypi/promote_pypi_to_staging.sh | 9 +- 4 files changed, 136 insertions(+), 600 deletions(-) delete mode 100755 analytics/circleci_analyze.py create mode 100644 analytics/validate_pypi_staging.py diff --git a/analytics/circleci_analyze.py b/analytics/circleci_analyze.py deleted file mode 100755 index 03e8c3e9a..000000000 --- a/analytics/circleci_analyze.py +++ /dev/null @@ -1,596 +0,0 @@ -#!/usr/bin/env python3.7 -from datetime import datetime, time -import json -import requests -import itertools -import sqlite3 -import os -import sys -from typing import Callable, Dict, Generator, List, MutableSet, Optional - - -def get_executor_price_rate(executor): - (etype, eclass) = executor['type'], executor['resource_class'] - assert etype in ['machine', 'external', 'docker', 'macos', 'runner'], f'Unexpected type {etype}:{eclass}' - if etype == 'machine': - return { - 'medium': 10, - 'large': 20, - 'xlarge': 100, - '2xlarge': 200, - 'gpu.medium': 160, - 'gpu.large': 320, - 'gpu.small': 80, - 'windows.medium': 40, - 'windows.large': 
120, - 'windows.xlarge': 210, - 'windows.2xlarge': 500, - 'windows.gpu.nvidia.medium': 500, - 'gpu.nvidia.small': 160, - 'gpu.nvidia.medium': 240, - 'gpu.nvidia.large': 1000, - }[eclass] - if etype == 'macos': - return { - 'medium': 50, - 'large': 100, - }[eclass] - if etype == 'docker': - return { - 'small': 5, - 'medium': 10, - 'medium+': 15, - 'large': 20, - 'xlarge': 40, - '2xlarge': 80, - '2xlarge+': 100, - }[eclass] - if etype == 'runner' or etype == 'external': - return { - 'pytorch/amd-gpu': 0, - }[eclass] - raise RuntimeError(f'Undefined executor {etype}:{eclass}') - - -price_per_credit = 6e-4 - - -def get_circleci_token() -> str: - token_file_path = os.path.join(os.getenv('HOME'), '.circleci_token') - token = os.getenv('CIRCLECI_TOKEN') - if token is not None: - return token - if not os.path.exists(token_file_path): - raise RuntimeError('Can not get CirclCI token' - ' neither from CIRCLECI_TOKEN environment variable,' - ' nor via ~/.circleci_token file') - with open(token_file_path) as f: - return f.read().strip() - - -def is_workflow_in_progress(workflow: Dict) -> bool: - return workflow['status'] in ['running', 'not_run', 'failing', 'on_hold'] - - -def str2date(val: str) -> datetime: - assert val is not None - return datetime.fromisoformat(val[:-1] if val.endswith('Z') else val) - - -class CircleCICache: - def __init__(self, token: Optional[str], db_name: str = 'circleci-cache.db') -> None: - file_folder = os.path.dirname(__file__) - self.url_prefix = 'https://circleci.com/api/v2' - self.session = requests.session() - self.headers = { - 'Accept': 'application/json', - 'Circle-Token': token, - } if token is not None else None - self.db = sqlite3.connect(os.path.join(file_folder, db_name)) - self.db.execute('CREATE TABLE IF NOT EXISTS jobs(slug TEXT NOT NULL, job_id INTEGER NOT NULL, json TEXT NOT NULL);') - self.db.execute('CREATE TABLE IF NOT EXISTS artifacts(slug TEXT NOT NULL, job_id INTEGER NOT NULL, json TEXT NOT NULL);') - self.db.execute('CREATE 
UNIQUE INDEX IF NOT EXISTS jobs_key on jobs(slug, job_id);') - self.db.execute('CREATE TABLE IF NOT EXISTS workflows(id TEXT NOT NULL PRIMARY KEY, json TEXT NOT NULL);') - self.db.execute('CREATE TABLE IF NOT EXISTS pipeline_workflows(id TEXT NOT NULL PRIMARY KEY, json TEXT NOT NULL);') - self.db.execute('CREATE TABLE IF NOT EXISTS pipelines(id TEXT NOT NULL PRIMARY KEY, json TEXT NOT NULL, branch TEXT, revision TEXT);') - self.db.commit() - - def is_offline(self) -> bool: - return self.headers is None - - def _get_paged_items_list(self, url: str, params: Optional[Dict] = None, item_count: Optional[int] = -1) -> List: - rc, token, run_once = [], None, False - - def _should_quit(): - nonlocal run_once, rc, token - if not run_once: - run_once = True - return False - if token is None: - return True - if item_count is None: - return True - return item_count >= 0 and len(rc) >= item_count - - if params is None: - params = {} - while not _should_quit(): - if token is not None: - params['page-token'] = token - r = self.session.get(url, params=params, headers=self.headers) - try: - j = r.json() - except json.JSONDecodeError: - print(f"Failed to decode {rc}", file=sys.stderr) - raise - if 'message' in j: - raise RuntimeError(f'Failed to get list from {url}: {j["message"]}') - token = j['next_page_token'] - rc.extend(j['items']) - return rc - - def get_pipelines(self, project: str = 'github/pytorch/pytorch', branch: Optional[str] = None, item_count: Optional[int] = None) -> List: - if self.is_offline(): - c = self.db.cursor() - cmd = "SELECT json from pipelines" - if branch is not None: - cmd += f" WHERE branch='{branch}'" - if item_count is not None and item_count > 0: - cmd += f" LIMIT {item_count}" - c.execute(cmd) - return [json.loads(val[0]) for val in c.fetchall()] - rc = self._get_paged_items_list(f'{self.url_prefix}/project/{project}/pipeline', {'branch': branch} if branch is not None else {}, item_count) - for pipeline in rc: - vcs = pipeline['vcs'] - pid, branch, 
revision, pser = pipeline['id'], vcs['branch'], vcs['revision'], json.dumps(pipeline) - self.db.execute("INSERT OR REPLACE INTO pipelines(id, branch, revision, json) VALUES (?, ?, ?, ?)", (pid, branch, revision, pser)) - self.db.commit() - return rc - - def get_pipeline_workflows(self, pipeline) -> List: - c = self.db.cursor() - c.execute("SELECT json FROM pipeline_workflows WHERE id=?", (pipeline,)) - rc = c.fetchone() - if rc is not None: - rc = json.loads(rc[0]) - if not any(is_workflow_in_progress(w) for w in rc) or self.is_offline(): - return rc - if self.is_offline(): - return [] - rc = self._get_paged_items_list(f'{self.url_prefix}/pipeline/{pipeline}/workflow') - self.db.execute("INSERT OR REPLACE INTO pipeline_workflows(id, json) VALUES (?, ?)", (pipeline, json.dumps(rc))) - self.db.commit() - return rc - - def get_workflow_jobs(self, workflow, should_cache=True) -> List: - c = self.db.cursor() - c.execute("select json from workflows where id=?", (workflow,)) - rc = c.fetchone() - if rc is not None: - return json.loads(rc[0]) - if self.is_offline(): - return [] - rc = self._get_paged_items_list(f'{self.url_prefix}/workflow/{workflow}/job') - if should_cache: - self.db.execute("INSERT INTO workflows(id, json) VALUES (?, ?)", (workflow, json.dumps(rc))) - self.db.commit() - return rc - - def get_job(self, project_slug, job_number) -> Dict: - c = self.db.cursor() - c.execute("select json from jobs where slug=? 
and job_id = ?", (project_slug, job_number)) - rc = c.fetchone() - if rc is not None: - return json.loads(rc[0]) - if self.is_offline(): - return {} - r = self.session.get(f'{self.url_prefix}/project/{project_slug}/job/{job_number}', headers=self.headers) - try: - rc = r.json() - except json.JSONDecodeError: - print(f"Failed to decode {rc}", file=sys.stderr) - raise - self.db.execute("INSERT INTO jobs(slug,job_id, json) VALUES (?, ?, ?)", (project_slug, job_number, json.dumps(rc))) - self.db.commit() - return rc - - def get_job_artifacts(self, project_slug, job_number) -> List[Dict]: - c = self.db.cursor() - c.execute("select json from artifacts where slug=? and job_id = ?", (project_slug, job_number)) - rc = c.fetchone() - if rc is not None: - return json.loads(rc[0]) - if self.is_offline(): - return [{}] - rc = self._get_paged_items_list(f"{self.url_prefix}/project/{project_slug}/{job_number}/artifacts") - self.db.execute("INSERT INTO artifacts(slug,job_id, json) VALUES (?, ?, ?)", (project_slug, job_number, json.dumps(rc))) - self.db.commit() - return rc - - def get_pipeline_jobs(self, project: str = 'github/pytorch/pytorch', branch: Optional[str] = None, item_count: Optional[int] = None) -> Generator: - for pipeline in self.get_pipelines(project, branch, item_count): - for workflow in self.get_pipeline_workflows(pipeline['id']): - in_progress = is_workflow_in_progress(workflow) - for job in self.get_workflow_jobs(workflow['id'], should_cache=not in_progress): - yield (pipeline, workflow, job) - - def get_jobs_summary(self, slug='gh/pytorch/pytorch', workflow='build') -> Dict: - items = self._get_paged_items_list(f'{self.url_prefix}/insights/{slug}/workflows/{workflow}/jobs') - return {item['name']: item for item in items} - - def get_job_timeseries(self, job_name: str, - slug: str = 'gh/pytorch/pytorch', - workflow: str = 'build', - branch: Optional[str] = None) -> List: - params = {'branch': branch} if branch is not None else {} - items = 
self._get_paged_items_list(f'{self.url_prefix}/insights/{slug}/workflows/build/jobs/{job_name}', params) - return [(str2date(x['started_at']), x['duration']) for x in items if x['status'] == 'success'] - - -def aggregate_by_day(series): - rc = {} - for (ts, val) in series: - date = datetime.combine(ts.date(), time()) - valcount = [val, 1.0] - if date not in rc: - rc[date] = valcount - else: - rc[date] = [sum(x) for x in zip(rc[date], valcount)] - return [(x, rc[x][0] / rc[x][1]) for x in sorted(rc.keys())] - - -def filter_names(names: List[str], name_filter: Optional[str] = None) -> List[str]: - import re - if name_filter is None: - return names - filters = name_filter.split(",") - return [name for name in names if any(re.match(filter, name) for filter in filters)] - - -def common_prefix(names: List[str]) -> str: - if len(names) == 0 or len(names[0]) == 0: - return '' - if len(names) == 1: - return names[0] - rc = names[0][0] - while rc != names[0] and all(name.startswith(rc) for name in names[1:]): - rc = names[0][:len(rc) + 1] - return rc[:-1] - - -def plot_graph(name_filter: Optional[str] = None, - output_file: Optional[str] = None, - branch: Optional[str] = None) -> None: - import matplotlib.pyplot as plt - import matplotlib.dates as mdates - - ci_cache = CircleCICache(token=get_circleci_token()) - summary = ci_cache.get_jobs_summary() - test_jobs = [name for name in summary.keys() if name.startswith('pytorch') and 'test' in name] - filtered_jobs = filter_names(test_jobs, name_filter) - prefix = common_prefix(filtered_jobs) - if len(filtered_jobs) == 0: - print(f'Filter "{name_filter}" does not match to any of {test_jobs}') - return - series = [] - labels = [] - styles = [f'{color}{style}' for (style, color) in itertools.product(['-', '--', '-.', ':'], ['b', 'g', 'r', 'c', 'm', 'y', 'k'])] - fig, ax = plt.subplots() - for name in test_jobs: - label = f"{name}(p95 = {int(summary[name]['metrics']['duration_metrics']['p95']/60)} min)" - if name not in 
filtered_jobs: - print(label) - continue - ts = ci_cache.get_job_timeseries(name, branch=branch) - if len(ts) == 0: - print(f'{label} time series is empty!') - continue - print(f'{label} time series has {len(ts)} elements') - labels.append(label[len(prefix):]) - series.append(ts) - x, y = zip(*aggregate_by_day(ts)) - plt.plot(x, [i / 60.0 for i in y], styles[len(labels) % len(styles)]) - plt.legend(labels, loc='upper left') - plt.title(f'{prefix} timeseries') - ax.set_ylabel("Duration (m)") - # Format date - ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %d')) - # Rotate tick labels - plt.setp(ax.get_xticklabels(), rotation=45, ha='right', rotation_mode='anchor') - if output_file is not None: - plt.savefig(output_file) - else: - plt.show() - - -def print_line(line: str, padding: Optional[int] = None, newline: bool = True) -> None: - if padding is not None and len(line) < padding: - line += ' ' * (padding - len(line)) - print(line, end='\n' if newline else '\r', flush=True) - - -def fetch_status(branch=None, item_count=50): - isatty = sys.stdout.isatty() - padding = os.get_terminal_size().columns - 1 if isatty else None - ci_cache = CircleCICache(token=get_circleci_token()) - print(f"About to fetch {item_count} latest pipelines against {branch if branch is not None else 'all branches'}") - pipelines = ci_cache.get_pipelines(branch=branch, item_count=item_count) - total_price, total_master_price = 0, 0 - for pipeline_idx, pipeline in enumerate(pipelines): - revision = pipeline['vcs']['revision'] - branch = pipeline['vcs']['branch'] - workflows = ci_cache.get_pipeline_workflows(pipeline['id']) - known_job_ids = [] - for workflow in workflows: - url = f'https://app.circleci.com/pipelines/github/pytorch/pytorch/{workflow["pipeline_number"]}/workflows/{workflow["id"]}' - if is_workflow_in_progress(workflow): - print_line(f'Skipping {url} name:{workflow["name"]} status:{workflow["status"]}', - newline=not sys.stdout.isatty()) - continue - rerun = False - 
total_credits, test_credits, gpu_credits, wincpu_credits, wingpu_credits = 0, 0, 0, 0, 0 - jobs = ci_cache.get_workflow_jobs(workflow['id']) - for job in jobs: - job_name, job_status, job_number = job['name'], job['status'], job.get('job_number', None) - if job_status in ['blocked', 'canceled', 'unauthorized', 'running', 'not_run', 'failing']: - continue - if job_number is None: - print(job) - continue - if job_number in known_job_ids: - rerun = True - continue - job_info = ci_cache.get_job(job['project_slug'], job_number) - if 'executor' not in job_info: - print(f'executor not found in {job_info}') - continue - job_executor = job_info['executor'] - resource_class = job_executor['resource_class'] - if resource_class is None: - print(f'resource_class is none for {job_info}') - continue - job_on_gpu = 'gpu' in resource_class - job_on_win = 'windows' in resource_class - if job_status != 'infrastructure_fail': - duration = str2date(job_info['stopped_at']) - str2date(job_info['started_at']) - job_credits = get_executor_price_rate(job_executor) * int(job_info['duration']) * 1e-3 / 60 - else: - job_credits, duration = 0, 0 - job_cost = job_credits * price_per_credit - total_credits += job_credits - if 'test' in job_name or job_name.startswith('smoke_'): - test_credits += job_credits - elif job_on_gpu: - print(f'Running build job {job_name} on GPU!!!') - if job_on_gpu: - gpu_credits += job_credits - if job_on_win: - wingpu_credits += job_credits - if job_on_win and not job_on_gpu: - wincpu_credits += job_credits - known_job_ids.append(job_number) - print_line(f' {job_name} {job_status} {duration} ${job_cost:.2f}', - padding=padding, newline=not isatty) - # Increment totals - total_price += total_credits * price_per_credit - if branch in ['master', 'nightly', 'postnightly', 'release/1.6']: - total_master_price += total_credits * price_per_credit - # skip small jobs - if total_credits * price_per_credit < .1: - continue - workflow_status = 
f'[{pipeline_idx}/{len(pipelines)}]' - workflow_status += f' {url} {workflow["name"]} status:{workflow["status"]}' - workflow_status += f' price: ${total_credits * price_per_credit:.2f}' - workflow_status += ' (Rerun?)' if rerun else '' - workflow_status += f'\n\t\tdate: {workflow["created_at"]} branch:{branch} revision:{revision}' - workflow_status += f'\n\t\ttotal credits: {int(total_credits)}' - if test_credits != 0: - workflow_status += f' testing: {100 * test_credits / total_credits:.1f}%' - if gpu_credits != 0: - workflow_status += f' GPU testing: {100 * gpu_credits / total_credits:.1f}%' - if wingpu_credits != 0: - workflow_status += f' WINGPU/GPU: {100 * wingpu_credits / gpu_credits:.1f}%' - - if wincpu_credits != 0: - workflow_status += f' Win CPU: {100 * wincpu_credits / total_credits:.1f}%' - workflow_status += f' Total: ${total_price:.2f} master fraction: {100 * total_master_price/ total_price:.1f}%' - print_line(workflow_status, padding=padding) - - -def plot_heatmap(cov_matrix, names): - import numpy as np - import matplotlib.pyplot as plt - assert cov_matrix.shape == (len(names), len(names)) - fig, ax = plt.subplots() - ax.imshow(cov_matrix) - ax.set_xticks(np.arange(len(names))) - ax.set_yticks(np.arange(len(names))) - ax.set_xticklabels(names) - ax.set_yticklabels(names) - # Rotate tick labels - plt.setp(ax.get_xticklabels(), rotation=45, ha='right', rotation_mode='anchor') - # Annotate values - for i in range(len(names)): - for j in range(len(names)): - ax.text(j, i, f'{cov_matrix[i, j]:.2f}', ha='center', va='center', color='w') - plt.show() - - -def filter_service_jobs(name): - if name.startswith('docker'): - return True - if name.startswith('binary'): - return True - return False - - -def filter_cuda_test(name): - if filter_service_jobs(name): - return False - if 'libtorch' in name: - return False - if 'test' not in name: - return False - # Skip jit-profiling tests - if 'jit-profiling' in name: - return False - if 'cuda11' in name: - return 
False - # Skip VS2017 tests - if 'vs2017' in name: - return False - return 'cuda' in name and 'nogpu' not in name - - -def filter_cuda_build(name): - if filter_service_jobs(name): - return False - if 'libtorch' in name: - return False - return 'cuda' in name and name.endswith('build') - - -def filter_windows_test(name): - if filter_service_jobs(name): - return False - # Skip jit-profiling tests - if 'jit-profiling' in name: - return False - return 'test' in name and 'windows' in name - - -def compute_covariance(branch='master', name_filter: Optional[Callable[[str], bool]] = None): - import numpy as np - revisions: MutableSet[str] = set() - job_summary: Dict[str, Dict[str, float]] = {} - - # Extract data - print(f"Computing covariance for {branch if branch is not None else 'all branches'}") - ci_cache = CircleCICache(None) - pipelines = ci_cache.get_pipelines(branch=branch) - for pipeline in pipelines: - if pipeline['trigger']['type'] == 'schedule': - continue - revision = pipeline['vcs']['revision'] - pipeline_jobs: Dict[str, float] = {} - blocked_jobs: MutableSet[str] = set() - workflows = ci_cache.get_pipeline_workflows(pipeline['id']) - for workflow in workflows: - if is_workflow_in_progress(workflow): - continue - jobs = ci_cache.get_workflow_jobs(workflow['id']) - for job in jobs: - job_name = job['name'] - job_status = job['status'] - # Handle renames - if job_name == 'pytorch_linux_xenial_cuda10_1_cudnn7_py3_NO_AVX2_test': - job_name = 'pytorch_linux_xenial_cuda10_1_cudnn7_py3_nogpu_NO_AVX2_test' - if job_name == 'pytorch_linux_xenial_cuda10_1_cudnn7_py3_NO_AVX_NO_AVX2_test': - job_name = 'pytorch_linux_xenial_cuda10_1_cudnn7_py3_nogpu_NO_AVX_test' - if job_status in ['infrastructure_fail', 'canceled']: - continue - if callable(name_filter) and not name_filter(job_name): - continue - if job_status == 'blocked': - blocked_jobs.add(job_name) - continue - if job_name in blocked_jobs: - blocked_jobs.remove(job_name) - result = 1.0 if job_status == 'success' else 
-1.0 - pipeline_jobs[job_name] = result - # Skip build with blocked job [which usually means build failed due to the test failure] - if len(blocked_jobs) != 0: - continue - # Skip all success workflows - if all(result == 1.0 for result in pipeline_jobs.values()): - continue - revisions.add(revision) - for job_name in pipeline_jobs: - if job_name not in job_summary: - job_summary[job_name] = {} - job_summary[job_name][revision] = pipeline_jobs[job_name] - # Analyze results - job_names = sorted(job_summary.keys()) - # revisions = sorted(revisions) - job_data = np.zeros((len(job_names), len(revisions)), dtype=np.float) - print(f"Number of observations: {len(revisions)}") - for job_idx, job_name in enumerate(job_names): - job_row = job_summary[job_name] - for rev_idx, revision in enumerate(revisions): - if revision in job_row: - job_data[job_idx, rev_idx] = job_row[revision] - success_rate = job_data[job_idx, ].sum(where=job_data[job_idx, ] > 0.0) / len(job_row) - present_rate = 1.0 * len(job_row) / len(revisions) - print(f"{job_name}: missing {100.0 * (1.0 - present_rate):.2f}% success rate: {100 * success_rate:.2f}%") - cov_matrix = np.corrcoef(job_data) - plot_heatmap(cov_matrix, job_names) - - -def print_artifacts(branch, item_count, name_filter: Callable[[str], bool]) -> None: - ci_cache = CircleCICache(token=get_circleci_token()) - for pipeline, _, job in ci_cache.get_pipeline_jobs(branch=branch, item_count=item_count): - revision = pipeline['vcs']['revision'] - if not name_filter(job["name"]): - continue - job_number = job.get("job_number") - if job_number is None: - continue - artifacts = ci_cache.get_job_artifacts('gh/pytorch/pytorch', job_number) - for artifact in artifacts: - name = os.path.basename(artifact['path']) - url = artifact["url"] - print(f"{revision} {name} {url}") - - -def print_duration(branch, item_count, name_filter: Callable[[str], bool]) -> None: - ci_cache = CircleCICache(token=get_circleci_token()) - for pipeline, workflow, job in 
ci_cache.get_pipeline_jobs(branch=branch, item_count=item_count): - job_name, job_status, job_number = job['name'], job['status'], job.get("job_number") - revision = pipeline['vcs']['revision'] - if not name_filter(job_name) or job_number is None: - continue - if job_status in ['blocked', 'canceled', 'unauthorized', 'running', 'not_run', 'failing']: - continue - started_at = str2date(job['started_at']) - stopped_at = str2date(job['stopped_at']) - duration = stopped_at - started_at - print(f"{job_name} {revision} {duration} {started_at}") - - -def parse_arguments(): - from argparse import ArgumentParser - parser = ArgumentParser(description="Download and analyze circle logs") - parser.add_argument('--plot-graph', type=str, nargs='?', help="Plot job time trends", const='') - parser.add_argument('--output', type=str, help="Output file name for the graphs") - parser.add_argument('--get_artifacts', type=str) - parser.add_argument('--print-duration', type=str) - parser.add_argument('--branch', type=str) - parser.add_argument('--item_count', type=int, default=100) - parser.add_argument('--compute_covariance', choices=['cuda_test', 'cuda_build', 'windows_test']) - return parser.parse_args() - - -if __name__ == '__main__': - args = parse_arguments() - if args.get_artifacts is not None: - print_artifacts(branch=args.branch, - item_count=args.item_count, - name_filter=lambda x: args.get_artifacts in x) - sys.exit(0) - if args.print_duration is not None: - print_duration(branch=args.branch, - item_count=args.item_count, - name_filter=lambda x: args.print_duration in x) - sys.exit(0) - if args.compute_covariance is not None: - name_filter = { - 'cuda_test': filter_cuda_test, - 'cuda_build': filter_cuda_build, - 'windows_test': filter_windows_test, - }[args.compute_covariance] - compute_covariance(branch=args.branch, name_filter=name_filter) - sys.exit(0) - if args.plot_graph is not None: - plot_graph(args.plot_graph, args.output, args.branch) - sys.exit(0) - 
fetch_status(branch=args.branch, item_count=args.item_count) diff --git a/analytics/validate_pypi_staging.py b/analytics/validate_pypi_staging.py new file mode 100644 index 000000000..a7104c314 --- /dev/null +++ b/analytics/validate_pypi_staging.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python3 + +import os.path +import shutil +import tempfile +import zipfile +import boto3 +import botocore + + +PLATFORMS = [ + "manylinux1_x86_64", + "manylinux2014_aarch64", + "win_amd64", + "macosx_10_9_x86_64", + "macosx_11_0_arm64", +] +PYTHON_VERSIONS = ["cp38", "cp39", "cp310", "cp311", "cp312"] +S3_PYPI_STAGING = "pytorch-backup" +PACKAGE_RELEASES = { + "torch": "2.2.0", + "torchvision": "0.17.0", + "torchaudio": "2.2.0", + "torchtext": "0.17.0", +} + +PATTERN_V = "Version:" +PATTERN_RD = "Requires-Dist:" + +s3 = boto3.client("s3") + + +def get_size(path): + size = os.path.getsize(path) + if size < 1024: + return f"{size} bytes" + elif size < pow(1024, 2): + return f"{round(size/1024, 2)} KB" + elif size < pow(1024, 3): + return f"{round(size/(pow(1024,2)), 2)} MB" + elif size < pow(1024, 4): + return f"{round(size/(pow(1024,3)), 2)} GB" + + +def generate_expected_builds(platform: str, package: str, release: str) -> list: + builds = [] + for py_version in PYTHON_VERSIONS: + py_spec = f"{py_version}-{py_version}" + platform_spec = platform + + if package == "torchtext" and ( + platform == "manylinux2014_aarch64" or py_version == "cp312" + ): + continue + + # strange macos file nameing + if "macos" in platform: + if package == "torch": + py_spec = f"{py_version}-none" + elif "macosx_10_9_x86_64" in platform: + platform_spec = "macosx_10_13_x86_64" + + builds.append( + f"{package}-{release}-pypi-staging/{package}-{release}-{py_spec}-{platform_spec}.whl" + ) + + return builds + + +def validate_file_metadata(build: str, package: str, version: str): + temp_dir = tempfile.mkdtemp() + tmp_file = f"{temp_dir}/{os.path.basename(build)}" + s3.download_file(Bucket=S3_PYPI_STAGING, Key=build, 
Filename=tmp_file) + print(f"Downloaded: {tmp_file} {get_size(tmp_file)}") + with zipfile.ZipFile(f"{temp_dir}/{os.path.basename(build)}", "r") as zip_ref: + zip_ref.extractall(f"{temp_dir}") + + for i, line in enumerate( + open(f"{temp_dir}/{package}-{version}.dist-info/METADATA") + ): + if line.startswith(PATTERN_V): + print(f"{line}", end="") + exttracted_version = line.removeprefix(PATTERN_V).strip() + if version != exttracted_version: + print( + f"FAILURE VERSION DOES NOT MATCH expected {version} got {exttracted_version}" + ) + + elif line.startswith(PATTERN_RD): + print(f"{line}", end="") + + shutil.rmtree(temp_dir) + + +def main(): + expected_builds = dict.fromkeys(PACKAGE_RELEASES, []) + + # Iterate over platform to gather build information of available conda version. + for package in PACKAGE_RELEASES: + for platform in PLATFORMS: + expected_builds[package] = expected_builds[ + package + ] + generate_expected_builds(platform, package, PACKAGE_RELEASES[package]) + + for package in PACKAGE_RELEASES: + count = 0 + for build in expected_builds[package]: + try: + s3.head_object(Bucket=S3_PYPI_STAGING, Key=build) + print(f"Validating filename {os.path.basename(build)}") + validate_file_metadata(build, package, PACKAGE_RELEASES[package]) + count += 1 + except botocore.exceptions.ClientError as e: + if e.response["Error"]["Code"] == "404": + print(f"FAILED 404 Error on {build}") + elif e.response["Error"]["Code"] == "403": + print(f"FAILED Unauthorized Error on {build}") + else: + print(f"Error on {build}") + print(f"Package Validated {count} for {package}") + + +if __name__ == "__main__": + main() diff --git a/release/README.md b/release/README.md index 19fd28bf8..a80144b17 100644 --- a/release/README.md +++ b/release/README.md @@ -12,6 +12,13 @@ These are a collection of scripts that are to be used for release activities. 
* Access to upload conda packages to the [`pytorch`](https://anaconda.org/pytorch) conda channel * Access to the PyPI repositories (like [torch](https://pypi.org/project/torch)) +## Promote pypi to staging + +Following steps needed in order to promote pypi to staging: +1. Edit `release_versions.sh` and set correct version +2. Run promote script : `./pypi/promote_pypi_to_staging.sh` +3. Edit and run `../analytics/validate_pypi_staging.py` to perform initial prevalidation of binaries for pypi promotion +4. Manually inspect and spot check binaries staged for pypi promotion by logging into s3 and downloading packages ## Promote diff --git a/release/pypi/promote_pypi_to_staging.sh b/release/pypi/promote_pypi_to_staging.sh index dbc00e24a..e05634c36 100644 --- a/release/pypi/promote_pypi_to_staging.sh +++ b/release/pypi/promote_pypi_to_staging.sh @@ -49,7 +49,8 @@ PLATFORM="win_amd64" VERSION_SUFFIX="${CPU_VERSION_SUFFIX}" upload_pypi_to_stagi PLATFORM="${MACOS_X86_64}" VERSION_SUFFIX="" upload_pypi_to_staging torchtext "${TORCHTEXT_VERSION}" PLATFORM="${MACOS_ARM64}" VERSION_SUFFIX="" upload_pypi_to_staging torchtext "${TORCHTEXT_VERSION}" -PLATFORM="manylinux2014_x86_64" VERSION_SUFFIX="" upload_pypi_to_staging torchdata "${TORCHDATA_VERSION}" -PLATFORM="win_amd64" VERSION_SUFFIX="" upload_pypi_to_staging torchdata "${TORCHDATA_VERSION}" -PLATFORM="${MACOS_X86_64}" VERSION_SUFFIX="" upload_pypi_to_staging torchdata "${TORCHDATA_VERSION}" -PLATFORM="${MACOS_ARM64}" VERSION_SUFFIX="" upload_pypi_to_staging torchdata "${TORCHDATA_VERSION}" +# Please note torchdata is not released currently hence turning it off +#PLATFORM="manylinux2014_x86_64" VERSION_SUFFIX="" upload_pypi_to_staging torchdata "${TORCHDATA_VERSION}" +#PLATFORM="win_amd64" VERSION_SUFFIX="" upload_pypi_to_staging torchdata "${TORCHDATA_VERSION}" +#PLATFORM="${MACOS_X86_64}" VERSION_SUFFIX="" upload_pypi_to_staging torchdata "${TORCHDATA_VERSION}" +#PLATFORM="${MACOS_ARM64}" VERSION_SUFFIX="" 
upload_pypi_to_staging torchdata "${TORCHDATA_VERSION}" From 6f3530cd25ce0b5456febf193d57ef343663a608 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Fri, 26 Jan 2024 17:42:48 -0500 Subject: [PATCH 199/212] [Analytics] Pypi validations. Add call to check-wheel-contents (#1689) --- analytics/validate_pypi_staging.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/analytics/validate_pypi_staging.py b/analytics/validate_pypi_staging.py index a7104c314..fd5026a9e 100644 --- a/analytics/validate_pypi_staging.py +++ b/analytics/validate_pypi_staging.py @@ -2,12 +2,13 @@ import os.path import shutil +import subprocess import tempfile import zipfile + import boto3 import botocore - PLATFORMS = [ "manylinux1_x86_64", "manylinux2014_aarch64", @@ -72,6 +73,22 @@ def validate_file_metadata(build: str, package: str, version: str): tmp_file = f"{temp_dir}/{os.path.basename(build)}" s3.download_file(Bucket=S3_PYPI_STAGING, Key=build, Filename=tmp_file) print(f"Downloaded: {tmp_file} {get_size(tmp_file)}") + + try: + check_wheels = subprocess.run( + ["check-wheel-contents", tmp_file, "--ignore", "W002,W009,W004"], + capture_output=True, + text=True, + check=True, + encoding="utf-8", + ) + print(check_wheels.stdout) + print(check_wheels.stderr) + except subprocess.CalledProcessError as e: + exit_code = e.returncode + stderror = e.stderr + print(exit_code, stderror) + with zipfile.ZipFile(f"{temp_dir}/{os.path.basename(build)}", "r") as zip_ref: zip_ref.extractall(f"{temp_dir}") From 0582b02f0ba73ca3ec5dc26943e7980d5c19d7fc Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Mon, 29 Jan 2024 13:28:30 -0500 Subject: [PATCH 200/212] Modify Validate Nightly PyPI Wheel Binary Size to pick correct binary (#1690) --- .github/workflows/validate-nightly-pypi-wheel-binary-size.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/validate-nightly-pypi-wheel-binary-size.yml 
b/.github/workflows/validate-nightly-pypi-wheel-binary-size.yml index a995ec817..24fffc16e 100644 --- a/.github/workflows/validate-nightly-pypi-wheel-binary-size.yml +++ b/.github/workflows/validate-nightly-pypi-wheel-binary-size.yml @@ -22,5 +22,5 @@ jobs: - name: Run validation run: | python tools/binary_size_validation/binary_size_validation.py \ - --url https://download.pytorch.org/whl/nightly/torch/ \ - --include "pypi" --only-latest-version --threshold 750 \ No newline at end of file + --url https://download.pytorch.org/whl/nightly/cu121/torch/ \ + --include "linux" --only-latest-version --threshold 750 From aad5cecd7653aa231964dedfe86227d350a0b969 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Tue, 30 Jan 2024 08:25:27 -0500 Subject: [PATCH 201/212] Fix test_ops scripts on release validation testing (#1691) --- .github/scripts/validate_test_ops.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/validate_test_ops.sh b/.github/scripts/validate_test_ops.sh index 12963f289..5df646705 100644 --- a/.github/scripts/validate_test_ops.sh +++ b/.github/scripts/validate_test_ops.sh @@ -7,7 +7,7 @@ retry () { } BRANCH="" -if [[ ${MATRIX_CHANNEL} == "test" ]]; then +if [[ ${MATRIX_CHANNEL} == "test" || ${MATRIX_CHANNEL} == "release" ]]; then SHORT_VERSION=${MATRIX_STABLE_VERSION%.*} BRANCH="--branch release/${SHORT_VERSION}" fi From 88adb304e04a7eaea46d572490230d09991c4bf8 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Tue, 30 Jan 2024 09:38:07 -0500 Subject: [PATCH 202/212] Add option to validate only from download.pytorch.org (#1692) --- .../workflows/validate-aarch64-linux-binaries.yml | 12 ++++++++++++ .github/workflows/validate-binaries.yml | 15 +++++++++++++++ .github/workflows/validate-linux-binaries.yml | 11 +++++++++++ .../workflows/validate-macos-arm64-binaries.yml | 12 ++++++++++++ .github/workflows/validate-macos-binaries.yml | 12 ++++++++++++ .github/workflows/validate-windows-binaries.yml | 12 ++++++++++++ 6 files 
changed, 74 insertions(+) diff --git a/.github/workflows/validate-aarch64-linux-binaries.yml b/.github/workflows/validate-aarch64-linux-binaries.yml index 6b1a60d7c..8761df4f9 100644 --- a/.github/workflows/validate-aarch64-linux-binaries.yml +++ b/.github/workflows/validate-aarch64-linux-binaries.yml @@ -27,6 +27,11 @@ on: default: "" required: false type: string + use-only-dl-pytorch-org: + description: 'Use only download.pytorch.org when generating wheel install command' + default: false + required: false + type: boolean workflow_dispatch: inputs: channel: @@ -58,6 +63,11 @@ on: default: "" required: false type: string + use-only-dl-pytorch-org: + description: 'Use only download.pytorch.org when generating wheel install command' + default: false + required: false + type: boolean jobs: generate-aarch64-linux-matrix: @@ -67,6 +77,8 @@ jobs: os: linux-aarch64 channel: ${{ inputs.channel }} with-cuda: disable + use-only-dl-pytorch-org: ${{ inputs.use-only-dl-pytorch-org }} + linux-aarch64: needs: generate-aarch64-linux-matrix strategy: diff --git a/.github/workflows/validate-binaries.yml b/.github/workflows/validate-binaries.yml index 558be8e56..78e631ab8 100644 --- a/.github/workflows/validate-binaries.yml +++ b/.github/workflows/validate-binaries.yml @@ -32,6 +32,11 @@ on: default: false required: false type: boolean + use-only-dl-pytorch-org: + description: 'Use only download.pytorch.org when generating wheel install command' + default: false + required: false + type: boolean workflow_dispatch: inputs: os: @@ -75,6 +80,11 @@ on: default: false required: false type: boolean + use-only-dl-pytorch-org: + description: 'Use only download.pytorch.org when generating wheel install command' + default: false + required: false + type: boolean jobs: @@ -93,6 +103,7 @@ jobs: torchonly: ${{ inputs.torchonly }} version: ${{ inputs.version }} release-matrix: ${{ needs.generate-release-matrix.outputs.matrix }} + use-only-dl-pytorch-org: ${{ inputs.use-only-dl-pytorch-org }} 
linux: if: inputs.os == 'linux' || inputs.os == 'all' @@ -105,6 +116,7 @@ jobs: version: ${{ inputs.version }} release-matrix: ${{ needs.generate-release-matrix.outputs.matrix }} include-test-ops: ${{ inputs.include-test-ops }} + use-only-dl-pytorch-org: ${{ inputs.use-only-dl-pytorch-org }} linux-aarch64: if: inputs.os == 'linux-aarch64' || inputs.os == 'all' @@ -116,6 +128,7 @@ jobs: torchonly: ${{ inputs.torchonly }} version: ${{ inputs.version }} release-matrix: ${{ needs.generate-release-matrix.outputs.matrix }} + use-only-dl-pytorch-org: ${{ inputs.use-only-dl-pytorch-org }} mac: if: inputs.os == 'macos' || inputs.os == 'all' @@ -127,6 +140,7 @@ jobs: torchonly: ${{ inputs.torchonly }} version: ${{ inputs.version }} release-matrix: ${{ needs.generate-release-matrix.outputs.matrix }} + use-only-dl-pytorch-org: ${{ inputs.use-only-dl-pytorch-org }} mac-arm64: if: inputs.os == 'macos' || inputs.os == 'all' @@ -138,3 +152,4 @@ jobs: torchonly: ${{ inputs.torchonly }} version: ${{ inputs.version }} release-matrix: ${{ needs.generate-release-matrix.outputs.matrix }} + use-only-dl-pytorch-org: ${{ inputs.use-only-dl-pytorch-org }} diff --git a/.github/workflows/validate-linux-binaries.yml b/.github/workflows/validate-linux-binaries.yml index de5bda999..08507f64d 100644 --- a/.github/workflows/validate-linux-binaries.yml +++ b/.github/workflows/validate-linux-binaries.yml @@ -32,6 +32,11 @@ on: default: false required: false type: boolean + use-only-dl-pytorch-org: + description: 'Use only download.pytorch.org when generating wheel install command' + default: false + required: false + type: boolean workflow_dispatch: inputs: channel: @@ -68,6 +73,11 @@ on: default: false required: false type: boolean + use-only-dl-pytorch-org: + description: 'Use only download.pytorch.org when generating wheel install command' + default: false + required: false + type: boolean jobs: generate-linux-matrix: @@ -76,6 +86,7 @@ jobs: package-type: all os: linux channel: ${{ inputs.channel 
}} + use-only-dl-pytorch-org: ${{ inputs.use-only-dl-pytorch-org }} linux: needs: generate-linux-matrix diff --git a/.github/workflows/validate-macos-arm64-binaries.yml b/.github/workflows/validate-macos-arm64-binaries.yml index 541183b9a..dea76ffeb 100644 --- a/.github/workflows/validate-macos-arm64-binaries.yml +++ b/.github/workflows/validate-macos-arm64-binaries.yml @@ -27,6 +27,11 @@ on: default: "" required: false type: string + use-only-dl-pytorch-org: + description: 'Use only download.pytorch.org when generating wheel install command' + default: false + required: false + type: boolean workflow_dispatch: inputs: channel: @@ -58,6 +63,11 @@ on: default: "" required: false type: string + use-only-dl-pytorch-org: + description: 'Use only download.pytorch.org when generating wheel install command' + default: false + required: false + type: boolean jobs: generate-macos-arm64-matrix: @@ -66,6 +76,8 @@ jobs: package-type: all os: macos-arm64 channel: ${{ inputs.channel }} + use-only-dl-pytorch-org: ${{ inputs.use-only-dl-pytorch-org }} + macos-arm64: needs: generate-macos-arm64-matrix strategy: diff --git a/.github/workflows/validate-macos-binaries.yml b/.github/workflows/validate-macos-binaries.yml index 9610b36f7..76035a97d 100644 --- a/.github/workflows/validate-macos-binaries.yml +++ b/.github/workflows/validate-macos-binaries.yml @@ -27,6 +27,11 @@ on: default: "" required: false type: string + use-only-dl-pytorch-org: + description: 'Use only download.pytorch.org when generating wheel install command' + default: false + required: false + type: boolean workflow_dispatch: inputs: channel: @@ -58,6 +63,11 @@ on: default: "" required: false type: string + use-only-dl-pytorch-org: + description: 'Use only download.pytorch.org when generating wheel install command' + default: false + required: false + type: boolean jobs: generate-macos-matrix: @@ -66,6 +76,8 @@ jobs: package-type: all os: macos channel: ${{ inputs.channel }} + use-only-dl-pytorch-org: ${{ 
inputs.use-only-dl-pytorch-org }} + macos: needs: generate-macos-matrix strategy: diff --git a/.github/workflows/validate-windows-binaries.yml b/.github/workflows/validate-windows-binaries.yml index 1c501cfb3..9d4b3a8c4 100644 --- a/.github/workflows/validate-windows-binaries.yml +++ b/.github/workflows/validate-windows-binaries.yml @@ -27,6 +27,11 @@ on: default: "" required: false type: string + use-only-dl-pytorch-org: + description: 'Use only download.pytorch.org when generating wheel install command' + default: false + required: false + type: boolean workflow_dispatch: inputs: channel: @@ -58,6 +63,11 @@ on: default: "" required: false type: string + use-only-dl-pytorch-org: + description: 'Use only download.pytorch.org when generating wheel install command' + default: false + required: false + type: boolean jobs: generate-windows-matrix: @@ -66,6 +76,8 @@ jobs: package-type: all os: windows channel: ${{ inputs.channel }} + use-only-dl-pytorch-org: ${{ inputs.use-only-dl-pytorch-org }} + win: needs: generate-windows-matrix strategy: From add4488dcc3504b9e58bd470a1041501a294e7d2 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Tue, 30 Jan 2024 10:27:11 -0500 Subject: [PATCH 203/212] Exclude pipy and poetry tests when USE_ONLY_DL_PYTORCH_ORG is set (#1693) --- .github/workflows/validate-linux-binaries.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/validate-linux-binaries.yml b/.github/workflows/validate-linux-binaries.yml index 08507f64d..87c9f7cd8 100644 --- a/.github/workflows/validate-linux-binaries.yml +++ b/.github/workflows/validate-linux-binaries.yml @@ -107,13 +107,17 @@ jobs: export ENV_NAME="conda-env-${{ github.run_id }}" export TORCH_ONLY=${{ inputs.torchonly }} export INCLUDE_TEST_OPS=${{ inputs.include-test-ops }} + export USE_ONLY_DL_PYTORCH_ORG=${{ inputs.use-only-dl-pytorch-org }} export RELEASE_VERSION=${{ inputs.version }} export TARGET_OS="linux" eval "$(conda shell.bash hook)" printf '%s\n' ${{ 
toJson(inputs.release-matrix) }} > release_matrix.json # Special case PyPi installation package. And Install of PyPi package via poetry - if [[ ${MATRIX_PACKAGE_TYPE} == "manywheel" && ${MATRIX_GPU_ARCH_VERSION} == "12.1" && ${MATRIX_CHANNEL} == "release" ]]; then + if [[ ${MATRIX_PACKAGE_TYPE} == "manywheel" && \ + ${MATRIX_GPU_ARCH_VERSION} == "12.1" && \ + ${MATRIX_CHANNEL} == "release" && \ + ${USE_ONLY_DL_PYTORCH_ORG} == "false" ]]; then source ./.github/scripts/validate_pipy.sh source ./.github/scripts/validate_poetry.sh fi From 3d302eec26e4e26ec5bf767aef8278c1b521239d Mon Sep 17 00:00:00 2001 From: Jeff Daily Date: Thu, 1 Feb 2024 13:33:45 -0800 Subject: [PATCH 204/212] [ROCm] add hipblaslt library files (#1695) With https://github.com/pytorch/pytorch/pull/114329 merged, we need to include hipblaslt library files within the ROCm wheel. --- manywheel/build_rocm.sh | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/manywheel/build_rocm.sh b/manywheel/build_rocm.sh index a44d6212f..4e513957f 100755 --- a/manywheel/build_rocm.sh +++ b/manywheel/build_rocm.sh @@ -133,7 +133,7 @@ elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then LIBDRM_AMDGPU_PATH="/usr/lib/x86_64-linux-gnu/libdrm_amdgpu.so.1" MAYBE_LIB64=lib fi -OS_SO_PATHS=($LIBGOMP_PATH $LIBNUMA_PATH\ +OS_SO_PATHS=($LIBGOMP_PATH $LIBNUMA_PATH\ $LIBELF_PATH $LIBTINFO_PATH\ $LIBDRM_PATH $LIBDRM_AMDGPU_PATH) OS_SO_FILES=() @@ -147,7 +147,7 @@ done if [[ $ROCM_INT -ge 50200 ]]; then ROCBLAS_LIB_SRC=$ROCM_HOME/lib/rocblas/library ROCBLAS_LIB_DST=lib/rocblas/library -else +else ROCBLAS_LIB_SRC=$ROCM_HOME/rocblas/lib/library ROCBLAS_LIB_DST=lib/library fi @@ -156,17 +156,24 @@ ARCH_SPECIFIC_FILES=$(ls $ROCBLAS_LIB_SRC | grep -E $ARCH) OTHER_FILES=$(ls $ROCBLAS_LIB_SRC | grep -v gfx) ROCBLAS_LIB_FILES=($ARCH_SPECIFIC_FILES $OTHER_FILES) +# hipblaslt library files +HIPBLASLT_LIB_SRC=$ROCM_HOME/lib/hipblaslt/library +HIPBLASLT_LIB_DST=lib/hipblaslt/library +ARCH_SPECIFIC_FILES=$(ls 
$HIPBLASLT_LIB_SRC | grep -E $ARCH) +OTHER_FILES=$(ls $HIPBLASLT_LIB_SRC | grep -v gfx) +HIPBLASLT_LIB_FILES=($ARCH_SPECIFIC_FILES $OTHER_FILES) + # ROCm library files ROCM_SO_PATHS=() for lib in "${ROCM_SO_FILES[@]}" do file_path=($(find $ROCM_HOME/lib/ -name "$lib")) # First search in lib - if [[ -z $file_path ]]; then + if [[ -z $file_path ]]; then if [ -d "$ROCM_HOME/lib64/" ]; then file_path=($(find $ROCM_HOME/lib64/ -name "$lib")) # Then search in lib64 fi fi - if [[ -z $file_path ]]; then + if [[ -z $file_path ]]; then file_path=($(find $ROCM_HOME/ -name "$lib")) # Then search in ROCM_HOME fi if [[ -z $file_path ]]; then @@ -188,11 +195,13 @@ DEPS_SONAME=( DEPS_AUX_SRCLIST=( "${ROCBLAS_LIB_FILES[@]/#/$ROCBLAS_LIB_SRC/}" + "${HIPBLASLT_LIB_FILES[@]/#/$HIPBLASLT_LIB_SRC/}" "/opt/amdgpu/share/libdrm/amdgpu.ids" ) DEPS_AUX_DSTLIST=( "${ROCBLAS_LIB_FILES[@]/#/$ROCBLAS_LIB_DST/}" + "${HIPBLASLT_LIB_FILES[@]/#/$HIPBLASLT_LIB_DST/}" "share/libdrm/amdgpu.ids" ) From da779da84f6dd212f8262c675e29afd993f8d289 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Thu, 1 Feb 2024 22:30:58 -0800 Subject: [PATCH 205/212] Minor tweak to fbgemmgpu version to ignore RC suffix (#1694) --- release/promote.sh | 8 +++++++- release/release_versions.sh | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/release/promote.sh b/release/promote.sh index 5bf7fe0b5..36644494e 100644 --- a/release/promote.sh +++ b/release/promote.sh @@ -12,7 +12,13 @@ TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION:-2.1.1} TORCHTEXT_VERSION=${TORCHTEXT_VERSION:-0.16.1} TORCHDATA_VERSION=${TORCHDATA_VERSION:-0.7.1} TORCHREC_VERSION=${TORCHREC_VERSION:-0.6.0} -FBGEMMGPU_VERSION=${FBGEMMGPU_VERSION:-0.6.0} + +# NB: FBGEMMGPU uses the practice of keeping rc version in the filename, i.e. +# fbgemm_gpu-0.6.0rc1+cpu-cp311-cp311. On the other hand, its final RC will +# be without rc suffix, fbgemm_gpu-0.6.0+cpu-cp311-cp311, and that's the one +# ready to be promoted. 
So, keeping a + here in the version name allows the +# promote script to find the correct binaries +FBGEMMGPU_VERSION=${FBGEMMGPU_VERSION:-0.6.0+} DRY_RUN=${DRY_RUN:-enabled} diff --git a/release/release_versions.sh b/release/release_versions.sh index 88b6cbcd3..311358bd3 100644 --- a/release/release_versions.sh +++ b/release/release_versions.sh @@ -7,4 +7,10 @@ TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION:-2.2.0} TORCHTEXT_VERSION=${TORCHTEXT_VERSION:-0.17.0} TORCHDATA_VERSION=${TORCHDATA_VERSION:-0.7.1} TORCHREC_VERSION=${TORCHREC_VERSION:-0.6.0} -FBGEMMGPU_VERSION=${FBGEMMGPU_VERSION:-0.6.0} + +# NB: FBGEMMGPU uses the practice of keeping rc version in the filename, i.e. +# fbgemm_gpu-0.6.0rc1+cpu-cp311-cp311. On the other hand, its final RC will +# be without rc suffix, fbgemm_gpu-0.6.0+cpu-cp311-cp311, and that's the one +# ready to be promoted. So, keeping a + here in the version name allows the +# promote script to find the correct binaries +FBGEMMGPU_VERSION=${FBGEMMGPU_VERSION:-0.6.0+} From 96bd8512cb0a04368930cdad47b6b91ff6671f5f Mon Sep 17 00:00:00 2001 From: Huy Do Date: Tue, 6 Feb 2024 11:03:20 -0800 Subject: [PATCH 206/212] Remove custom PyTorch build dependency logic on 3.11 (#1697) * Remove custom PyTorch build dependency logic on 3.11 * Add a smoke test for openmp --- run_tests.sh | 5 ++++- wheel/build_wheel.sh | 7 ++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/run_tests.sh b/run_tests.sh index fd66835e2..2046501ca 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -142,7 +142,7 @@ if [[ "$cuda_ver" != 'cpu' ]]; then fi fi -# Check that OpenBlas is not linked to on Macs +# Check that OpenBlas is not linked to on MacOS if [[ "$(uname)" == 'Darwin' ]]; then echo "Checking the OpenBLAS is not linked to" all_dylibs=($(find "$(python -c "import site; print(site.getsitepackages()[0])")"/torch -name '*.dylib')) @@ -153,6 +153,9 @@ if [[ "$(uname)" == 'Darwin' ]]; then exit 1 fi done + + echo "Checking that OpenMP is available" + python -c 
"import torch; exit(0 if torch.backends.openmp.is_available() else 1)" fi popd diff --git a/wheel/build_wheel.sh b/wheel/build_wheel.sh index 1186bc56a..5e9c68041 100755 --- a/wheel/build_wheel.sh +++ b/wheel/build_wheel.sh @@ -182,11 +182,8 @@ tmp_env_name="wheel_py$python_nodot" conda create ${EXTRA_CONDA_INSTALL_FLAGS} -yn "$tmp_env_name" python="$desired_python" source activate "$tmp_env_name" -if [[ "$desired_python" == "3.11" ]]; then - retry pip install -q "numpy${NUMPY_PINNED_VERSION}" "setuptools${SETUPTOOLS_PINNED_VERSION}" "pyyaml${PYYAML_PINNED_VERSION}" typing_extensions requests -else - retry conda install ${EXTRA_CONDA_INSTALL_FLAGS} -yq "numpy${NUMPY_PINNED_VERSION}" nomkl "setuptools${SETUPTOOLS_PINNED_VERSION}" "pyyaml${PYYAML_PINNED_VERSION}" typing_extensions requests -fi +retry conda install ${EXTRA_CONDA_INSTALL_FLAGS} -yq "numpy${NUMPY_PINNED_VERSION}" nomkl "setuptools${SETUPTOOLS_PINNED_VERSION}" "pyyaml${PYYAML_PINNED_VERSION}" typing_extensions requests + if [[ "$(uname -m)" == "arm64" ]]; then retry conda install ${EXTRA_CONDA_INSTALL_FLAGS} -yq cmake ninja else From 196b77bc829631379dc9242797cbe3e7e03b28dc Mon Sep 17 00:00:00 2001 From: Huy Do Date: Tue, 6 Feb 2024 12:09:00 -0800 Subject: [PATCH 207/212] Pin conda-build to 3.28.4 (#1698) --- conda/build_pytorch.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/conda/build_pytorch.sh b/conda/build_pytorch.sh index 39aab7ee8..473d3bf27 100755 --- a/conda/build_pytorch.sh +++ b/conda/build_pytorch.sh @@ -208,7 +208,8 @@ if [[ "$(uname)" == 'Darwin' ]]; then "$miniconda_sh" -b -p "$tmp_conda" && \ rm "$miniconda_sh" export PATH="$tmp_conda/bin:$PATH" - retry conda install -yq conda-build + # TODO(huydhn): We can revert the pin after https://github.com/conda/conda-build/issues/5167 is resolved + retry conda install -yq conda-build=3.28.4 elif [[ "$OSTYPE" == "msys" ]]; then export tmp_conda="${WIN_PACKAGE_WORK_DIR}\\conda" export 
miniconda_exe="${WIN_PACKAGE_WORK_DIR}\\miniconda.exe" @@ -351,8 +352,6 @@ for py_ver in "${DESIRED_PYTHON[@]}"; do conda install -y conda-package-handling conda==22.9.0 else conda install -y conda-package-handling conda==23.5.2 - # NS: To be removed after conda docker images are updated - conda update -y conda-build fi echo "Calling conda-build at $(date)" From 850d28bb1503dee0d0946fcdd3e1759af2061ffb Mon Sep 17 00:00:00 2001 From: snadampal <87143774+snadampal@users.noreply.github.com> Date: Tue, 6 Feb 2024 17:46:25 -0600 Subject: [PATCH 208/212] ci: aarch64 linux: fix torch performance issue with conda openblas package (#1696) changing the conda openblas package from pthread version to openmp version to match torch openmp runtime. The pthread version was conflicting with the openmp runtime and causing thread over-subscription and performance degradation. --- aarch64_linux/aarch64_ci_setup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aarch64_linux/aarch64_ci_setup.sh b/aarch64_linux/aarch64_ci_setup.sh index 53c8a5320..07a4757ff 100755 --- a/aarch64_linux/aarch64_ci_setup.sh +++ b/aarch64_linux/aarch64_ci_setup.sh @@ -30,7 +30,7 @@ if [[ "$DESIRED_PYTHON" == "3.8" ]]; then else NUMPY_VERSION="1.26.2" fi -conda install -y -c conda-forge numpy==${NUMPY_VERSION} pyyaml==6.0.1 patchelf==0.17.2 pygit2==1.13.2 openblas==0.3.25 ninja==1.11.1 scons==4.5.2 +conda install -y -c conda-forge numpy==${NUMPY_VERSION} pyyaml==6.0.1 patchelf==0.17.2 pygit2==1.13.2 openblas==0.3.25=*openmp* ninja==1.11.1 scons==4.5.2 python --version conda --version From fa8b6d667be4828bb88ec2a7d9e1b90fc4c081a9 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Tue, 13 Feb 2024 13:41:10 -0500 Subject: [PATCH 209/212] Add triton version for nightly and release (#1703) --- manywheel/build_cuda.sh | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/manywheel/build_cuda.sh b/manywheel/build_cuda.sh index 9919247ed..318273ba4 100644 --- 
a/manywheel/build_cuda.sh +++ b/manywheel/build_cuda.sh @@ -262,19 +262,20 @@ else exit 1 fi -# TODO: Remove me when Triton has a proper release channel -# No triton dependency for now on 3.12 since we don't have binaries for it -# and torch.compile doesn't work. -if [[ $(uname) == "Linux" && "$DESIRED_PYTHON" != "3.12" ]]; then + +TRITON_VERSION=$(cat $PYTORCH_ROOT/.ci/docker/triton_version.txt) +# Only linux Python < 3.12 are supported wheels for triton +TRITON_CONSTRAINT="platform_system == 'Linux' and platform_machine == 'x86_64' and python_version < '3.12'" +TRITON_REQUIREMENT="pytorch-triton==${TRITON_VERSION}; ${TRITON_CONSTRAINT}" +if [[ -n "$OVERRIDE_PACKAGE_VERSION" && "$OVERRIDE_PACKAGE_VERSION" =~ .*dev.* ]]; then TRITON_SHORTHASH=$(cut -c1-10 $PYTORCH_ROOT/.github/ci_commit_pins/triton.txt) - TRITON_VERSION=$(cat $PYTORCH_ROOT/.ci/docker/triton_version.txt) - TRITON_REQUIREMENT="pytorch-triton==${TRITON_VERSION}+${TRITON_SHORTHASH}; platform_system == 'Linux' and platform_machine == 'x86_64'" + TRITON_REQUIREMENT="pytorch-triton==${TRITON_VERSION}+${TRITON_SHORTHASH}; ${TRITON_CONSTRAINT}" +fi - if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then - export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${TRITON_REQUIREMENT}" - else - export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${PYTORCH_EXTRA_INSTALL_REQUIREMENTS} | ${TRITON_REQUIREMENT}" - fi +if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then + export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${TRITON_REQUIREMENT}" +else + export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${PYTORCH_EXTRA_INSTALL_REQUIREMENTS} | ${TRITON_REQUIREMENT}" fi # builder/test.sh requires DESIRED_CUDA to know what tests to exclude From 5c814e2527b3f5797488bf57d9d5425e63dcc1ac Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Tue, 13 Feb 2024 10:58:30 -0800 Subject: [PATCH 210/212] Bundle PTXAS into 11.8 wheel --- manywheel/build_cuda.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/manywheel/build_cuda.sh b/manywheel/build_cuda.sh 
index 318273ba4..39dafe8b1 100644 --- a/manywheel/build_cuda.sh +++ b/manywheel/build_cuda.sh @@ -197,6 +197,8 @@ elif [[ $CUDA_VERSION == "11.8" ]]; then export USE_STATIC_CUDNN=0 # Try parallelizing nvcc as well export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2" + # Bundle ptxas into the wheel, see https://github.com/pytorch/pytorch/pull/119750 + export BUILD_BUNDLE_PTXAS=1 if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then echo "Bundling with cudnn and cublas." From f4b92598544c7e8250a208e9616e62a25e7f7f18 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Fri, 16 Feb 2024 12:55:06 -0500 Subject: [PATCH 211/212] Add tensorrt promo script, bump release version for 2.2.1 (#1706) --- release/promote.sh | 10 ++++++---- release/release_versions.sh | 9 +++++---- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/release/promote.sh b/release/promote.sh index 36644494e..78a43409b 100644 --- a/release/promote.sh +++ b/release/promote.sh @@ -6,12 +6,13 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" source "${DIR}/release_versions.sh" # Make sure to update these versions when doing a release first -PYTORCH_VERSION=${PYTORCH_VERSION:-2.1.1} -TORCHVISION_VERSION=${TORCHVISION_VERSION:-0.16.1} -TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION:-2.1.1} -TORCHTEXT_VERSION=${TORCHTEXT_VERSION:-0.16.1} +PYTORCH_VERSION=${PYTORCH_VERSION:-2.2.1} +TORCHVISION_VERSION=${TORCHVISION_VERSION:-0.17.1} +TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION:-2.2.1} +TORCHTEXT_VERSION=${TORCHTEXT_VERSION:-0.17.1} TORCHDATA_VERSION=${TORCHDATA_VERSION:-0.7.1} TORCHREC_VERSION=${TORCHREC_VERSION:-0.6.0} +TENSORRT_VERSION=${TENSORRT_VERSION:-2.2.0} # NB: FBGEMMGPU uses the practice of keeping rc version in the filename, i.e. # fbgemm_gpu-0.6.0rc1+cpu-cp311-cp311. 
On the other hand, its final RC will @@ -113,6 +114,7 @@ promote_pypi() { # promote_s3 torchrec whl "${TORCHREC_VERSION}" # promote_s3 fbgemm-gpu whl "${FBGEMMGPU_VERSION}" # promote_s3 "libtorch-*" libtorch "${PYTORCH_VERSION}" +# promote_s3 "torch_tensorrt" whl "${TENSORRT_VERSION}" # promote_conda torchtriton conda "2.1.0" # promote_conda pytorch-cuda conda "11.8" diff --git a/release/release_versions.sh b/release/release_versions.sh index 311358bd3..af02cf7eb 100644 --- a/release/release_versions.sh +++ b/release/release_versions.sh @@ -1,12 +1,13 @@ #!/usr/bin/env bash # Make sure to update these versions when doing a release first -PYTORCH_VERSION=${PYTORCH_VERSION:-2.2.0} -TORCHVISION_VERSION=${TORCHVISION_VERSION:-0.17.0} -TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION:-2.2.0} -TORCHTEXT_VERSION=${TORCHTEXT_VERSION:-0.17.0} +PYTORCH_VERSION=${PYTORCH_VERSION:-2.2.1} +TORCHVISION_VERSION=${TORCHVISION_VERSION:-0.17.1} +TORCHAUDIO_VERSION=${TORCHAUDIO_VERSION:-2.2.1} +TORCHTEXT_VERSION=${TORCHTEXT_VERSION:-0.17.1} TORCHDATA_VERSION=${TORCHDATA_VERSION:-0.7.1} TORCHREC_VERSION=${TORCHREC_VERSION:-0.6.0} +TENSORRT_VERSION=${TENSORRT_VERSION:-2.2.0} # NB: FBGEMMGPU uses the practice of keeping rc version in the filename, i.e. # fbgemm_gpu-0.6.0rc1+cpu-cp311-cp311. 
On the other hand, its final RC will From af4827c637d2f1fca7fbc52e96364ea9840508a3 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Fri, 16 Feb 2024 17:46:15 -0500 Subject: [PATCH 212/212] Pin Conda to 23.11.0 --- .github/scripts/validate_binaries.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/scripts/validate_binaries.sh b/.github/scripts/validate_binaries.sh index 042b22767..f3b73e3de 100755 --- a/.github/scripts/validate_binaries.sh +++ b/.github/scripts/validate_binaries.sh @@ -3,7 +3,8 @@ if [[ ${MATRIX_PACKAGE_TYPE} == "libtorch" ]]; then unzip libtorch.zip else - conda update -y -n base -c defaults conda + # Conda pinned see issue: https://github.com/ContinuumIO/anaconda-issues/issues/13350 + conda install -y conda=23.11.0 # Please note ffmpeg is required for torchaudio, see https://github.com/pytorch/pytorch/issues/96159 conda create -y -n ${ENV_NAME} python=${MATRIX_PYTHON_VERSION} numpy ffmpeg conda activate ${ENV_NAME}