diff --git a/.circleci/config.yml b/.circleci/config.yml index 7dd66a97d72a3c..78ed6b02b8e30c 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -176,7 +176,6 @@ jobs: - run: python utils/check_config_attributes.py - run: python utils/check_doctest_list.py - run: make deps_table_check_updated - - run: python utils/tests_fetcher.py --sanity_check - run: python utils/update_metadata.py --check-only - run: python utils/check_task_guides.py diff --git a/.github/workflows/build-docker-images.yml b/.github/workflows/build-docker-images.yml index 6627812a666b6f..a7df11e8fb849a 100644 --- a/.github/workflows/build-docker-images.yml +++ b/.github/workflows/build-docker-images.yml @@ -3,7 +3,7 @@ name: Build docker images (scheduled) on: push: branches: - - docker-image* + - build_ci_docker_image* repository_dispatch: workflow_call: inputs: @@ -67,35 +67,6 @@ jobs: push: true tags: huggingface/transformers-all-latest-gpu-push-ci - latest-with-torch-nightly-docker: - name: "Nightly PyTorch + Stable TensorFlow" - # Push CI doesn't need this image - if: inputs.image_postfix != '-push-ci' - runs-on: ubuntu-latest - steps: - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - - - name: Check out code - uses: actions/checkout@v3 - - - name: Login to DockerHub - uses: docker/login-action@v2 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_PASSWORD }} - - - name: Build and push - uses: docker/build-push-action@v3 - with: - context: ./docker/transformers-all-latest-gpu - build-args: | - REF=main - PYTORCH=pre - push: true - tags: huggingface/transformers-all-latest-torch-nightly-gpu - latest-torch-deepspeed-docker: name: "Latest PyTorch + DeepSpeed" runs-on: ubuntu-latest @@ -153,34 +124,6 @@ jobs: push: true tags: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci - nightly-torch-deepspeed-docker: - name: "Nightly PyTorch + DeepSpeed" - # Push CI doesn't need this image - if: inputs.image_postfix != 
'-push-ci' - runs-on: ubuntu-latest - steps: - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - - - name: Check out code - uses: actions/checkout@v3 - - - name: Login to DockerHub - uses: docker/login-action@v2 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_PASSWORD }} - - - name: Build and push - uses: docker/build-push-action@v3 - with: - context: ./docker/transformers-pytorch-deepspeed-nightly-gpu - build-args: | - REF=main - push: true - tags: huggingface/transformers-pytorch-deepspeed-nightly-gpu - doc-builder: name: "Doc builder" # Push CI doesn't need this image diff --git a/.github/workflows/build-nightly-ci-docker-images.yml b/.github/workflows/build-nightly-ci-docker-images.yml new file mode 100644 index 00000000000000..f13dda7daa82bc --- /dev/null +++ b/.github/workflows/build-nightly-ci-docker-images.yml @@ -0,0 +1,75 @@ +name: Build docker images (Nightly CI) + +on: + workflow_call: + push: + branches: + - build_nightly_ci_docker_image* + +concurrency: + group: docker-images-builds + cancel-in-progress: false + +jobs: + latest-with-torch-nightly-docker: + name: "Nightly PyTorch + Stable TensorFlow" + runs-on: ubuntu-latest + steps: + - name: Cleanup disk + run: | + sudo ls -l /usr/local/lib/ + sudo ls -l /usr/share/ + sudo du -sh /usr/local/lib/ + sudo du -sh /usr/share/ + sudo rm -rf /usr/local/lib/android + sudo rm -rf /usr/share/dotnet + sudo du -sh /usr/local/lib/ + sudo du -sh /usr/share/ + - + name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + - + name: Check out code + uses: actions/checkout@v3 + - + name: Login to DockerHub + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_PASSWORD }} + - + name: Build and push + uses: docker/build-push-action@v3 + with: + context: ./docker/transformers-all-latest-gpu + build-args: | + REF=main + PYTORCH=pre + push: true + tags: 
huggingface/transformers-all-latest-torch-nightly-gpu + + nightly-torch-deepspeed-docker: + name: "Nightly PyTorch + DeepSpeed" + runs-on: ubuntu-latest + steps: + - + name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + - + name: Check out code + uses: actions/checkout@v3 + - + name: Login to DockerHub + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_PASSWORD }} + - + name: Build and push + uses: docker/build-push-action@v3 + with: + context: ./docker/transformers-pytorch-deepspeed-nightly-gpu + build-args: | + REF=main + push: true + tags: huggingface/transformers-pytorch-deepspeed-nightly-gpu \ No newline at end of file diff --git a/.github/workflows/build-past-ci-docker-images.yml b/.github/workflows/build-past-ci-docker-images.yml index 3a0e1612454c58..18d88f2d52fa75 100644 --- a/.github/workflows/build-past-ci-docker-images.yml +++ b/.github/workflows/build-past-ci-docker-images.yml @@ -3,7 +3,7 @@ name: Build docker images (Past CI) on: push: branches: - - past-ci-docker-image* + - build_past_ci_docker_image* concurrency: group: docker-images-builds @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - version: ["1.11", "1.10", "1.9", "1.8", "1.7", "1.6", "1.5", "1.4"] + version: ["1.13", "1.12", "1.11", "1.10", "1.9"] runs-on: ubuntu-latest steps: - @@ -24,6 +24,17 @@ jobs: - name: Check out code uses: actions/checkout@v3 + - + id: get-base-image + name: Get Base Image + env: + framework_version: ${{ matrix.version }} + run: | + echo "base_image=$(python3 -c 'import os; from utils.past_ci_versions import past_versions_testing; base_image = past_versions_testing["pytorch"][os.environ["framework_version"]]["base_image"]; print(base_image)')" >> $GITHUB_OUTPUT + - + name: Print Base Image + run: | + echo ${{ steps.get-base-image.outputs.base_image }} - name: Login to DockerHub uses: docker/login-action@v2 @@ -37,6 +48,7 @@ jobs: context: ./docker/transformers-past-gpu 
build-args: | REF=main + BASE_DOCKER_IMAGE=${{ steps.get-base-image.outputs.base_image }} FRAMEWORK=pytorch VERSION=${{ matrix.version }} push: true @@ -47,7 +59,7 @@ jobs: strategy: fail-fast: false matrix: - version: ["2.8", "2.7", "2.6", "2.5"] + version: ["2.11", "2.10", "2.9", "2.8", "2.7", "2.6", "2.5"] runs-on: ubuntu-latest steps: - @@ -57,37 +69,16 @@ jobs: name: Check out code uses: actions/checkout@v3 - - name: Login to DockerHub - uses: docker/login-action@v2 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_PASSWORD }} + id: get-base-image + name: Get Base Image + env: + framework_version: ${{ matrix.version }} + run: | + echo "base_image=$(python3 -c 'import os; from utils.past_ci_versions import past_versions_testing; base_image = past_versions_testing["tensorflow"][os.environ["framework_version"]]["base_image"]; print(base_image)')" >> $GITHUB_OUTPUT - - name: Build and push - uses: docker/build-push-action@v3 - with: - context: ./docker/transformers-past-gpu - build-args: | - REF=main - FRAMEWORK=tensorflow - VERSION=${{ matrix.version }} - push: true - tags: huggingface/transformers-tensorflow-past-${{ matrix.version }}-gpu - - past-tensorflow-docker-2-4: - name: "Past TensorFlow Docker" - strategy: - fail-fast: false - matrix: - version: ["2.4"] - runs-on: ubuntu-latest - steps: - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - - - name: Check out code - uses: actions/checkout@v3 + name: Print Base Image + run: | + echo ${{ steps.get-base-image.outputs.base_image }} - name: Login to DockerHub uses: docker/login-action@v2 @@ -101,8 +92,8 @@ jobs: context: ./docker/transformers-past-gpu build-args: | REF=main - BASE_DOCKER_IMAGE=nvidia/cuda:11.0.3-cudnn8-devel-ubuntu20.04 + BASE_DOCKER_IMAGE=${{ steps.get-base-image.outputs.base_image }} FRAMEWORK=tensorflow VERSION=${{ matrix.version }} push: true - tags: huggingface/transformers-tensorflow-past-${{ matrix.version }}-gpu \ No newline at 
end of file + tags: huggingface/transformers-tensorflow-past-${{ matrix.version }}-gpu diff --git a/.github/workflows/self-nightly-past-ci-caller.yml b/.github/workflows/self-nightly-past-ci-caller.yml new file mode 100644 index 00000000000000..e86e6a16662c56 --- /dev/null +++ b/.github/workflows/self-nightly-past-ci-caller.yml @@ -0,0 +1,143 @@ +name: Self-hosted runner (nightly-past-ci-caller) + +on: + schedule: + # 2 am on each Sunday and Thursday + - cron: "0 2 * * 0,4" + push: + branches: + - run_nightly_ci* + - run_past_ci* + +jobs: + build_nightly_ci_images: + name: Build Nightly CI Docker Images + if: (github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_nightly_ci')) + uses: ./.github/workflows/build-nightly-ci-docker-images.yml + secrets: inherit + + run_nightly_ci: + name: Nightly CI + needs: [build_nightly_ci_images] + uses: ./.github/workflows/self-nightly-scheduled.yml + secrets: inherit + + run_past_ci_pytorch_1-13: + name: PyTorch 1.13 + if: (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))) + needs: [run_nightly_ci] + uses: ./.github/workflows/self-past.yml + with: + framework: pytorch + version: "1.13" + secrets: inherit + + run_past_ci_pytorch_1-12: + name: PyTorch 1.12 + if: (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))) + needs: [run_past_ci_pytorch_1-13] + uses: ./.github/workflows/self-past.yml + with: + framework: pytorch + version: "1.12" + secrets: inherit + + run_past_ci_pytorch_1-11: + name: PyTorch 1.11 + if: (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))) + needs: [run_past_ci_pytorch_1-12] + uses: ./.github/workflows/self-past.yml + with: + framework: pytorch + version: "1.11" + secrets: inherit + + 
run_past_ci_pytorch_1-10: + name: PyTorch 1.10 + if: (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))) + needs: [run_past_ci_pytorch_1-11] + uses: ./.github/workflows/self-past.yml + with: + framework: pytorch + version: "1.10" + secrets: inherit + + run_past_ci_pytorch_1-9: + name: PyTorch 1.9 + if: (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))) + needs: [run_past_ci_pytorch_1-10] + uses: ./.github/workflows/self-past.yml + with: + framework: pytorch + version: "1.9" + secrets: inherit + + run_past_ci_tensorflow_2-11: + name: TensorFlow 2.11 + if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')) + needs: [run_past_ci_pytorch_1-9] + uses: ./.github/workflows/self-past.yml + with: + framework: tensorflow + version: "2.11" + secrets: inherit + + run_past_ci_tensorflow_2-10: + name: TensorFlow 2.10 + if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')) + needs: [run_past_ci_tensorflow_2-11] + uses: ./.github/workflows/self-past.yml + with: + framework: tensorflow + version: "2.10" + secrets: inherit + + run_past_ci_tensorflow_2-9: + name: TensorFlow 2.9 + if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')) + needs: [run_past_ci_tensorflow_2-10] + uses: ./.github/workflows/self-past.yml + with: + framework: tensorflow + version: "2.9" + secrets: inherit + + run_past_ci_tensorflow_2-8: + name: TensorFlow 2.8 + if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')) + needs: [run_past_ci_tensorflow_2-9] + uses: ./.github/workflows/self-past.yml + with: + framework: tensorflow + version: "2.8" + secrets: inherit + + run_past_ci_tensorflow_2-7: + name: TensorFlow 2.7 + if: 
(cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')) + needs: [run_past_ci_tensorflow_2-8] + uses: ./.github/workflows/self-past.yml + with: + framework: tensorflow + version: "2.7" + secrets: inherit + + run_past_ci_tensorflow_2-6: + name: TensorFlow 2.6 + if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')) + needs: [run_past_ci_tensorflow_2-7] + uses: ./.github/workflows/self-past.yml + with: + framework: tensorflow + version: "2.6" + secrets: inherit + + run_past_ci_tensorflow_2-5: + name: TensorFlow 2.5 + if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')) + needs: [run_past_ci_tensorflow_2-6] + uses: ./.github/workflows/self-past.yml + with: + framework: tensorflow + version: "2.5" + secrets: inherit diff --git a/.github/workflows/self-nightly-scheduled.yml b/.github/workflows/self-nightly-scheduled.yml index ca5186e736f416..b3e13cbb1b7d52 100644 --- a/.github/workflows/self-nightly-scheduled.yml +++ b/.github/workflows/self-nightly-scheduled.yml @@ -1,4 +1,4 @@ -name: Self-hosted runner (nightly) +name: Self-hosted runner (nightly-ci) # Note that each job's dependencies go into a corresponding docker file. # @@ -8,9 +8,7 @@ name: Self-hosted runner (nightly) on: repository_dispatch: -# Disable temporarily until the test suite can be run under 12 hours. 
-# schedule: -# - cron: "0 16 * * *" + workflow_call: env: HF_HOME: /mnt/cache @@ -33,7 +31,7 @@ jobs: fetch-depth: 2 - name: Check Runner Status - run: python utils/check_self_hosted_runner.py --target_runners single-gpu-scheduled-ci-runner-docker,multi-gpu-scheduled-ci-runner-docker --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }} + run: python utils/check_self_hosted_runner.py --target_runners single-gpu-past-ci-runner-docker,multi-gpu-past-ci-runner-docker --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }} check_runners: name: Check Runners @@ -41,7 +39,7 @@ jobs: strategy: matrix: machine_type: [single-gpu, multi-gpu] - runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker') }} + runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker-past-ci') }} container: image: huggingface/transformers-all-latest-torch-nightly-gpu options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ @@ -56,7 +54,7 @@ jobs: strategy: matrix: machine_type: [single-gpu, multi-gpu] - runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker') }} + runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker-past-ci') }} container: image: huggingface/transformers-all-latest-torch-nightly-gpu options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ @@ -96,7 +94,7 @@ jobs: matrix: folders: ${{ fromJson(needs.setup.outputs.matrix) }} machine_type: [single-gpu] - runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker') }} + runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker-past-ci') }} container: image: huggingface/transformers-all-latest-torch-nightly-gpu options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ @@ -143,7 +141,7 @@ jobs: if: ${{ always() }} uses: actions/upload-artifact@v3 with: - name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports + name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders 
}}_test_reports_postfix_nightly path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} run_tests_multi_gpu: @@ -153,7 +151,7 @@ jobs: matrix: folders: ${{ fromJson(needs.setup.outputs.matrix) }} machine_type: [multi-gpu] - runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker') }} + runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker-past-ci') }} container: image: huggingface/transformers-all-latest-torch-nightly-gpu options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ @@ -200,7 +198,7 @@ jobs: if: ${{ always() }} uses: actions/upload-artifact@v3 with: - name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports + name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_nightly path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} run_all_tests_torch_cuda_extensions_gpu: @@ -209,7 +207,7 @@ jobs: fail-fast: false matrix: machine_type: [single-gpu, multi-gpu] - runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker') }} + runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker-past-ci') }} needs: setup container: image: huggingface/transformers-pytorch-deepspeed-nightly-gpu @@ -258,7 +256,7 @@ jobs: if: ${{ always() }} uses: actions/upload-artifact@v3 with: - name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports + name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports_postfix_nightly path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu send_results: @@ -292,7 +290,7 @@ jobs: CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }} CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_PAST_FUTURE }} ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }} - CI_EVENT: nightly-build + CI_EVENT: Nightly CI RUNNER_STATUS: ${{ 
needs.check_runner_status.result }} RUNNER_ENV_STATUS: ${{ needs.check_runners.result }} SETUP_STATUS: ${{ needs.setup.result }} @@ -302,3 +300,11 @@ jobs: pip install slack_sdk pip show slack_sdk python utils/notification_service.py "${{ needs.setup.outputs.matrix }}" + + + # delete-artifact + - uses: geekyeggo/delete-artifact@v2 + with: + name: | + single-* + multi-* \ No newline at end of file diff --git a/.github/workflows/self-past-caller.yml b/.github/workflows/self-past-caller.yml deleted file mode 100644 index 2cc81dac8ca281..00000000000000 --- a/.github/workflows/self-past-caller.yml +++ /dev/null @@ -1,136 +0,0 @@ -name: Self-hosted runner (past-ci-caller) - -on: - push: - branches: - - run-past-ci* - -jobs: - run_past_ci_pytorch_1-11: - name: PyTorch 1.11 - if: always() - uses: ./.github/workflows/self-past.yml - with: - framework: pytorch - version: "1.11" - secrets: inherit - - run_past_ci_pytorch_1-10: - name: PyTorch 1.10 - if: always() - needs: [run_past_ci_pytorch_1-11] - uses: ./.github/workflows/self-past.yml - with: - framework: pytorch - version: "1.10" - secrets: inherit - - run_past_ci_pytorch_1-9: - name: PyTorch 1.9 - if: always() - needs: [run_past_ci_pytorch_1-10] - uses: ./.github/workflows/self-past.yml - with: - framework: pytorch - version: "1.9" - secrets: inherit - - run_past_ci_pytorch_1-8: - name: PyTorch 1.8 - if: always() - needs: [run_past_ci_pytorch_1-9] - uses: ./.github/workflows/self-past.yml - with: - framework: pytorch - version: "1.8" - secrets: inherit - - run_past_ci_pytorch_1-7: - name: PyTorch 1.7 - if: always() - needs: [run_past_ci_pytorch_1-8] - uses: ./.github/workflows/self-past.yml - with: - framework: pytorch - version: "1.7" - secrets: inherit - - run_past_ci_pytorch_1-6: - name: PyTorch 1.6 - if: always() - needs: [run_past_ci_pytorch_1-7] - uses: ./.github/workflows/self-past.yml - with: - framework: pytorch - version: "1.6" - secrets: inherit - - run_past_ci_pytorch_1-5: - name: PyTorch 1.5 - if: always() 
- needs: [run_past_ci_pytorch_1-6] - uses: ./.github/workflows/self-past.yml - with: - framework: pytorch - version: "1.5" - secrets: inherit - - run_past_ci_pytorch_1-4: - name: PyTorch 1.4 - if: always() - needs: [run_past_ci_pytorch_1-5] - uses: ./.github/workflows/self-past.yml - with: - framework: pytorch - version: "1.4" - secrets: inherit - - run_past_ci_tensorflow_2-8: - name: TensorFlow 2.8 - if: always() - needs: [run_past_ci_pytorch_1-4] - uses: ./.github/workflows/self-past.yml - with: - framework: tensorflow - version: "2.8" - secrets: inherit - - run_past_ci_tensorflow_2-7: - name: TensorFlow 2.7 - if: always() - needs: [run_past_ci_tensorflow_2-8] - uses: ./.github/workflows/self-past.yml - with: - framework: tensorflow - version: "2.7" - secrets: inherit - - run_past_ci_tensorflow_2-6: - name: TensorFlow 2.6 - if: always() - needs: [run_past_ci_tensorflow_2-7] - uses: ./.github/workflows/self-past.yml - with: - framework: tensorflow - version: "2.6" - secrets: inherit - - run_past_ci_tensorflow_2-5: - name: TensorFlow 2.5 - if: always() - needs: [run_past_ci_tensorflow_2-6] - uses: ./.github/workflows/self-past.yml - with: - framework: tensorflow - version: "2.5" - secrets: inherit - - run_past_ci_tensorflow_2-4: - name: TensorFlow 2.4 - if: always() - needs: [run_past_ci_tensorflow_2-5] - uses: ./.github/workflows/self-past.yml - with: - framework: tensorflow - version: "2.4" - secrets: inherit \ No newline at end of file diff --git a/.github/workflows/self-past.yml b/.github/workflows/self-past.yml index 12ddcc6658374e..bcb6639a79810e 100644 --- a/.github/workflows/self-past.yml +++ b/.github/workflows/self-past.yml @@ -1,4 +1,4 @@ -name: Self-hosted runner (past) +name: Self-hosted runner (past-ci) # Note that each job's dependencies go into a corresponding docker file. 
# @@ -157,7 +157,7 @@ jobs: if: ${{ always() }} uses: actions/upload-artifact@v3 with: - name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports + name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }} path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} run_tests_multi_gpu: @@ -223,14 +223,80 @@ jobs: if: ${{ always() }} uses: actions/upload-artifact@v3 with: - name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports + name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }} path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} + run_all_tests_torch_cuda_extensions_gpu: + name: Torch CUDA extension tests + if: inputs.framework == 'pytorch' + strategy: + fail-fast: false + matrix: + machine_type: [single-gpu, multi-gpu] + runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker-past-ci') }} + needs: setup + container: + image: huggingface/transformers-${{ inputs.framework }}-past-${{ inputs.version }}-gpu + options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + steps: + - name: Update clone + working-directory: /transformers + run: git fetch && git checkout ${{ github.sha }} + + - name: Remove cached torch extensions + run: rm -rf /github/home/.cache/torch_extensions/ + + # To avoid unknown test failures + - name: Pre build DeepSpeed *again* + working-directory: / + run: | + python3 -m pip uninstall -y deepspeed + rm -rf DeepSpeed + git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build + DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 DS_BUILD_AIO=1 DS_BUILD_UTILS=1 python3 -m pip install . 
--global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check + + - name: NVIDIA-SMI + run: | + nvidia-smi + + - name: Environment + working-directory: /transformers + run: | + python3 utils/print_env.py + + - name: Show installed libraries and their versions + working-directory: /transformers + run: pip freeze + + - name: Run all tests on GPU + working-directory: /transformers + run: | + python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu tests/deepspeed tests/extended + + - name: Failure short reports + if: ${{ failure() }} + continue-on-error: true + run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt + + - name: Test suite reports artifacts + if: ${{ always() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }} + path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu + send_results: name: Send results to webhook runs-on: ubuntu-latest if: always() - needs: [check_runner_status, check_runners, setup, run_tests_single_gpu, run_tests_multi_gpu] + needs: [ + check_runner_status, + check_runners, + setup, + run_tests_single_gpu, + run_tests_multi_gpu, + run_all_tests_torch_cuda_extensions_gpu + ] steps: - name: Preliminary job status shell: bash @@ -272,4 +338,11 @@ jobs: uses: actions/upload-artifact@v3 with: name: test_failure_tables_${{ inputs.framework }}-${{ inputs.version }} - path: test_failure_tables \ No newline at end of file + path: test_failure_tables + + # delete-artifact + - uses: geekyeggo/delete-artifact@v2 + with: + name: | + single-* + multi-* \ No newline at end of file diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml index f535efba27ca5d..3ebf38062c4207 100644 --- a/.github/workflows/self-scheduled.yml +++ 
b/.github/workflows/self-scheduled.yml @@ -10,6 +10,9 @@ on: repository_dispatch: schedule: - cron: "0 2 * * *" + push: + branches: + - run_scheduled_ci* env: HF_HOME: /mnt/cache diff --git a/Makefile b/Makefile index 400a35bbfe2e7f..9e1d197cb6473a 100644 --- a/Makefile +++ b/Makefile @@ -41,7 +41,6 @@ repo-consistency: python utils/check_config_docstrings.py python utils/check_config_attributes.py python utils/check_doctest_list.py - python utils/tests_fetcher.py --sanity_check python utils/update_metadata.py --check-only python utils/check_task_guides.py diff --git a/docker/transformers-past-gpu/Dockerfile b/docker/transformers-past-gpu/Dockerfile index 99fb550c6a35d8..8ecc83c339d973 100644 --- a/docker/transformers-past-gpu/Dockerfile +++ b/docker/transformers-past-gpu/Dockerfile @@ -1,4 +1,4 @@ -ARG BASE_DOCKER_IMAGE="nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04" +ARG BASE_DOCKER_IMAGE FROM $BASE_DOCKER_IMAGE LABEL maintainer="Hugging Face" @@ -8,7 +8,7 @@ ARG DEBIAN_FRONTEND=noninteractive SHELL ["sh", "-lc"] RUN apt update -RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg git-lfs +RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg git-lfs libaio-dev RUN git lfs install RUN python3 -m pip install --no-cache-dir --upgrade pip @@ -23,6 +23,9 @@ RUN cd transformers && python3 setup.py develop ARG FRAMEWORK ARG VERSION +# Control `setuptools` version to avoid some issues +RUN [ "$VERSION" != "1.9" -a "$VERSION" != "1.10" ] && python3 -m pip install -U setuptools || python3 -m pip install -U "setuptools<=59.5" + # Remove all frameworks # (`accelerate` requires `torch`, and this causes import issues for TF-only testing) RUN python3 -m pip uninstall -y torch torchvision torchaudio accelerate tensorflow jax flax @@ -34,4 +37,20 @@ RUN python3 ./transformers/utils/past_ci_versions.py --framework $FRAMEWORK --ve RUN echo "INSTALL_CMD = $INSTALL_CMD" RUN $INSTALL_CMD +RUN [ "$FRAMEWORK" != 
"pytorch" ] && echo "`deepspeed-testing` installation is skipped" || python3 -m pip install --no-cache-dir ./transformers[deepspeed-testing] + +# Uninstall `torch-tensorrt` and `apex` shipped with the base image +RUN python3 -m pip uninstall -y torch-tensorrt apex + +# Pre-build **nightly** release of DeepSpeed, so it would be ready for testing (otherwise, the 1st deepspeed test will timeout) +RUN python3 -m pip uninstall -y deepspeed +# This has to be run inside the GPU VMs running the tests. (So far, it fails here due to GPU checks during compilation.) +# Issue: https://github.com/microsoft/DeepSpeed/issues/2010 +# RUN git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build && \ +# DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 DS_BUILD_AIO=1 DS_BUILD_UTILS=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1 + RUN python3 -m pip install -U "itsdangerous<2.1.0" + +# When installing in editable mode, `transformers` is not recognized as a package. +# this line must be added in order for python to be aware of transformers. +RUN cd transformers && python3 setup.py develop diff --git a/docker/transformers-pytorch-deepspeed-nightly-gpu/Dockerfile b/docker/transformers-pytorch-deepspeed-nightly-gpu/Dockerfile index 573e09c22a9c05..fcb599ddc232d6 100644 --- a/docker/transformers-pytorch-deepspeed-nightly-gpu/Dockerfile +++ b/docker/transformers-pytorch-deepspeed-nightly-gpu/Dockerfile @@ -1,10 +1,11 @@ -FROM nvcr.io/nvidia/pytorch:21.03-py3 +# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel_22-08.html#rel_22-08 +FROM nvcr.io/nvidia/pytorch:22.08-py3 LABEL maintainer="Hugging Face" ARG DEBIAN_FRONTEND=noninteractive # Example: `cu102`, `cu113`, etc. 
-ARG CUDA='cu113' +ARG CUDA='cu117' RUN apt -y update RUN apt install -y libaio-dev @@ -20,6 +21,9 @@ RUN python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio RUN python3 -m pip install --no-cache-dir ./transformers[deepspeed-testing] +# Uninstall `torch-tensorrt` and `apex` shipped with the base image +RUN python3 -m pip uninstall -y torch-tensorrt apex + # Pre-build **nightly** release of DeepSpeed, so it would be ready for testing (otherwise, the 1st deepspeed test will timeout) RUN python3 -m pip uninstall -y deepspeed # This has to be run inside the GPU VMs running the tests. (So far, it fails here due to GPU checks during compilation.) @@ -27,23 +31,23 @@ RUN python3 -m pip uninstall -y deepspeed # RUN git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build && \ # DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 DS_BUILD_AIO=1 DS_BUILD_UTILS=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1 -# For `torchdynamo` tests -# (see https://github.com/huggingface/transformers/pull/17765) -RUN git clone https://github.com/pytorch/functorch -RUN python3 -m pip install --no-cache-dir ./functorch[aot] -RUN cd functorch && python3 setup.py develop - -RUN git clone https://github.com/pytorch/torchdynamo -RUN python3 -m pip install -r ./torchdynamo/requirements.txt -RUN cd torchdynamo && python3 setup.py develop - -# install TensorRT -RUN python3 -m pip install --no-cache-dir -U nvidia-pyindex -RUN python3 -m pip install --no-cache-dir -U nvidia-tensorrt==8.2.4.2 - -# install torch_tensorrt (fx path) -RUN git clone https://github.com/pytorch/TensorRT.git -RUN cd TensorRT/py && python3 setup.py install --fx-only +## For `torchdynamo` tests +## (see https://github.com/huggingface/transformers/pull/17765) +#RUN git clone https://github.com/pytorch/functorch +#RUN python3 -m pip install --no-cache-dir ./functorch[aot] +#RUN cd functorch && python3 setup.py develop 
+# +#RUN git clone https://github.com/pytorch/torchdynamo +#RUN python3 -m pip install -r ./torchdynamo/requirements.txt +#RUN cd torchdynamo && python3 setup.py develop +# +## install TensorRT +#RUN python3 -m pip install --no-cache-dir -U nvidia-pyindex +#RUN python3 -m pip install --no-cache-dir -U nvidia-tensorrt==8.2.4.2 +# +## install torch_tensorrt (fx path) +#RUN git clone https://github.com/pytorch/TensorRT.git +#RUN cd TensorRT/py && python3 setup.py install --fx-only # When installing in editable mode, `transformers` is not recognized as a package. # this line must be added in order for python to be aware of transformers. diff --git a/docs/source/en/generation_strategies.mdx b/docs/source/en/generation_strategies.mdx index 831c8772b6c63c..00ee9221fe6803 100644 --- a/docs/source/en/generation_strategies.mdx +++ b/docs/source/en/generation_strategies.mdx @@ -216,11 +216,11 @@ We pride ourselves on being the best in the business and our customer service is ### Multinomial sampling As opposed to greedy search that always chooses a token with the highest probability as the -next token, multinomial sampling randomly selects the next token based on the probability distribution over the entire +next token, multinomial sampling (also called ancestral sampling) randomly selects the next token based on the probability distribution over the entire vocabulary given by the model. Every token with a non-zero probability has a chance of being selected, thus reducing the risk of repetition. -To enable multinomial sampling set `do_sample=True`. +To enable multinomial sampling set `do_sample=True` and `num_beams=1`. ```python >>> from transformers import AutoTokenizer, AutoModelForCausalLM @@ -232,7 +232,7 @@ To enable multinomial sampling set `do_sample=True`. 
>>> prompt = "Today was an amazing day because" >>> inputs = tokenizer(prompt, return_tensors="pt") ->>> outputs = model.generate(**inputs, do_sample=True, max_new_tokens=100) +>>> outputs = model.generate(**inputs, do_sample=True, num_beams=1, max_new_tokens=100) >>> tokenizer.batch_decode(outputs, skip_special_tokens=True) ['Today was an amazing day because we are now in the final stages of our trip to New York City which was very tough. \ It is a difficult schedule and a challenging part of the year but still worth it. I have been taking things easier and \ diff --git a/examples/research_projects/decision_transformer/requirements.txt b/examples/research_projects/decision_transformer/requirements.txt index b8545669e2cf5f..71ac03f908e182 100644 --- a/examples/research_projects/decision_transformer/requirements.txt +++ b/examples/research_projects/decision_transformer/requirements.txt @@ -175,7 +175,7 @@ pytz==2022.1 pytz-deprecation-shim==0.1.0.post0 PyYAML==6.0 ray==1.11.0 -redis==4.5.3 +redis==4.5.4 regex==2022.3.15 requests==2.27.1 requests-oauthlib==1.3.1 diff --git a/src/transformers/generation/logits_process.py b/src/transformers/generation/logits_process.py index 73e1bdb214e63f..95c8064ee40445 100644 --- a/src/transformers/generation/logits_process.py +++ b/src/transformers/generation/logits_process.py @@ -106,12 +106,12 @@ class MinLengthLogitsProcessor(LogitsProcessor): def __init__(self, min_length: int, eos_token_id: Union[int, List[int]]): if not isinstance(min_length, int) or min_length < 0: - raise ValueError(f"`min_length` has to be a positive integer, but is {min_length}") + raise ValueError(f"`min_length` has to be a non-negative integer, but is {min_length}") if isinstance(eos_token_id, int): eos_token_id = [eos_token_id] if not all([isinstance(i, int) for i in eos_token_id]) or any([i < 0 for i in eos_token_id]): - raise ValueError(f"`eos_token_id` has to be a list of positive integers, but is {eos_token_id}") + logger.warning(f"`eos_token_id` 
has to be a list of positive integers, but is {eos_token_id}") self.min_length = min_length self.eos_token_id = eos_token_id @@ -148,7 +148,7 @@ def __init__(self, prompt_length_to_skip: int, min_new_tokens: int, eos_token_id if isinstance(eos_token_id, int): eos_token_id = [eos_token_id] if not all([isinstance(i, int) for i in eos_token_id]) or any([i < 0 for i in eos_token_id]): - raise ValueError(f"`eos_token_id` has to be a list of positive integers, but is {eos_token_id}") + logger.warning(f"`eos_token_id` has to be a list of positive integers, but is {eos_token_id}") self.prompt_length_to_skip = prompt_length_to_skip self.min_new_tokens = min_new_tokens diff --git a/src/transformers/integrations.py b/src/transformers/integrations.py index a93eab9581970d..ce31f9ddd6a232 100644 --- a/src/transformers/integrations.py +++ b/src/transformers/integrations.py @@ -1123,33 +1123,32 @@ def __init__(self): class NeptuneCallback(TrainerCallback): - """TrainerCallback that sends the logs to [Neptune](https://neptune.ai). + """TrainerCallback that sends the logs to [Neptune](https://app.neptune.ai). Args: - api_token (`str`, optional): - Neptune API token obtained upon registration. You can leave this argument out if you have saved your token - to the `NEPTUNE_API_TOKEN` environment variable (strongly recommended). See full setup instructions in the - [docs](https://docs.neptune.ai/getting-started/installation). - project (`str`, optional): - Name of an existing Neptune project, in the form: "workspace-name/project-name". You can find and copy the - name from the project Settings -> Properties in Neptune. If None (default), the value of the - `NEPTUNE_PROJECT` environment variable will be used. - name (`str`, optional): Custom name for the run. + api_token (`str`, *optional*): Neptune API token obtained upon registration. + You can leave this argument out if you have saved your token to the `NEPTUNE_API_TOKEN` environment + variable (strongly recommended). 
See full setup instructions in the + [docs](https://docs.neptune.ai/setup/installation). + project (`str`, *optional*): Name of an existing Neptune project, in the form "workspace-name/project-name". + You can find and copy the name in Neptune from the project settings -> Properties. If None (default), the + value of the `NEPTUNE_PROJECT` environment variable is used. + name (`str`, *optional*): Custom name for the run. base_namespace (`str`, optional, defaults to "finetuning"): In the Neptune run, the root namespace - that will contain all of the logged metadata. - log_parameters (`bool`, optional, defaults to True): + that will contain all of the metadata logged by the callback. + log_parameters (`bool`, *optional*, defaults to `True`): If True, logs all Trainer arguments and model parameters provided by the Trainer. - log_checkpoints (`str`, optional, defaults to None): - If "same", uploads checkpoints whenever they are saved by the Trainer. If "last", uploads only the most - recently saved checkpoint. If "best", uploads the best checkpoint (among the ones saved by the Trainer). If - None, does not upload checkpoints. - run (`Run`, optional): - Pass a Neptune run object if you want to continue logging to an existing run. Read more about resuming runs - in the [docs](https://docs.neptune.ai/how-to-guides/neptune-api/resume-run). - **neptune_run_kwargs (optional): + log_checkpoints (`str`, *optional*): If "same", uploads checkpoints whenever they are saved by the Trainer. + If "last", uploads only the most recently saved checkpoint. If "best", uploads the best checkpoint (among + the ones saved by the Trainer). If `None`, does not upload checkpoints. + run (`Run`, *optional*): Pass a Neptune run object if you want to continue logging to an existing run. + Read more about resuming runs in the [docs](https://docs.neptune.ai/logging/to_existing_object). 
+ **neptune_run_kwargs (*optional*): Additional keyword arguments to be passed directly to the - [neptune.init_run()](https://docs.neptune.ai/api-reference/neptune#.init_run) function when a new run is - created. + [`neptune.init_run()`](https://docs.neptune.ai/api/neptune#init_run) function when a new run is created. + + For instructions and examples, see the [Transformers integration + guide](https://docs.neptune.ai/integrations/transformers) in the Neptune documentation. """ integration_version_key = "source_code/integrations/transformers" diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index 384876fb6de239..9a6c29c27bdf63 100644 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -1736,6 +1736,41 @@ def save_pretrained( for ignore_key in self._keys_to_ignore_on_save: if ignore_key in state_dict.keys(): del state_dict[ignore_key] + if safe_serialization: + # Safetensors does not allow tensor aliasing. + # We're going to remove aliases before saving + ptrs = collections.defaultdict(list) + for name, tensor in state_dict.items(): + ptrs[tensor.data_ptr()].append(name) + + # These are all the pointers of shared tensors. + shared_ptrs = {ptr: names for ptr, names in ptrs.items() if len(names) > 1} + warn_names = set() + for names in shared_ptrs.values(): + # Removing the keys which are declared as known duplicates on + # load. This allows to make sure the name which is kept is consistent. + if self._keys_to_ignore_on_load_missing is not None: + for name in names: + matches_pattern = any(re.search(pat, name) for pat in self._keys_to_ignore_on_load_missing) + if matches_pattern and name in state_dict: + del state_dict[name] + + # When not all duplicates have been cleaned, still remove those keys, but put a clear warning. + # If the link between tensors was done at runtime then `from_pretrained` will not get + # the key back leading to random tensor. 
A proper warning will be shown + # during reload (if applicable), but since the file is not necessarily compatible with + # the config, better show a proper warning. + found = 0 + for name in names: + if name in state_dict: + found += 1 + if found > 1: + del state_dict[name] + warn_names.add(name) + if len(warn_names) > 0: + logger.warning_once( + f"Removed shared tensor {warn_names} while saving. This should be OK, but check by verifying that you don't receive any warning while reloading", + ) # Shard the model if it is too big. weights_name = SAFE_WEIGHTS_NAME if safe_serialization else WEIGHTS_NAME @@ -2813,6 +2848,11 @@ def _fix_key(key): missing_keys = list(set(expected_keys) - set(loaded_keys)) unexpected_keys = list(set(loaded_keys) - set(expected_keys)) + # Some tensors maybe have been already filled by another key (tied weights). + existing_ptrs = {model_state_dict[k].data_ptr() for k in loaded_keys if k in model_state_dict} + missing_keys = [ + k for k in missing_keys if k in model_state_dict and model_state_dict[k].data_ptr() not in existing_ptrs + ] # Some models may have keys that are not in the state by design, removing them before needlessly warning # the user. 
if cls._keys_to_ignore_on_load_missing is not None: diff --git a/src/transformers/models/auto/tokenization_auto.py b/src/transformers/models/auto/tokenization_auto.py index fd10c5cd11763c..b23b0c39d62147 100644 --- a/src/transformers/models/auto/tokenization_auto.py +++ b/src/transformers/models/auto/tokenization_auto.py @@ -24,7 +24,6 @@ from ...dynamic_module_utils import get_class_from_dynamic_module from ...tokenization_utils import PreTrainedTokenizer from ...tokenization_utils_base import TOKENIZER_CONFIG_FILE -from ...tokenization_utils_fast import PreTrainedTokenizerFast from ...utils import cached_file, extract_commit_hash, is_sentencepiece_available, is_tokenizers_available, logging from ..encoder_decoder import EncoderDecoderConfig from .auto_factory import _LazyAutoMapping @@ -37,6 +36,12 @@ ) +if is_tokenizers_available(): + from ...tokenization_utils_fast import PreTrainedTokenizerFast +else: + PreTrainedTokenizerFast = None + + logger = logging.get_logger(__name__) if TYPE_CHECKING: diff --git a/src/transformers/models/blip/image_processing_blip.py b/src/transformers/models/blip/image_processing_blip.py index 59ea4ac7798a91..50808ec65c5db8 100644 --- a/src/transformers/models/blip/image_processing_blip.py +++ b/src/transformers/models/blip/image_processing_blip.py @@ -113,24 +113,28 @@ def resize( **kwargs, ) -> np.ndarray: """ - Resize an image. - - Resizes the shorter side of the image to `size["shortest_edge"]` while preserving the aspect ratio. If the - longer side is larger than the max size `(int(`size["shortest_edge"]` * 1333 / 800))`, the longer side is then - resized to the max size while preserving the aspect ratio. + Resize an image to `(size["height"], size["width"])`. Args: image (`np.ndarray`): Image to resize. size (`Dict[str, int]`): - Controls the size of the output image. Should be of the form `{"shortest_edge": int}`. 
- resample (`PILImageResampling` filter, *optional*, defaults to `PILImageResampling.BICUBIC`): - Resampling filter to use when resiizing the image. - data_format (`str` or `ChannelDimension`, *optional*): - The channel dimension format of the image. If not provided, it will be the same as the input image. + Dictionary in the format `{"height": int, "width": int}` specifying the size of the output image. + resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BICUBIC`): + `PILImageResampling` filter to use when resizing the image e.g. `PILImageResampling.BICUBIC`. + data_format (`ChannelDimension` or `str`, *optional*): + The channel dimension format for the output image. If unset, the channel dimension format of the input + image is used. Can be one of: + - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format. + - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format. + + Returns: + `np.ndarray`: The resized image. """ size = get_size_dict(size, default_to_square=True) - output_size = (size["width"], size["height"]) + if "height" not in size or "width" not in size: + raise ValueError(f"The `size` dictionary must contain the keys `height` and `width`. 
Got {size.keys()}") + output_size = (size["height"], size["width"]) return resize(image, size=output_size, resample=resample, data_format=data_format, **kwargs) def rescale( diff --git a/src/transformers/models/blip_2/modeling_blip_2.py b/src/transformers/models/blip_2/modeling_blip_2.py index 46f0c9b11ce498..9b00274a0b14ca 100644 --- a/src/transformers/models/blip_2/modeling_blip_2.py +++ b/src/transformers/models/blip_2/modeling_blip_2.py @@ -1238,8 +1238,28 @@ def __init__(self, config: Blip2Config): # Initialize weights and apply final processing self.post_init() - def get_input_embeddings(self) -> nn.Module: - return self.vision_model.embeddings.patch_embedding + def get_input_embeddings(self): + return self.language_model.get_input_embeddings() + + def set_input_embeddings(self, value): + self.language_model.set_input_embeddings(value) + + def set_output_embeddings(self, new_embeddings): + self.language_model.set_output_embeddings(new_embeddings) + + def get_output_embeddings(self) -> nn.Module: + return self.language_model.get_output_embeddings() + + def get_encoder(self): + return self.language_model.get_encoder() + + def get_decoder(self): + return self.language_model.get_decoder() + + def _tie_weights(self): + if not self.config.use_decoder_only_language_model: + self.language_model.encoder.embed_tokens = self.language_model.shared + self.language_model.decoder.embed_tokens = self.language_model.shared @add_start_docstrings_to_model_forward(BLIP_2_TEXT_INPUTS_DOCSTRING) def get_text_features( diff --git a/src/transformers/models/deta/modeling_deta.py b/src/transformers/models/deta/modeling_deta.py index 6fd2e8fdd18412..eabc6e5e690d34 100644 --- a/src/transformers/models/deta/modeling_deta.py +++ b/src/transformers/models/deta/modeling_deta.py @@ -244,7 +244,7 @@ class DetaObjectDetectionOutput(ModelOutput): def _get_clones(module, N): - return nn.ModuleList([copy.deepcopy(module) for i in range(N)]) + return nn.ModuleList([module for i in range(N)]) def 
inverse_sigmoid(x, eps=1e-5): diff --git a/src/transformers/models/llama/configuration_llama.py b/src/transformers/models/llama/configuration_llama.py index 36b8ab72ab2ce7..30325b82f787c9 100644 --- a/src/transformers/models/llama/configuration_llama.py +++ b/src/transformers/models/llama/configuration_llama.py @@ -52,6 +52,9 @@ class LlamaConfig(PretrainedConfig): Number of attention heads for each attention layer in the Transformer encoder. hidden_act (`str` or `function`, *optional*, defaults to `"silu"`): The non-linear activation function (function or string) in the decoder. + max_position_embeddings (`int`, *optional*, defaults to 2048): + The maximum sequence length that this model might ever be used with. Typically set this to something large + just in case (e.g., 512 or 1024 or 2048). initializer_range (`float`, *optional*, defaults to 0.02): The standard deviation of the truncated_normal_initializer for initializing all weight matrices. rms_norm_eps (`float`, *optional*, defaults to 1e-12): @@ -85,6 +88,7 @@ def __init__( num_hidden_layers=32, num_attention_heads=32, hidden_act="silu", + max_position_embeddings=2048, initializer_range=0.02, rms_norm_eps=1e-6, use_cache=True, @@ -95,6 +99,7 @@ def __init__( **kwargs, ): self.vocab_size = vocab_size + self.max_position_embeddings = max_position_embeddings self.hidden_size = hidden_size self.intermediate_size = intermediate_size self.num_hidden_layers = num_hidden_layers diff --git a/src/transformers/models/llama/modeling_llama.py b/src/transformers/models/llama/modeling_llama.py index 6d79536627fc20..c3f5285441bc60 100755 --- a/src/transformers/models/llama/modeling_llama.py +++ b/src/transformers/models/llama/modeling_llama.py @@ -160,42 +160,24 @@ def forward(self, x): class LlamaAttention(nn.Module): """Multi-headed attention from 'Attention Is All You Need' paper""" - def __init__( - self, - hidden_size: int, - num_heads: int, - ): + def __init__(self, config: LlamaConfig): super().__init__() - 
self.hidden_size = hidden_size - self.num_heads = num_heads - self.head_dim = hidden_size // num_heads + self.config = config + self.hidden_size = config.hidden_size + self.num_heads = config.num_attention_heads + self.head_dim = self.hidden_size // self.num_heads + self.max_position_embeddings = config.max_position_embeddings - if (self.head_dim * num_heads) != self.hidden_size: + if (self.head_dim * self.num_heads) != self.hidden_size: raise ValueError( f"hidden_size must be divisible by num_heads (got `hidden_size`: {self.hidden_size}" - f" and `num_heads`: {num_heads})." + f" and `num_heads`: {self.num_heads})." ) - self.q_proj = nn.Linear( - hidden_size, - num_heads * self.head_dim, - bias=False, - ) - self.k_proj = nn.Linear( - hidden_size, - num_heads * self.head_dim, - bias=False, - ) - self.v_proj = nn.Linear( - hidden_size, - num_heads * self.head_dim, - bias=False, - ) - self.o_proj = nn.Linear( - num_heads * self.head_dim, - hidden_size, - bias=False, - ) - self.rotary_emb = LlamaRotaryEmbedding(self.head_dim) + self.q_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=False) + self.k_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=False) + self.v_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=False) + self.o_proj = nn.Linear(self.num_heads * self.head_dim, self.hidden_size, bias=False) + self.rotary_emb = LlamaRotaryEmbedding(self.head_dim, max_position_embeddings=self.max_position_embeddings) def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): return tensor.view(bsz, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous() @@ -270,10 +252,7 @@ class LlamaDecoderLayer(nn.Module): def __init__(self, config: LlamaConfig): super().__init__() self.hidden_size = config.hidden_size - self.self_attn = LlamaAttention( - hidden_size=self.hidden_size, - num_heads=config.num_attention_heads, - ) + self.self_attn = LlamaAttention(config=config) self.mlp = LlamaMLP( 
hidden_size=self.hidden_size, intermediate_size=config.intermediate_size, @@ -630,8 +609,6 @@ def custom_forward(*inputs): class LlamaForCausalLM(LlamaPreTrainedModel): - _keys_to_ignore_on_load_missing = [r"lm_head.weight"] - def __init__(self, config): super().__init__(config) self.model = LlamaModel(config) diff --git a/src/transformers/models/nllb_moe/configuration_nllb_moe.py b/src/transformers/models/nllb_moe/configuration_nllb_moe.py index 03a37bb35d6b4f..3ff222b93cf68a 100644 --- a/src/transformers/models/nllb_moe/configuration_nllb_moe.py +++ b/src/transformers/models/nllb_moe/configuration_nllb_moe.py @@ -125,7 +125,7 @@ class NllbMoeConfig(PretrainedConfig): >>> # Accessing the model configuration >>> configuration = model.config ```""" - model_type = "nllb_moe" + model_type = "nllb-moe" keys_to_ignore_at_inference = ["past_key_values"] attribute_map = {"num_attention_heads": "encoder_attention_heads", "hidden_size": "d_model"} diff --git a/src/transformers/models/pix2struct/configuration_pix2struct.py b/src/transformers/models/pix2struct/configuration_pix2struct.py index 8642602cf97db5..dead3d8a042413 100644 --- a/src/transformers/models/pix2struct/configuration_pix2struct.py +++ b/src/transformers/models/pix2struct/configuration_pix2struct.py @@ -357,9 +357,10 @@ def __init__( initializer_factor=1.0, initializer_range=0.02, is_vqa=False, + tie_word_embeddings=False, **kwargs, ): - super().__init__(**kwargs) + super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs) if text_config is None: text_config = {} diff --git a/src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py b/src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py index 16ce6fa035c44b..e331da14e810e8 100644 --- a/src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py +++ b/src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py @@ -564,7 +564,7 @@ def decode( >>> # compare word offsets with audio 
`common_voice_en_100038.mp3` online on the dataset viewer: >>> # https://huggingface.co/datasets/common_voice/viewer/en/train >>> word_offsets[:4] - [{'word': 'WHY', 'start_time': 1.42, 'end_time': 1.54}, {'word': 'DOES', 'start_time': 1.64, 'end_time': 1.88}, {'word': 'A', 'start_time': 2.12, 'end_time': 2.14}, {'word': 'MILE', 'start_time': 2.26, 'end_time': 2.46}] + [{'word': 'WHY', 'start_time': 1.42, 'end_time': 1.54}, {'word': 'DOES', 'start_time': 1.66, 'end_time': 1.9}, {'word': 'MILISANDRA', 'start_time': 2.26, 'end_time': 2.9}, {'word': 'LOOK', 'start_time': 3.0, 'end_time': 3.16}] ```""" from pyctcdecode.constants import ( diff --git a/src/transformers/pipelines/__init__.py b/src/transformers/pipelines/__init__.py index c8c0549a467414..7beab782c70ddf 100755 --- a/src/transformers/pipelines/__init__.py +++ b/src/transformers/pipelines/__init__.py @@ -32,7 +32,6 @@ from ..models.auto.modeling_auto import AutoModelForDepthEstimation from ..models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer from ..tokenization_utils import PreTrainedTokenizer -from ..tokenization_utils_fast import PreTrainedTokenizerFast from ..utils import ( HUGGINGFACE_CO_RESOLVE_ENDPOINT, is_kenlm_available, @@ -139,9 +138,13 @@ AutoModelForZeroShotImageClassification, AutoModelForZeroShotObjectDetection, ) + + if TYPE_CHECKING: from ..modeling_tf_utils import TFPreTrainedModel from ..modeling_utils import PreTrainedModel + from ..tokenization_utils_fast import PreTrainedTokenizerFast + logger = logging.get_logger(__name__) @@ -495,7 +498,7 @@ def pipeline( task: str = None, model: Optional = None, config: Optional[Union[str, PretrainedConfig]] = None, - tokenizer: Optional[Union[str, PreTrainedTokenizer, PreTrainedTokenizerFast]] = None, + tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, image_processor: Optional[Union[str, BaseImageProcessor]] = None, 
framework: Optional[str] = None, diff --git a/tests/models/nllb_moe/test_modeling_nllb_moe.py b/tests/models/nllb_moe/test_modeling_nllb_moe.py index 567aab56eaeac5..76cf4c0ea48c77 100644 --- a/tests/models/nllb_moe/test_modeling_nllb_moe.py +++ b/tests/models/nllb_moe/test_modeling_nllb_moe.py @@ -354,14 +354,14 @@ def model_inputs(self): @cached_property def tokenizer(self): - return NllbTokenizer.from_pretrained("ArthurZ/random-nllb-moe-2-experts") + return NllbTokenizer.from_pretrained("hf-internal-testing/random-nllb-moe-2-experts") @cached_property def big_model(self): return NllbMoeForConditionalGeneration.from_pretrained("facebook/nllb-moe-54b") def inference_no_head(self): - model = NllbMoeModel.from_pretrained("ArthurZ/random-nllb-moe-2-experts").eval() + model = NllbMoeModel.from_pretrained("hf-internal-testing/random-nllb-moe-2-experts").eval() with torch.no_grad(): output = model(**self.model_inputs) # fmt: off @@ -382,7 +382,7 @@ def test_inference_logits(self): and `transformers` implementation of NLLB-MoE transformers. We only check the logits of the second sample of the batch, as it is padded. """ - model = NllbMoeForConditionalGeneration.from_pretrained("ArthurZ/random-nllb-moe-2-experts").eval() + model = NllbMoeForConditionalGeneration.from_pretrained("hf-internal-testing/random-nllb-moe-2-experts").eval() with torch.no_grad(): output = model(**self.model_inputs) diff --git a/tests/repo_utils/test_tests_fetcher.py b/tests/repo_utils/test_tests_fetcher.py index cd0109b5359d4e..e02a917700dd2f 100644 --- a/tests/repo_utils/test_tests_fetcher.py +++ b/tests/repo_utils/test_tests_fetcher.py @@ -13,52 +13,661 @@ # limitations under the License. 
import os +import shutil import sys +import tempfile import unittest +from contextlib import contextmanager +from pathlib import Path from git import Repo +from transformers.testing_utils import CaptureStdout -git_repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))) -sys.path.append(os.path.join(git_repo_path, "utils")) -transformers_path = os.path.join(git_repo_path, "src", "transformers") -# Tests are run against this specific commit for reproducibility -# https://github.com/huggingface/transformers/tree/07f6690206e39ed7a4d9dbc58824314f7089bb38 -GIT_TEST_SHA = "07f6690206e39ed7a4d9dbc58824314f7089bb38" +REPO_PATH = os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))) +sys.path.append(os.path.join(REPO_PATH, "utils")) -from tests_fetcher import checkout_commit, clean_code, get_module_dependencies # noqa: E402 +import tests_fetcher # noqa: E402 +from tests_fetcher import ( # noqa: E402 + checkout_commit, + clean_code, + create_module_to_test_map, + create_reverse_dependency_map, + create_reverse_dependency_tree, + diff_is_docstring_only, + extract_imports, + get_all_tests, + get_diff, + get_module_dependencies, + get_tree_starting_at, + infer_tests_to_run, + parse_commit_message, + print_tree_deps_of, +) -class CheckDummiesTester(unittest.TestCase): +BERT_MODELING_FILE = "src/transformers/models/bert/modeling_bert.py" +BERT_MODEL_FILE = """from ...modeling_utils import PreTrainedModel +from ...utils import is_torch_available +from .configuration_bert import BertConfig + +class BertModel: + ''' + This is the docstring. + ''' + This is the code +""" + +BERT_MODEL_FILE_NEW_DOCSTRING = """from ...modeling_utils import PreTrainedModel +from ...utils import is_torch_available +from .configuration_bert import BertConfig + +class BertModel: + ''' + This is the docstring. It has been updated. 
+ ''' + This is the code +""" + +BERT_MODEL_FILE_NEW_CODE = """from ...modeling_utils import PreTrainedModel +from ...utils import is_torch_available +from .configuration_bert import BertConfig + +class BertModel: + ''' + This is the docstring. + ''' + This is the code. It has been updated +""" + + +def create_tmp_repo(tmp_dir, models=None): + """ + Creates a repository in a temporary directory mimicking the structure of Transformers. Uses the list of models + provided (which defaults to just `["bert"]`). + """ + tmp_dir = Path(tmp_dir) + if tmp_dir.exists(): + shutil.rmtree(tmp_dir) + tmp_dir.mkdir(exist_ok=True) + repo = Repo.init(tmp_dir) + + if models is None: + models = ["bert"] + class_names = [model[0].upper() + model[1:] for model in models] + + transformers_dir = tmp_dir / "src" / "transformers" + transformers_dir.mkdir(parents=True, exist_ok=True) + with open(transformers_dir / "__init__.py", "w") as f: + init_lines = ["from .utils import cached_file, is_torch_available"] + init_lines.extend( + [f"from .models.{model} import {cls}Config, {cls}Model" for model, cls in zip(models, class_names)] + ) + f.write("\n".join(init_lines) + "\n") + with open(transformers_dir / "configuration_utils.py", "w") as f: + f.write("from .utils import cached_file\n\ncode") + with open(transformers_dir / "modeling_utils.py", "w") as f: + f.write("from .utils import cached_file\n\ncode") + + utils_dir = tmp_dir / "src" / "transformers" / "utils" + utils_dir.mkdir(exist_ok=True) + with open(utils_dir / "__init__.py", "w") as f: + f.write("from .hub import cached_file\nfrom .imports import is_torch_available\n") + with open(utils_dir / "hub.py", "w") as f: + f.write("import huggingface_hub\n\ncode") + with open(utils_dir / "imports.py", "w") as f: + f.write("code") + + model_dir = tmp_dir / "src" / "transformers" / "models" + model_dir.mkdir(parents=True, exist_ok=True) + with open(model_dir / "__init__.py", "w") as f: + f.write("\n".join([f"import {model}" for model in 
models])) + + for model, cls in zip(models, class_names): + model_dir = tmp_dir / "src" / "transformers" / "models" / model + model_dir.mkdir(parents=True, exist_ok=True) + with open(model_dir / "__init__.py", "w") as f: + f.write(f"from .configuration_{model} import {cls}Config\nfrom .modeling_{model} import {cls}Model\n") + with open(model_dir / f"configuration_{model}.py", "w") as f: + f.write("from ...configuration_utils import PretrainedConfig\ncode") + with open(model_dir / f"modeling_{model}.py", "w") as f: + modeling_code = BERT_MODEL_FILE.replace("bert", model).replace("Bert", cls) + f.write(modeling_code) + + test_dir = tmp_dir / "tests" + test_dir.mkdir(exist_ok=True) + with open(test_dir / "test_modeling_common.py", "w") as f: + f.write("from transformers.modeling_utils import PreTrainedModel\ncode") + + for model, cls in zip(models, class_names): + test_model_dir = test_dir / "models" / model + test_model_dir.mkdir(parents=True, exist_ok=True) + (test_model_dir / "__init__.py").touch() + with open(test_model_dir / f"test_modeling_{model}.py", "w") as f: + f.write( + f"from transformers import {cls}Config, {cls}Model\nfrom ...test_modeling_common import ModelTesterMixin\n\ncode" + ) + + repo.index.add(["src", "tests"]) + repo.index.commit("Initial commit") + repo.create_head("main") + repo.head.reference = repo.refs.main + repo.delete_head("master") + return repo + + +@contextmanager +def patch_transformer_repo_path(new_folder): + """ + Temporarily patches the variables defines in `tests_fetcher` to use a different location for the repo. 
+ """ + old_repo_path = tests_fetcher.PATH_TO_REPO + tests_fetcher.PATH_TO_REPO = Path(new_folder).resolve() + tests_fetcher.PATH_TO_TRANFORMERS = tests_fetcher.PATH_TO_REPO / "src/transformers" + tests_fetcher.PATH_TO_TESTS = tests_fetcher.PATH_TO_REPO / "tests" + try: + yield + finally: + tests_fetcher.PATH_TO_REPO = old_repo_path + tests_fetcher.PATH_TO_TRANFORMERS = tests_fetcher.PATH_TO_REPO / "src/transformers" + tests_fetcher.PATH_TO_TESTS = tests_fetcher.PATH_TO_REPO / "tests" + + +def commit_changes(filenames, contents, repo, commit_message="Commit"): + """ + Commit new `contents` to `filenames` inside a given `repo`. + """ + if not isinstance(filenames, list): + filenames = [filenames] + if not isinstance(contents, list): + contents = [contents] + + folder = Path(repo.working_dir) + for filename, content in zip(filenames, contents): + with open(folder / filename, "w") as f: + f.write(content) + repo.index.add(filenames) + commit = repo.index.commit(commit_message) + return commit.hexsha + + +class TestFetcherTester(unittest.TestCase): + def test_checkout_commit(self): + with tempfile.TemporaryDirectory() as tmp_folder: + tmp_folder = Path(tmp_folder) + repo = create_tmp_repo(tmp_folder) + initial_sha = repo.head.commit.hexsha + new_sha = commit_changes(BERT_MODELING_FILE, BERT_MODEL_FILE_NEW_DOCSTRING, repo) + + assert repo.head.commit.hexsha == new_sha + with checkout_commit(repo, initial_sha): + assert repo.head.commit.hexsha == initial_sha + with open(tmp_folder / BERT_MODELING_FILE) as f: + assert f.read() == BERT_MODEL_FILE + + assert repo.head.commit.hexsha == new_sha + with open(tmp_folder / BERT_MODELING_FILE) as f: + assert f.read() == BERT_MODEL_FILE_NEW_DOCSTRING + def test_clean_code(self): # Clean code removes all strings in triple quotes - self.assertEqual(clean_code('"""\nDocstring\n"""\ncode\n"""Long string"""\ncode\n'), "code\ncode") - self.assertEqual(clean_code("'''\nDocstring\n'''\ncode\n'''Long string'''\ncode\n'''"), "code\ncode") + 
assert clean_code('"""\nDocstring\n"""\ncode\n"""Long string"""\ncode\n') == "code\ncode" + assert clean_code("'''\nDocstring\n'''\ncode\n'''Long string'''\ncode\n'''") == "code\ncode" # Clean code removes all comments - self.assertEqual(clean_code("code\n# Comment\ncode"), "code\ncode") - self.assertEqual(clean_code("code # inline comment\ncode"), "code \ncode") + assert clean_code("code\n# Comment\ncode") == "code\ncode" + assert clean_code("code # inline comment\ncode") == "code \ncode" - def test_checkout_commit(self): - repo = Repo(git_repo_path) - self.assertNotEqual(repo.head.commit.hexsha, GIT_TEST_SHA) - with checkout_commit(repo, GIT_TEST_SHA): - self.assertEqual(repo.head.commit.hexsha, GIT_TEST_SHA) - self.assertNotEqual(repo.head.commit.hexsha, GIT_TEST_SHA) + def test_get_all_tests(self): + with tempfile.TemporaryDirectory() as tmp_folder: + tmp_folder = Path(tmp_folder) + create_tmp_repo(tmp_folder) + with patch_transformer_repo_path(tmp_folder): + assert get_all_tests() == ["tests/models/bert", "tests/test_modeling_common.py"] + + def test_get_all_tests_on_full_repo(self): + all_tests = get_all_tests() + assert "tests/models/albert" in all_tests + assert "tests/models/bert" in all_tests + assert "tests/repo_utils" in all_tests + assert "tests/test_pipeline_mixin.py" in all_tests + assert "tests/models" not in all_tests + assert "tests/__pycache__" not in all_tests + assert "tests/models/albert/test_modeling_albert.py" not in all_tests + assert "tests/repo_utils/test_tests_fetcher.py" not in all_tests + + def test_diff_is_docstring_only(self): + with tempfile.TemporaryDirectory() as tmp_folder: + tmp_folder = Path(tmp_folder) + repo = create_tmp_repo(tmp_folder) + + branching_point = repo.refs.main.commit + bert_file = BERT_MODELING_FILE + commit_changes(bert_file, BERT_MODEL_FILE_NEW_DOCSTRING, repo) + assert diff_is_docstring_only(repo, branching_point, bert_file) + + commit_changes(bert_file, BERT_MODEL_FILE_NEW_CODE, repo) + assert not 
diff_is_docstring_only(repo, branching_point, bert_file) + + def test_get_diff(self): + with tempfile.TemporaryDirectory() as tmp_folder: + tmp_folder = Path(tmp_folder) + repo = create_tmp_repo(tmp_folder) + + initial_commit = repo.refs.main.commit + bert_file = BERT_MODELING_FILE + commit_changes(bert_file, BERT_MODEL_FILE_NEW_DOCSTRING, repo) + assert get_diff(repo, repo.head.commit, repo.head.commit.parents) == [] + + commit_changes(bert_file, BERT_MODEL_FILE_NEW_DOCSTRING + "\n# Adding a comment\n", repo) + assert get_diff(repo, repo.head.commit, repo.head.commit.parents) == [] + + commit_changes(bert_file, BERT_MODEL_FILE_NEW_CODE, repo) + assert get_diff(repo, repo.head.commit, repo.head.commit.parents) == [ + "src/transformers/models/bert/modeling_bert.py" + ] + + commit_changes("src/transformers/utils/hub.py", "import huggingface_hub\n\nnew code", repo) + assert get_diff(repo, repo.head.commit, repo.head.commit.parents) == ["src/transformers/utils/hub.py"] + assert get_diff(repo, repo.head.commit, [initial_commit]) == [ + "src/transformers/models/bert/modeling_bert.py", + "src/transformers/utils/hub.py", + ] + + def test_extract_imports_relative(self): + with tempfile.TemporaryDirectory() as tmp_folder: + tmp_folder = Path(tmp_folder) + create_tmp_repo(tmp_folder) + + expected_bert_imports = [ + ("src/transformers/modeling_utils.py", ["PreTrainedModel"]), + ("src/transformers/utils/__init__.py", ["is_torch_available"]), + ("src/transformers/models/bert/configuration_bert.py", ["BertConfig"]), + ] + expected_utils_imports = [ + ("src/transformers/utils/hub.py", ["cached_file"]), + ("src/transformers/utils/imports.py", ["is_torch_available"]), + ] + with patch_transformer_repo_path(tmp_folder): + assert extract_imports(BERT_MODELING_FILE) == expected_bert_imports + assert extract_imports("src/transformers/utils/__init__.py") == expected_utils_imports + + with open(tmp_folder / BERT_MODELING_FILE, "w") as f: + f.write( + "from ...utils import cached_file, 
is_torch_available\nfrom .configuration_bert import BertConfig\n" + ) + expected_bert_imports = [ + ("src/transformers/utils/__init__.py", ["cached_file", "is_torch_available"]), + ("src/transformers/models/bert/configuration_bert.py", ["BertConfig"]), + ] + with patch_transformer_repo_path(tmp_folder): + assert extract_imports(BERT_MODELING_FILE) == expected_bert_imports + + # Test with multi-line imports + with open(tmp_folder / BERT_MODELING_FILE, "w") as f: + f.write( + "from ...utils import (\n cached_file,\n is_torch_available\n)\nfrom .configuration_bert import BertConfig\n" + ) + expected_bert_imports = [ + ("src/transformers/models/bert/configuration_bert.py", ["BertConfig"]), + ("src/transformers/utils/__init__.py", ["cached_file", "is_torch_available"]), + ] + with patch_transformer_repo_path(tmp_folder): + assert extract_imports(BERT_MODELING_FILE) == expected_bert_imports + + def test_extract_imports_absolute(self): + with tempfile.TemporaryDirectory() as tmp_folder: + tmp_folder = Path(tmp_folder) + create_tmp_repo(tmp_folder) + + with open(tmp_folder / BERT_MODELING_FILE, "w") as f: + f.write( + "from transformers.utils import cached_file, is_torch_available\nfrom transformers.models.bert.configuration_bert import BertConfig\n" + ) + expected_bert_imports = [ + ("src/transformers/utils/__init__.py", ["cached_file", "is_torch_available"]), + ("src/transformers/models/bert/configuration_bert.py", ["BertConfig"]), + ] + with patch_transformer_repo_path(tmp_folder): + assert extract_imports(BERT_MODELING_FILE) == expected_bert_imports + + # Test with multi-line imports + with open(tmp_folder / BERT_MODELING_FILE, "w") as f: + f.write( + "from transformers.utils import (\n cached_file,\n is_torch_available\n)\nfrom transformers.models.bert.configuration_bert import BertConfig\n" + ) + expected_bert_imports = [ + ("src/transformers/models/bert/configuration_bert.py", ["BertConfig"]), + ("src/transformers/utils/__init__.py", ["cached_file", 
"is_torch_available"]), + ] + with patch_transformer_repo_path(tmp_folder): + assert extract_imports(BERT_MODELING_FILE) == expected_bert_imports + + # Test with base imports + with open(tmp_folder / BERT_MODELING_FILE, "w") as f: + f.write( + "from transformers.utils import (\n cached_file,\n is_torch_available\n)\nfrom transformers import BertConfig\n" + ) + expected_bert_imports = [ + ("src/transformers/__init__.py", ["BertConfig"]), + ("src/transformers/utils/__init__.py", ["cached_file", "is_torch_available"]), + ] + with patch_transformer_repo_path(tmp_folder): + assert extract_imports(BERT_MODELING_FILE) == expected_bert_imports def test_get_module_dependencies(self): - bert_module = os.path.join(transformers_path, "models", "bert", "modeling_bert.py") - expected_deps = [ - "activations.py", - "modeling_outputs.py", - "modeling_utils.py", - "pytorch_utils.py", - "models/bert/configuration_bert.py", - ] - expected_deps = {os.path.join(transformers_path, f) for f in expected_deps} - repo = Repo(git_repo_path) - with checkout_commit(repo, GIT_TEST_SHA): - deps = get_module_dependencies(bert_module) - deps = {os.path.expanduser(f) for f in deps} - self.assertEqual(deps, expected_deps) + with tempfile.TemporaryDirectory() as tmp_folder: + tmp_folder = Path(tmp_folder) + create_tmp_repo(tmp_folder) + + expected_bert_dependencies = [ + "src/transformers/modeling_utils.py", + "src/transformers/models/bert/configuration_bert.py", + "src/transformers/utils/imports.py", + ] + with patch_transformer_repo_path(tmp_folder): + assert get_module_dependencies(BERT_MODELING_FILE) == expected_bert_dependencies + + expected_test_bert_dependencies = [ + "tests/test_modeling_common.py", + "src/transformers/models/bert/configuration_bert.py", + "src/transformers/models/bert/modeling_bert.py", + ] + + with patch_transformer_repo_path(tmp_folder): + assert ( + get_module_dependencies("tests/models/bert/test_modeling_bert.py") + == expected_test_bert_dependencies + ) + + # Test with 
a submodule + (tmp_folder / "src/transformers/utils/logging.py").touch() + with open(tmp_folder / BERT_MODELING_FILE, "a") as f: + f.write("from ...utils import logging\n") + + expected_bert_dependencies = [ + "src/transformers/modeling_utils.py", + "src/transformers/models/bert/configuration_bert.py", + "src/transformers/utils/logging.py", + "src/transformers/utils/imports.py", + ] + with patch_transformer_repo_path(tmp_folder): + assert get_module_dependencies(BERT_MODELING_FILE) == expected_bert_dependencies + + # Test with an object non-imported in the init + create_tmp_repo(tmp_folder) + with open(tmp_folder / BERT_MODELING_FILE, "a") as f: + f.write("from ...utils import CONSTANT\n") + + expected_bert_dependencies = [ + "src/transformers/modeling_utils.py", + "src/transformers/models/bert/configuration_bert.py", + "src/transformers/utils/__init__.py", + "src/transformers/utils/imports.py", + ] + with patch_transformer_repo_path(tmp_folder): + assert get_module_dependencies(BERT_MODELING_FILE) == expected_bert_dependencies + + def test_create_reverse_dependency_tree(self): + with tempfile.TemporaryDirectory() as tmp_folder: + tmp_folder = Path(tmp_folder) + create_tmp_repo(tmp_folder) + with patch_transformer_repo_path(tmp_folder): + tree = create_reverse_dependency_tree() + + init_edges = [ + "src/transformers/utils/hub.py", + "src/transformers/utils/imports.py", + "src/transformers/models/bert/configuration_bert.py", + "src/transformers/models/bert/modeling_bert.py", + ] + assert {f for f, g in tree if g == "src/transformers/__init__.py"} == set(init_edges) + + bert_edges = [ + "src/transformers/modeling_utils.py", + "src/transformers/utils/imports.py", + "src/transformers/models/bert/configuration_bert.py", + ] + assert {f for f, g in tree if g == "src/transformers/models/bert/modeling_bert.py"} == set(bert_edges) + + test_bert_edges = [ + "tests/test_modeling_common.py", + "src/transformers/models/bert/configuration_bert.py", + 
"src/transformers/models/bert/modeling_bert.py", + ] + assert {f for f, g in tree if g == "tests/models/bert/test_modeling_bert.py"} == set(test_bert_edges) + + def test_get_tree_starting_at(self): + with tempfile.TemporaryDirectory() as tmp_folder: + tmp_folder = Path(tmp_folder) + create_tmp_repo(tmp_folder) + with patch_transformer_repo_path(tmp_folder): + edges = create_reverse_dependency_tree() + + bert_tree = get_tree_starting_at("src/transformers/models/bert/modeling_bert.py", edges) + config_utils_tree = get_tree_starting_at("src/transformers/configuration_utils.py", edges) + + expected_bert_tree = [ + "src/transformers/models/bert/modeling_bert.py", + [("src/transformers/models/bert/modeling_bert.py", "tests/models/bert/test_modeling_bert.py")], + ] + assert bert_tree == expected_bert_tree + + expected_config_tree = [ + "src/transformers/configuration_utils.py", + [("src/transformers/configuration_utils.py", "src/transformers/models/bert/configuration_bert.py")], + [ + ("src/transformers/models/bert/configuration_bert.py", "tests/models/bert/test_modeling_bert.py"), + ( + "src/transformers/models/bert/configuration_bert.py", + "src/transformers/models/bert/modeling_bert.py", + ), + ], + ] + # Order of the edges is random + assert [set(v) for v in config_utils_tree] == [set(v) for v in expected_config_tree] + + def test_print_tree_deps_of(self): + with tempfile.TemporaryDirectory() as tmp_folder: + tmp_folder = Path(tmp_folder) + create_tmp_repo(tmp_folder) + + # There are two possible outputs since the order of the last two lines is non-deterministic. 
+ expected_std_out = """src/transformers/models/bert/modeling_bert.py + tests/models/bert/test_modeling_bert.py +src/transformers/configuration_utils.py + src/transformers/models/bert/configuration_bert.py + src/transformers/models/bert/modeling_bert.py + tests/models/bert/test_modeling_bert.py""" + + expected_std_out_2 = """src/transformers/models/bert/modeling_bert.py + tests/models/bert/test_modeling_bert.py +src/transformers/configuration_utils.py + src/transformers/models/bert/configuration_bert.py + tests/models/bert/test_modeling_bert.py + src/transformers/models/bert/modeling_bert.py""" + + with patch_transformer_repo_path(tmp_folder), CaptureStdout() as cs: + print_tree_deps_of("src/transformers/models/bert/modeling_bert.py") + print_tree_deps_of("src/transformers/configuration_utils.py") + + assert cs.out.strip() in [expected_std_out, expected_std_out_2] + + def test_create_reverse_dependency_map(self): + with tempfile.TemporaryDirectory() as tmp_folder: + tmp_folder = Path(tmp_folder) + create_tmp_repo(tmp_folder) + with patch_transformer_repo_path(tmp_folder): + reverse_map = create_reverse_dependency_map() + + # impact of BERT modeling file (note that we stop at the inits and don't go down further) + expected_bert_deps = { + "src/transformers/__init__.py", + "src/transformers/models/bert/__init__.py", + "tests/models/bert/test_modeling_bert.py", + } + assert set(reverse_map["src/transformers/models/bert/modeling_bert.py"]) == expected_bert_deps + + # init gets the direct deps (and their recursive deps) + expected_init_deps = { + "src/transformers/utils/__init__.py", + "src/transformers/utils/hub.py", + "src/transformers/utils/imports.py", + "src/transformers/models/bert/__init__.py", + "src/transformers/models/bert/configuration_bert.py", + "src/transformers/models/bert/modeling_bert.py", + "src/transformers/configuration_utils.py", + "src/transformers/modeling_utils.py", + "tests/test_modeling_common.py", + "tests/models/bert/test_modeling_bert.py", + 
} + assert set(reverse_map["src/transformers/__init__.py"]) == expected_init_deps + + expected_init_deps = { + "src/transformers/__init__.py", + "src/transformers/models/bert/configuration_bert.py", + "src/transformers/models/bert/modeling_bert.py", + "tests/models/bert/test_modeling_bert.py", + } + assert set(reverse_map["src/transformers/models/bert/__init__.py"]) == expected_init_deps + + # Test that with more models init of bert only gets deps to bert. + create_tmp_repo(tmp_folder, models=["bert", "gpt2"]) + with patch_transformer_repo_path(tmp_folder): + reverse_map = create_reverse_dependency_map() + + # init gets the direct deps (and their recursive deps) + expected_init_deps = { + "src/transformers/__init__.py", + "src/transformers/models/bert/configuration_bert.py", + "src/transformers/models/bert/modeling_bert.py", + "tests/models/bert/test_modeling_bert.py", + } + assert set(reverse_map["src/transformers/models/bert/__init__.py"]) == expected_init_deps + + def test_create_module_to_test_map(self): + with tempfile.TemporaryDirectory() as tmp_folder: + tmp_folder = Path(tmp_folder) + models = models = ["bert", "gpt2"] + [f"bert{i}" for i in range(10)] + create_tmp_repo(tmp_folder, models=models) + with patch_transformer_repo_path(tmp_folder): + test_map = create_module_to_test_map(filter_models=True) + + for model in models: + assert test_map[f"src/transformers/models/{model}/modeling_{model}.py"] == [ + f"tests/models/{model}/test_modeling_{model}.py" + ] + + # Init got filtered + expected_init_tests = { + "tests/test_modeling_common.py", + "tests/models/bert/test_modeling_bert.py", + "tests/models/gpt2/test_modeling_gpt2.py", + } + assert set(test_map["src/transformers/__init__.py"]) == expected_init_tests + + def test_infer_tests_to_run(self): + with tempfile.TemporaryDirectory() as tmp_folder: + tmp_folder = Path(tmp_folder) + models = ["bert", "gpt2"] + [f"bert{i}" for i in range(10)] + repo = create_tmp_repo(tmp_folder, models=models) + + 
commit_changes("src/transformers/models/bert/modeling_bert.py", BERT_MODEL_FILE_NEW_CODE, repo) + + with patch_transformer_repo_path(tmp_folder): + infer_tests_to_run(tmp_folder / "test-output.txt", diff_with_last_commit=True) + with open(tmp_folder / "test-output.txt", "r") as f: + tests_to_run = f.read() + + assert tests_to_run == "tests/models/bert/test_modeling_bert.py" + + # Fake a new model addition + repo = create_tmp_repo(tmp_folder, models=models) + + branch = repo.create_head("new_model") + branch.checkout() + + with open(tmp_folder / "src/transformers/__init__.py", "a") as f: + f.write("from .models.t5 import T5Config, T5Model\n") + + model_dir = tmp_folder / "src/transformers/models/t5" + model_dir.mkdir(exist_ok=True) + + with open(model_dir / "__init__.py", "w") as f: + f.write("from .configuration_t5 import T5Config\nfrom .modeling_t5 import T5Model\n") + with open(model_dir / "configuration_t5.py", "w") as f: + f.write("from ...configuration_utils import PretrainedConfig\ncode") + with open(model_dir / "modeling_t5.py", "w") as f: + modeling_code = BERT_MODEL_FILE.replace("bert", "t5").replace("Bert", "T5") + f.write(modeling_code) + + test_dir = tmp_folder / "tests/models/t5" + test_dir.mkdir(exist_ok=True) + (test_dir / "__init__.py").touch() + with open(test_dir / "test_modeling_t5.py", "w") as f: + f.write( + "from transformers import T5Config, T5Model\nfrom ...test_modeling_common import ModelTesterMixin\n\ncode" + ) + + repo.index.add(["src", "tests"]) + repo.index.commit("Add T5 model") + + with patch_transformer_repo_path(tmp_folder): + infer_tests_to_run(tmp_folder / "test-output.txt") + with open(tmp_folder / "test-output.txt", "r") as f: + tests_to_run = f.read() + + expected_tests = { + "tests/models/bert/test_modeling_bert.py", + "tests/models/gpt2/test_modeling_gpt2.py", + "tests/models/t5/test_modeling_t5.py", + "tests/test_modeling_common.py", + } + assert set(tests_to_run.split(" ")) == expected_tests + + with 
patch_transformer_repo_path(tmp_folder): + infer_tests_to_run(tmp_folder / "test-output.txt", filter_models=False) + with open(tmp_folder / "test-output.txt", "r") as f: + tests_to_run = f.read() + + expected_tests = [f"tests/models/{name}/test_modeling_{name}.py" for name in models + ["t5"]] + expected_tests = set(expected_tests + ["tests/test_modeling_common.py"]) + assert set(tests_to_run.split(" ")) == expected_tests + + def test_infer_tests_to_run_with_test_modifs(self): + with tempfile.TemporaryDirectory() as tmp_folder: + tmp_folder = Path(tmp_folder) + models = ["bert", "gpt2"] + [f"bert{i}" for i in range(10)] + repo = create_tmp_repo(tmp_folder, models=models) + + commit_changes( + "tests/models/bert/test_modeling_bert.py", + "from transformers import BertConfig, BertModel\nfrom ...test_modeling_common import ModelTesterMixin\n\ncode1", + repo, + ) + + with patch_transformer_repo_path(tmp_folder): + infer_tests_to_run(tmp_folder / "test-output.txt", diff_with_last_commit=True) + with open(tmp_folder / "test-output.txt", "r") as f: + tests_to_run = f.read() + + assert tests_to_run == "tests/models/bert/test_modeling_bert.py" + + def test_parse_commit_message(self): + assert parse_commit_message("Normal commit") == {"skip": False, "no_filter": False, "test_all": False} + + assert parse_commit_message("[skip ci] commit") == {"skip": True, "no_filter": False, "test_all": False} + assert parse_commit_message("[ci skip] commit") == {"skip": True, "no_filter": False, "test_all": False} + assert parse_commit_message("[skip-ci] commit") == {"skip": True, "no_filter": False, "test_all": False} + assert parse_commit_message("[skip_ci] commit") == {"skip": True, "no_filter": False, "test_all": False} + + assert parse_commit_message("[no filter] commit") == {"skip": False, "no_filter": True, "test_all": False} + assert parse_commit_message("[no-filter] commit") == {"skip": False, "no_filter": True, "test_all": False} + assert parse_commit_message("[no_filter] commit") 
== {"skip": False, "no_filter": True, "test_all": False} + assert parse_commit_message("[filter-no] commit") == {"skip": False, "no_filter": True, "test_all": False} + + assert parse_commit_message("[test all] commit") == {"skip": False, "no_filter": False, "test_all": True} + assert parse_commit_message("[all test] commit") == {"skip": False, "no_filter": False, "test_all": True} + assert parse_commit_message("[test-all] commit") == {"skip": False, "no_filter": False, "test_all": True} + assert parse_commit_message("[all_test] commit") == {"skip": False, "no_filter": False, "test_all": True} diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py index f71366d2183829..030555aece7365 100755 --- a/tests/test_modeling_common.py +++ b/tests/test_modeling_common.py @@ -27,6 +27,7 @@ import unittest import unittest.mock as mock import warnings +from collections import defaultdict from pathlib import Path from typing import Dict, List, Tuple @@ -1626,6 +1627,41 @@ def check_same_values(layer_1, layer_2): # self.assertTrue(model.transformer.wte.weight.shape, model.lm_head.weight.shape) # self.assertTrue(check_same_values(model.transformer.wte, model.lm_head)) + @require_safetensors + def test_can_use_safetensors(self): + config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() + for model_class in self.all_model_classes: + model_tied = model_class(config) + with tempfile.TemporaryDirectory() as d: + try: + model_tied.save_pretrained(d, safe_serialization=True) + except Exception as e: + raise Exception(f"Class {model_class.__name__} cannot be saved using safetensors: {e}") + + model_reloaded, infos = model_class.from_pretrained(d, output_loading_info=True) + # Checking the state dicts are correct + reloaded_state = model_reloaded.state_dict() + for k, v in model_tied.state_dict().items(): + self.assertIn(k, reloaded_state, f"Key {k} is missing from reloaded") + torch.testing.assert_close( + v, reloaded_state[k], msg=lambda x: 
f"{model_class.__name__}: Tensor {k}: {x}" + ) + + # Checking the tensor sharing are correct + ptrs = defaultdict(list) + for k, v in model_tied.state_dict().items(): + ptrs[v.data_ptr()].append(k) + + shared_ptrs = {k: v for k, v in ptrs.items() if len(v) > 1} + + for _, shared_names in shared_ptrs.items(): + reloaded_ptrs = {reloaded_state[k].data_ptr() for k in shared_names} + self.assertEqual( + len(reloaded_ptrs), + 1, + f"The shared pointers are incorrect, found different pointers for keys {shared_names}", + ) + def test_tied_model_weights_key_ignore(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: diff --git a/utils/get_ci_error_statistics.py b/utils/get_ci_error_statistics.py index 09dc4d7dd226e7..e9dc52b5bbe0d4 100644 --- a/utils/get_ci_error_statistics.py +++ b/utils/get_ci_error_statistics.py @@ -66,12 +66,12 @@ def get_artifacts_links(worflow_run_id, token=None): def download_artifact(artifact_name, artifact_url, output_dir, token): """Download a GitHub Action artifact from a URL. - The URL is of the from `https://api.github.com/repos/huggingface/transformers/actions/artifacts/{ARTIFACT_ID}/zip`, + The URL is of the form `https://api.github.com/repos/huggingface/transformers/actions/artifacts/{ARTIFACT_ID}/zip`, but it can't be used to download directly. We need to get a redirect URL first. 
See https://docs.github.com/en/rest/actions/artifacts#download-an-artifact """ # Get the redirect URL first - cmd = f'curl -v -H "Accept: application/vnd.github+json" -H "Authorization: token {token}" {artifact_url}' + cmd = f'curl -v -H "Accept: application/vnd.github+json" -H "Authorization: Bearer {token}" {artifact_url}' output = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) o = output.stdout.decode("utf-8") lines = o.splitlines() diff --git a/utils/notification_service.py b/utils/notification_service.py index 0aefd5844d325c..7251b4d400c935 100644 --- a/utils/notification_service.py +++ b/utils/notification_service.py @@ -590,23 +590,20 @@ def post_reply(self): time.sleep(1) -def retrieve_artifact(name: str, gpu: Optional[str]): +def retrieve_artifact(artifact_path: str, gpu: Optional[str]): if gpu not in [None, "single", "multi"]: raise ValueError(f"Invalid GPU for artifact. Passed GPU: `{gpu}`.") - if gpu is not None: - name = f"{gpu}-gpu_{name}" - _artifact = {} - if os.path.exists(name): - files = os.listdir(name) + if os.path.exists(artifact_path): + files = os.listdir(artifact_path) for file in files: try: - with open(os.path.join(name, file)) as f: + with open(os.path.join(artifact_path, file)) as f: _artifact[file.split(".")[0]] = f.read() except UnicodeDecodeError as e: - raise ValueError(f"Could not open {os.path.join(name, file)}.") from e + raise ValueError(f"Could not open {os.path.join(artifact_path, file)}.") from e return _artifact @@ -629,8 +626,14 @@ def add_path(self, path: str, gpu: str = None): directories = filter(os.path.isdir, os.listdir()) for directory in directories: - if directory.startswith("single-gpu"): - artifact_name = directory[len("single-gpu") + 1 :] + artifact_name = directory + + name_parts = artifact_name.split("_postfix_") + if len(name_parts) > 1: + artifact_name = name_parts[0] + + if artifact_name.startswith("single-gpu"): + artifact_name = artifact_name[len("single-gpu") + 1 :] if 
artifact_name in _available_artifacts: _available_artifacts[artifact_name].single_gpu = True @@ -639,7 +642,7 @@ def add_path(self, path: str, gpu: str = None): _available_artifacts[artifact_name].add_path(directory, gpu="single") - elif directory.startswith("multi-gpu"): + elif artifact_name.startswith("multi-gpu"): artifact_name = directory[len("multi-gpu") + 1 :] if artifact_name in _available_artifacts: @@ -649,7 +652,6 @@ def add_path(self, path: str, gpu: str = None): _available_artifacts[artifact_name].add_path(directory, gpu="multi") else: - artifact_name = directory if artifact_name not in _available_artifacts: _available_artifacts[artifact_name] = Artifact(artifact_name) @@ -805,10 +807,12 @@ def prepare_reports(title, header, reports, to_truncate=True): framework, version = ci_event.replace("Past CI - ", "").split("-") framework = "PyTorch" if framework == "pytorch" else "TensorFlow" job_name_prefix = f"{framework} {version}" + elif ci_event.startswith("Nightly CI"): + job_name_prefix = "Nightly CI" for model in model_results.keys(): for artifact_path in available_artifacts[f"run_all_tests_gpu_{model}_test_reports"].paths: - artifact = retrieve_artifact(artifact_path["name"], artifact_path["gpu"]) + artifact = retrieve_artifact(artifact_path["path"], artifact_path["gpu"]) if "stats" in artifact: # Link to the GitHub Action job # The job names use `matrix.folder` which contain things like `models/bert` instead of `models_bert` @@ -901,7 +905,7 @@ def prepare_reports(title, header, reports, to_truncate=True): else: additional_results[key]["job_link"][artifact_path["gpu"]] = github_actions_job_links.get(key) - artifact = retrieve_artifact(artifact_path["name"], artifact_path["gpu"]) + artifact = retrieve_artifact(artifact_path["path"], artifact_path["gpu"]) stacktraces = handle_stacktraces(artifact["failures_line"]) failed, success, time_spent = handle_test_results(artifact["stats"]) diff --git a/utils/past_ci_versions.py b/utils/past_ci_versions.py index 
c50bbb9b14c98e..61495ab2a46fcd 100644 --- a/utils/past_ci_versions.py +++ b/utils/past_ci_versions.py @@ -4,6 +4,18 @@ past_versions_testing = { "pytorch": { + "1.13": { + "torch": "1.13.1", + "torchvision": "0.14.1", + "torchaudio": "0.13.1", + "python": 3.9, + "cuda": "cu116", + "install": ( + "python3 -m pip install --no-cache-dir -U torch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1" + " --extra-index-url https://download.pytorch.org/whl/cu116" + ), + "base_image": "nvidia/cuda:11.6.2-cudnn8-devel-ubuntu20.04", + }, "1.12": { "torch": "1.12.1", "torchvision": "0.13.1", @@ -14,6 +26,7 @@ "python3 -m pip install --no-cache-dir -U torch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1" " --extra-index-url https://download.pytorch.org/whl/cu113" ), + "base_image": "nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04", }, "1.11": { "torch": "1.11.0", @@ -25,6 +38,7 @@ "python3 -m pip install --no-cache-dir -U torch==1.11.0 torchvision==0.12.0 torchaudio==0.11.0" " --extra-index-url https://download.pytorch.org/whl/cu113" ), + "base_image": "nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04", }, "1.10": { "torch": "1.10.2", @@ -36,6 +50,7 @@ "python3 -m pip install --no-cache-dir -U torch==1.10.2 torchvision==0.11.3 torchaudio==0.10.2" " --extra-index-url https://download.pytorch.org/whl/cu113" ), + "base_image": "nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04", }, # torchaudio < 0.10 has no CUDA-enabled binary distributions "1.9": { @@ -48,87 +63,44 @@ "python3 -m pip install --no-cache-dir -U torch==1.9.1 torchvision==0.10.1 torchaudio==0.9.1" " --extra-index-url https://download.pytorch.org/whl/cu111" ), + "base_image": "nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04", }, - "1.8": { - "torch": "1.8.1", - "torchvision": "0.9.1", - "torchaudio": "0.8.1", - "python": 3.9, - "cuda": "cu111", - "install": ( - "python3 -m pip install --no-cache-dir -U torch==1.8.1 torchvision==0.9.1 torchaudio==0.8.1" - " --extra-index-url https://download.pytorch.org/whl/cu111" - ), - }, - "1.7": { - 
"torch": "1.7.1", - "torchvision": "0.8.2", - "torchaudio": "0.7.2", - "python": 3.9, - "cuda": "cu110", - "install": ( - "python3 -m pip install --no-cache-dir -U torch==1.7.1 torchvision==0.8.2 torchaudio==0.7.2" - " --extra-index-url https://download.pytorch.org/whl/cu110" - ), - }, - "1.6": { - "torch": "1.6.0", - "torchvision": "0.7.0", - "torchaudio": "0.6.0", - "python": 3.8, - "cuda": "cu101", - "install": ( - "python3 -m pip install --no-cache-dir -U torch==1.6.0 torchvision==0.7.0 torchaudio==0.6.0" - " --extra-index-url https://download.pytorch.org/whl/cu101" - ), + }, + "tensorflow": { + "2.11": { + "tensorflow": "2.11.1", + "install": "python3 -m pip install --no-cache-dir -U tensorflow==2.11.1", + "base_image": "nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04", }, - "1.5": { - "torch": "1.5.1", - "torchvision": "0.6.1", - "torchaudio": "0.5.1", - "python": 3.8, - "cuda": "cu101", - "install": ( - "python3 -m pip install --no-cache-dir -U torch==1.5.1 torchvision==0.6.1 torchaudio==0.5.1" - " --extra-index-url https://download.pytorch.org/whl/cu101" - ), + "2.10": { + "tensorflow": "2.10.1", + "install": "python3 -m pip install --no-cache-dir -U tensorflow==2.10.1", + "base_image": "nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04", }, - "1.4": { - "torch": "1.4.0", - "torchvision": "0.5.0", - "torchaudio": "0.4.0", - "python": 3.8, - "cuda": "cu100", - "install": ( - "python3 -m pip install --no-cache-dir -U torch==1.4.0 torchvision==0.5.0 torchaudio==0.4.0" - " --extra-index-url https://download.pytorch.org/whl/cu100" - ), + "2.9": { + "tensorflow": "2.9.3", + "install": "python3 -m pip install --no-cache-dir -U tensorflow==2.9.3", + "base_image": "nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04", }, - }, - "tensorflow": { "2.8": { "tensorflow": "2.8.2", "install": "python3 -m pip install --no-cache-dir -U tensorflow==2.8.2", + "base_image": "nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04", }, "2.7": { "tensorflow": "2.7.3", "install": "python3 -m pip install 
--no-cache-dir -U tensorflow==2.7.3", + "base_image": "nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04", }, "2.6": { "tensorflow": "2.6.5", "install": "python3 -m pip install --no-cache-dir -U tensorflow==2.6.5", + "base_image": "nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04", }, "2.5": { "tensorflow": "2.5.3", "install": "python3 -m pip install --no-cache-dir -U tensorflow==2.5.3", - }, - # need another `nvidia:cuda` docker image, otherwise GPU not working - "2.4": { - "tensorflow": "2.4.4", - "install": "python3 -m pip install --no-cache-dir -U tensorflow==2.4.4", - # This should be specified as a docker build argument. - # We keep the information here for reference only. - "base_docker": "nvidia/cuda:11.0.3-cudnn8-devel-ubuntu20.04", + "base_image": "nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04", }, }, } diff --git a/utils/tests_fetcher.py b/utils/tests_fetcher.py index 85a8e2e198f873..2a6ac35ce4f9ba 100644 --- a/utils/tests_fetcher.py +++ b/utils/tests_fetcher.py @@ -13,6 +13,27 @@ # See the License for the specific language governing permissions and # limitations under the License. +""" +Welcome to tests_fetcher V2. +This util is designed to fetch tests to run on a PR so that only the tests impacted by the modifications are run, and +when too many models are being impacted, only run the tests of a subset of core models. It works like this. + +Stage 1: Identify the modified files. This takes all the files from the branching point to the current commit (so +all modifications in a PR, not just the last commit) but excludes modifications that are on docstrings or comments +only. + +Stage 2: Extract the tests to run. This is done by looking at the imports in each module and test file: if module A +imports module B, then changing module B impacts module A, so the tests using module A should be run. We thus get the +dependencies of each model and then recursively builds the 'reverse' map of dependencies to get all modules and tests +impacted by a given file. 
We then only keep the tests (and only the code models tests if there are too many modules). + +Caveats: + - This module only filters tests by files (not individual tests) so it's better to have tests for different things + in different files. + - This module assumes inits are just importing things, not really building objects, so it's better to structure + them this way and move objects building in separate submodules. +""" + import argparse import collections import json @@ -24,13 +45,36 @@ from git import Repo -# This script is intended to be run from the root of the repo but you can adapt this constant if you need to. -PATH_TO_TRANFORMERS = "." - -# A temporary way to trigger all pipeline tests contained in model test files after PR #21516 -all_model_test_files = [str(x) for x in Path("tests/models/").glob("**/**/test_modeling_*.py")] - -all_pipeline_test_files = [str(x) for x in Path("tests/pipelines/").glob("**/test_pipelines_*.py")] +PATH_TO_REPO = Path(__file__).parent.parent.resolve() +PATH_TO_TRANFORMERS = PATH_TO_REPO / "src/transformers" +PATH_TO_TESTS = PATH_TO_REPO / "tests" + +# List here the models to always test. +IMPORTANT_MODELS = [ + # Most downloaded models + "bert", + "clip", + "t5", + "xlm-roberta", + "gpt2", + "bart", + "mpnet", + "gpt-j", + "wav2vec2", + "deberta-v2", + "layoutlm", + "opt", + "longformer", + "vit", + # Pipeline-specific model (to be sure each pipeline has one model in this list) + "tapas", + "vilt", + "clap", + "detr", + "owlvit", + "dpt", + "videomae", +] @contextmanager @@ -79,17 +123,21 @@ def get_all_tests(): - folders under `tests/models`: `bert`, `gpt2`, etc. - test files under `tests`: `test_modeling_common.py`, `test_tokenization_common.py`, etc. 
""" - test_root_dir = os.path.join(PATH_TO_TRANFORMERS, "tests") # test folders/files directly under `tests` folder - tests = os.listdir(test_root_dir) - tests = sorted(filter(lambda x: os.path.isdir(x) or x.startswith("tests/test_"), [f"tests/{x}" for x in tests])) + tests = os.listdir(PATH_TO_TESTS) + tests = [f"tests/{f}" for f in tests if "__pycache__" not in f] + tests = sorted([f for f in tests if (PATH_TO_REPO / f).is_dir() or f.startswith("tests/test_")]) # model specific test folders - model_tests_folders = os.listdir(os.path.join(test_root_dir, "models")) - model_test_folders = sorted(filter(os.path.isdir, [f"tests/models/{x}" for x in model_tests_folders])) + model_test_folders = os.listdir(PATH_TO_TESTS / "models") + model_test_folders = [f"tests/models/{f}" for f in model_test_folders if "__pycache__" not in f] + model_test_folders = sorted([f for f in model_test_folders if (PATH_TO_REPO / f).is_dir()]) tests.remove("tests/models") + # Sagemaker tests are not meant to be run on the CI. + if "tests/sagemaker" in tests: + tests.remove("tests/sagemaker") tests = model_test_folders + tests return tests @@ -99,11 +147,12 @@ def diff_is_docstring_only(repo, branching_point, filename): """ Check if the diff is only in docstrings in a filename. 
""" + folder = Path(repo.working_dir) with checkout_commit(repo, branching_point): - with open(filename, "r", encoding="utf-8") as f: + with open(folder / filename, "r", encoding="utf-8") as f: old_content = f.read() - with open(filename, "r", encoding="utf-8") as f: + with open(folder / filename, "r", encoding="utf-8") as f: new_content = f.read() old_content_clean = clean_code(old_content) @@ -112,31 +161,6 @@ def diff_is_docstring_only(repo, branching_point, filename): return old_content_clean == new_content_clean -def get_modified_python_files(diff_with_last_commit=False): - """ - Return a list of python files that have been modified between: - - - the current head and the main branch if `diff_with_last_commit=False` (default) - - the current head and its parent commit otherwise. - """ - repo = Repo(PATH_TO_TRANFORMERS) - - if not diff_with_last_commit: - print(f"main is at {repo.refs.main.commit}") - print(f"Current head is at {repo.head.commit}") - - branching_commits = repo.merge_base(repo.refs.main, repo.head) - for commit in branching_commits: - print(f"Branching commit: {commit}") - return get_diff(repo, repo.head.commit, branching_commits) - else: - print(f"main is at {repo.head.commit}") - parent_commits = repo.head.commit.parents - for commit in parent_commits: - print(f"Parent commit: {commit}") - return get_diff(repo, repo.head.commit, parent_commits) - - def get_diff(repo, base_commit, commits): """ Get's the diff between one or several commits and the head of the repository. @@ -166,96 +190,173 @@ def get_diff(repo, base_commit, commits): return code_diff -def get_module_dependencies(module_fname): +def get_modified_python_files(diff_with_last_commit=False): + """ + Return a list of python files that have been modified between: + + - the current head and the main branch if `diff_with_last_commit=False` (default) + - the current head and its parent commit otherwise. 
+ """ + repo = Repo(PATH_TO_REPO) + + if not diff_with_last_commit: + print(f"main is at {repo.refs.main.commit}") + print(f"Current head is at {repo.head.commit}") + + branching_commits = repo.merge_base(repo.refs.main, repo.head) + for commit in branching_commits: + print(f"Branching commit: {commit}") + return get_diff(repo, repo.head.commit, branching_commits) + else: + print(f"main is at {repo.head.commit}") + parent_commits = repo.head.commit.parents + for commit in parent_commits: + print(f"Parent commit: {commit}") + return get_diff(repo, repo.head.commit, parent_commits) + + +# (:?^|\n) -> Non-catching group for the beginning of the doc or a new line. +# \s*from\s+(\.+\S+)\s+import\s+([^\n]+) -> Line only contains from .xxx import yyy and we catch .xxx and yyy +# (?=\n) -> Look-ahead to a new line. We can't just put \n here or using find_all on this re will only catch every +# other import. +_re_single_line_relative_imports = re.compile(r"(?:^|\n)\s*from\s+(\.+\S+)\s+import\s+([^\n]+)(?=\n)") +# (:?^|\n) -> Non-catching group for the beginning of the doc or a new line. +# \s*from\s+(\.+\S+)\s+import\s+\(([^\)]+)\) -> Line continues with from .xxx import (yyy) and we catch .xxx and yyy +# yyy will take multiple lines otherwise there wouldn't be parenthesis. +_re_multi_line_relative_imports = re.compile(r"(?:^|\n)\s*from\s+(\.+\S+)\s+import\s+\(([^\)]+)\)") +# (:?^|\n) -> Non-catching group for the beginning of the doc or a new line. +# \s*from\s+transformers(\S*)\s+import\s+([^\n]+) -> Line only contains from transformers.xxx import yyy and we catch +# .xxx and yyy +# (?=\n) -> Look-ahead to a new line. We can't just put \n here or using find_all on this re will only catch every +# other import. +_re_single_line_direct_imports = re.compile(r"(?:^|\n)\s*from\s+transformers(\S*)\s+import\s+([^\n]+)(?=\n)") +# (:?^|\n) -> Non-catching group for the beginning of the doc or a new line. 
+# \s*from\s+transformers(\S*)\s+import\s+\(([^\)]+)\) -> Line continues with from transformers.xxx import (yyy) and we +# catch .xxx and yyy. yyy will take multiple lines otherwise there wouldn't be parenthesis. +_re_multi_line_direct_imports = re.compile(r"(?:^|\n)\s*from\s+transformers(\S*)\s+import\s+\(([^\)]+)\)") + + +def extract_imports(module_fname, cache=None): """ - Get the dependencies of a module. + Get the imports a given module makes. This takes a module filename and returns the list of module filenames + imported in the module with the objects imported in that module filename. """ - with open(os.path.join(PATH_TO_TRANFORMERS, module_fname), "r", encoding="utf-8") as f: + if cache is not None and module_fname in cache: + return cache[module_fname] + + with open(PATH_TO_REPO / module_fname, "r", encoding="utf-8") as f: content = f.read() - module_parts = module_fname.split(os.path.sep) + # Filter out all docstrings to not get imports in code examples. + splits = content.split('"""') + content = "".join(splits[::2]) + + module_parts = str(module_fname).split(os.path.sep) imported_modules = [] # Let's start with relative imports - relative_imports = re.findall(r"from\s+(\.+\S+)\s+import\s+([^\n]+)\n", content) - relative_imports = [mod for mod, imp in relative_imports if "# tests_ignore" not in imp] - for imp in relative_imports: + relative_imports = _re_single_line_relative_imports.findall(content) + relative_imports = [ + (mod, imp) for mod, imp in relative_imports if "# tests_ignore" not in imp and imp.strip() != "(" + ] + multiline_relative_imports = _re_multi_line_relative_imports.findall(content) + relative_imports += [(mod, imp) for mod, imp in multiline_relative_imports if "# tests_ignore" not in imp] + + for module, imports in relative_imports: level = 0 - while imp.startswith("."): - imp = imp[1:] + while module.startswith("."): + module = module[1:] level += 1 - if len(imp) > 0: - dep_parts = module_parts[: len(module_parts) - level] + 
imp.split(".") + if len(module) > 0: + dep_parts = module_parts[: len(module_parts) - level] + module.split(".") else: - dep_parts = module_parts[: len(module_parts) - level] + ["__init__.py"] + dep_parts = module_parts[: len(module_parts) - level] imported_module = os.path.sep.join(dep_parts) - # We ignore the main init import as it's only for the __version__ that it's done - # and it would add everything as a dependency. - if not imported_module.endswith("transformers/__init__.py"): - imported_modules.append(imported_module) + imported_modules.append((imported_module, [imp.strip() for imp in imports.split(",")])) # Let's continue with direct imports - # The import from the transformers module are ignored for the same reason we ignored the - # main init before. - direct_imports = re.findall(r"from\s+transformers\.(\S+)\s+import\s+([^\n]+)\n", content) - direct_imports = [mod for mod, imp in direct_imports if "# tests_ignore" not in imp] - for imp in direct_imports: - import_parts = imp.split(".") - dep_parts = ["src", "transformers"] + import_parts - imported_modules.append(os.path.sep.join(dep_parts)) + direct_imports = _re_single_line_direct_imports.findall(content) + direct_imports = [(mod, imp) for mod, imp in direct_imports if "# tests_ignore" not in imp and imp.strip() != "("] + multiline_direct_imports = _re_multi_line_direct_imports.findall(content) + direct_imports += [(mod, imp) for mod, imp in multiline_direct_imports if "# tests_ignore" not in imp] - # Now let's just check that we have proper module files, or append an init for submodules - dependencies = [] - for imported_module in imported_modules: - if os.path.isfile(os.path.join(PATH_TO_TRANFORMERS, f"{imported_module}.py")): - dependencies.append(f"{imported_module}.py") - elif os.path.isdir(os.path.join(PATH_TO_TRANFORMERS, imported_module)) and os.path.isfile( - os.path.sep.join([PATH_TO_TRANFORMERS, imported_module, "__init__.py"]) - ): - dependencies.append(os.path.sep.join([imported_module, 
"__init__.py"])) - return dependencies + for module, imports in direct_imports: + import_parts = module.split(".")[1:] # ignore the first . + dep_parts = ["src", "transformers"] + import_parts + imported_module = os.path.sep.join(dep_parts) + imported_modules.append((imported_module, [imp.strip() for imp in imports.split(",")])) + result = [] + for module_file, imports in imported_modules: + if (PATH_TO_REPO / f"{module_file}.py").is_file(): + module_file = f"{module_file}.py" + elif (PATH_TO_REPO / module_file).is_dir() and (PATH_TO_REPO / module_file / "__init__.py").is_file(): + module_file = os.path.sep.join([module_file, "__init__.py"]) + imports = [imp for imp in imports if len(imp) > 0 and re.match("^[A-Za-z0-9_]*$", imp)] + if len(imports) > 0: + result.append((module_file, imports)) -def get_test_dependencies(test_fname): - """ - Get the dependencies of a test file. - """ - with open(os.path.join(PATH_TO_TRANFORMERS, test_fname), "r", encoding="utf-8") as f: - content = f.read() + if cache is not None: + cache[module_fname] = result - # Tests only have relative imports for other test files - # TODO Sylvain: handle relative imports cleanly - relative_imports = re.findall(r"from\s+(\.\S+)\s+import\s+([^\n]+)\n", content) - relative_imports = [test for test, imp in relative_imports if "# tests_ignore" not in imp] + return result - def _convert_relative_import_to_file(relative_import): - level = 0 - while relative_import.startswith("."): - level += 1 - relative_import = relative_import[1:] - directory = os.path.sep.join(test_fname.split(os.path.sep)[:-level]) - return os.path.join(directory, f"{relative_import.replace('.', os.path.sep)}.py") +def get_module_dependencies(module_fname, cache=None): + """ + Get the dependencies of a module from the module filename as a list of module filenames. 
This will resolve any + __init__ we pass: if we import from a submodule utils, the dependencies will be utils/foo.py and utils/bar.py (if + the objects imported actually come from utils.foo and utils.bar) not utils/__init__.py. + """ + dependencies = [] + imported_modules = extract_imports(module_fname, cache=cache) + # The while loop is to recursively traverse all inits we may encounter. + while len(imported_modules) > 0: + new_modules = [] + for module, imports in imported_modules: + # If we end up in an __init__ we are often not actually importing from this init (except in the case where + # the object is fully defined in the __init__) + if module.endswith("__init__.py"): + # So we get the imports from that init then try to find where our objects come from. + new_imported_modules = extract_imports(module, cache=cache) + for new_module, new_imports in new_imported_modules: + if any([i in new_imports for i in imports]): + if new_module not in dependencies: + new_modules.append((new_module, [i for i in new_imports if i in imports])) + imports = [i for i in imports if i not in new_imports] + if len(imports) > 0: + # If there are any objects left, they may be a submodule + path_to_module = PATH_TO_REPO / module.replace("__init__.py", "") + dependencies.extend( + [ + os.path.join(module.replace("__init__.py", ""), f"{i}.py") + for i in imports + if (path_to_module / f"{i}.py").is_file() + ] + ) + imports = [i for i in imports if not (path_to_module / f"{i}.py").is_file()] + if len(imports) > 0: + # Then if there are still objects left, they are fully defined in the init, so we keep it as a + # dependency. 
+ dependencies.append(module) + else: + dependencies.append(module) - dependencies = [_convert_relative_import_to_file(relative_import) for relative_import in relative_imports] - return [f for f in dependencies if os.path.isfile(os.path.join(PATH_TO_TRANFORMERS, f))] + imported_modules = new_modules + return dependencies def create_reverse_dependency_tree(): """ Create a list of all edges (a, b) which mean that modifying a impacts b with a going over all module and test files. """ - modules = [ - str(f.relative_to(PATH_TO_TRANFORMERS)) - for f in (Path(PATH_TO_TRANFORMERS) / "src/transformers").glob("**/*.py") - ] - module_edges = [(d, m) for m in modules for d in get_module_dependencies(m)] - - tests = [str(f.relative_to(PATH_TO_TRANFORMERS)) for f in (Path(PATH_TO_TRANFORMERS) / "tests").glob("**/*.py")] - test_edges = [(d, t) for t in tests for d in get_test_dependencies(t)] + cache = {} + all_modules = list(PATH_TO_TRANFORMERS.glob("**/*.py")) + list(PATH_TO_TESTS.glob("**/*.py")) + all_modules = [str(mod.relative_to(PATH_TO_REPO)) for mod in all_modules] + edges = [(dep, mod) for mod in all_modules for dep in get_module_dependencies(mod, cache=cache)] - return module_edges + test_edges + return list(set(edges)) def get_tree_starting_at(module, edges): @@ -264,13 +365,17 @@ def get_tree_starting_at(module, edges): starting at module], [list of edges starting at the preceding level], ...] 
""" vertices_seen = [module] - new_edges = [edge for edge in edges if edge[0] == module and edge[1] != module] + new_edges = [edge for edge in edges if edge[0] == module and edge[1] != module and "__init__.py" not in edge[1]] tree = [module] while len(new_edges) > 0: tree.append(new_edges) final_vertices = list({edge[1] for edge in new_edges}) vertices_seen.extend(final_vertices) - new_edges = [edge for edge in edges if edge[0] in final_vertices and edge[1] not in vertices_seen] + new_edges = [ + edge + for edge in edges + if edge[0] in final_vertices and edge[1] not in vertices_seen and "__init__.py" not in edge[1] + ] return tree @@ -308,290 +413,159 @@ def create_reverse_dependency_map(): Create the dependency map from module/test filename to the list of modules/tests that depend on it (even recursively). """ - modules = [ - str(f.relative_to(PATH_TO_TRANFORMERS)) - for f in (Path(PATH_TO_TRANFORMERS) / "src/transformers").glob("**/*.py") - ] - # We grab all the dependencies of each module. - direct_deps = {m: get_module_dependencies(m) for m in modules} - - # We add all the dependencies of each test file - tests = [str(f.relative_to(PATH_TO_TRANFORMERS)) for f in (Path(PATH_TO_TRANFORMERS) / "tests").glob("**/*.py")] - direct_deps.update({t: get_test_dependencies(t) for t in tests}) - - all_files = modules + tests + cache = {} + all_modules = list(PATH_TO_TRANFORMERS.glob("**/*.py")) + list(PATH_TO_TESTS.glob("**/*.py")) + all_modules = [str(mod.relative_to(PATH_TO_REPO)) for mod in all_modules] + direct_deps = {m: get_module_dependencies(m, cache=cache) for m in all_modules} # This recurses the dependencies something_changed = True while something_changed: something_changed = False - for m in all_files: + for m in all_modules: for d in direct_deps[m]: + if d.endswith("__init__.py"): + continue if d not in direct_deps: raise ValueError(f"KeyError:{d}. 
From {m}") - for dep in direct_deps[d]: - if dep not in direct_deps[m]: - direct_deps[m].append(dep) - something_changed = True + new_deps = set(direct_deps[d]) - set(direct_deps[m]) + if len(new_deps) > 0: + direct_deps[m].extend(list(new_deps)) + something_changed = True # Finally we can build the reverse map. reverse_map = collections.defaultdict(list) - for m in all_files: - if m.endswith("__init__.py"): - reverse_map[m].extend(direct_deps[m]) + for m in all_modules: for d in direct_deps[m]: reverse_map[d].append(m) + for m in [f for f in all_modules if f.endswith("__init__.py")]: + direct_deps = get_module_dependencies(m, cache=cache) + deps = sum([reverse_map[d] for d in direct_deps if not d.endswith("__init__.py")], direct_deps) + reverse_map[m] = list(set(deps) - {m}) + return reverse_map -# Any module file that has a test name which can't be inferred automatically from its name should go here. A better -# approach is to (re-)name the test file accordingly, and second best to add the correspondence map here. 
-SPECIAL_MODULE_TO_TEST_MAP = { - "commands/add_new_model_like.py": "utils/test_add_new_model_like.py", - "configuration_utils.py": "test_configuration_common.py", - "convert_graph_to_onnx.py": "onnx/test_onnx.py", - "data/data_collator.py": "trainer/test_data_collator.py", - "deepspeed.py": "deepspeed/", - "feature_extraction_sequence_utils.py": "test_sequence_feature_extraction_common.py", - "feature_extraction_utils.py": "test_feature_extraction_common.py", - "file_utils.py": ["utils/test_file_utils.py", "utils/test_model_output.py"], - "image_processing_utils.py": ["test_image_processing_common.py", "utils/test_image_processing_utils.py"], - "image_transforms.py": "test_image_transforms.py", - "utils/generic.py": ["utils/test_file_utils.py", "utils/test_model_output.py", "utils/test_generic.py"], - "utils/hub.py": "utils/test_hub_utils.py", - "modelcard.py": "utils/test_model_card.py", - "modeling_flax_utils.py": "test_modeling_flax_common.py", - "modeling_tf_utils.py": ["test_modeling_tf_common.py", "utils/test_modeling_tf_core.py"], - "modeling_utils.py": ["test_modeling_common.py", "utils/test_offline.py"], - "models/auto/modeling_auto.py": [ - "models/auto/test_modeling_auto.py", - "models/auto/test_modeling_tf_pytorch.py", - "models/bort/test_modeling_bort.py", - "models/dit/test_modeling_dit.py", - ], - "models/auto/modeling_flax_auto.py": "models/auto/test_modeling_flax_auto.py", - "models/auto/modeling_tf_auto.py": [ - "models/auto/test_modeling_tf_auto.py", - "models/auto/test_modeling_tf_pytorch.py", - "models/bort/test_modeling_tf_bort.py", - ], - "models/gpt2/modeling_gpt2.py": [ - "models/gpt2/test_modeling_gpt2.py", - "models/megatron_gpt2/test_modeling_megatron_gpt2.py", - ], - "models/dpt/modeling_dpt.py": [ - "models/dpt/test_modeling_dpt.py", - "models/dpt/test_modeling_dpt_hybrid.py", - ], - "optimization.py": "optimization/test_optimization.py", - "optimization_tf.py": "optimization/test_optimization_tf.py", - "pipelines/__init__.py": 
all_pipeline_test_files + all_model_test_files, - "pipelines/base.py": all_pipeline_test_files + all_model_test_files, - "pipelines/text2text_generation.py": [ - "pipelines/test_pipelines_text2text_generation.py", - "pipelines/test_pipelines_summarization.py", - "pipelines/test_pipelines_translation.py", - ], - "pipelines/zero_shot_classification.py": "pipelines/test_pipelines_zero_shot.py", - "testing_utils.py": "utils/test_skip_decorators.py", - "tokenization_utils.py": ["test_tokenization_common.py", "tokenization/test_tokenization_utils.py"], - "tokenization_utils_base.py": ["test_tokenization_common.py", "tokenization/test_tokenization_utils.py"], - "tokenization_utils_fast.py": [ - "test_tokenization_common.py", - "tokenization/test_tokenization_utils.py", - "tokenization/test_tokenization_fast.py", - ], - "trainer.py": [ - "trainer/test_trainer.py", - "extended/test_trainer_ext.py", - "trainer/test_trainer_distributed.py", - "trainer/test_trainer_tpu.py", - ], - "train_pt_utils.py": "trainer/test_trainer_utils.py", - "utils/versions.py": "utils/test_versions_utils.py", -} - - -def module_to_test_file(module_fname): - """ - Returns the name of the file(s) where `module_fname` is tested. - """ - splits = module_fname.split(os.path.sep) - - # Special map has priority - short_name = os.path.sep.join(splits[2:]) - if short_name in SPECIAL_MODULE_TO_TEST_MAP: - test_file = SPECIAL_MODULE_TO_TEST_MAP[short_name] - if isinstance(test_file, str): - return f"tests/{test_file}" - return [f"tests/{f}" for f in test_file] - - module_name = splits[-1] - # Fast tokenizers are tested in the same file as the slow ones. 
- if module_name.endswith("_fast.py"): - module_name = module_name.replace("_fast.py", ".py") - - # Special case for pipelines submodules - if len(splits) >= 2 and splits[-2] == "pipelines": - default_test_file = f"tests/pipelines/test_pipelines_{module_name}" - return [default_test_file] + all_model_test_files - # Special case for benchmarks submodules - elif len(splits) >= 2 and splits[-2] == "benchmark": - return ["tests/benchmark/test_benchmark.py", "tests/benchmark/test_benchmark_tf.py"] - # Special case for commands submodules - elif len(splits) >= 2 and splits[-2] == "commands": - return "tests/utils/test_cli.py" - # Special case for onnx submodules - elif len(splits) >= 2 and splits[-2] == "onnx": - return ["tests/onnx/test_features.py", "tests/onnx/test_onnx.py", "tests/onnx/test_onnx_v2.py"] - # Special case for utils (not the one in src/transformers, the ones at the root of the repo). - elif len(splits) > 0 and splits[0] == "utils": - default_test_file = f"tests/repo_utils/test_{module_name}" - elif len(splits) > 4 and splits[2] == "models": - default_test_file = f"tests/models/{splits[3]}/test_{module_name}" - elif len(splits) > 2 and splits[2].startswith("generation"): - default_test_file = f"tests/generation/test_{module_name}" - elif len(splits) > 2 and splits[2].startswith("trainer"): - default_test_file = f"tests/trainer/test_{module_name}" - else: - default_test_file = f"tests/utils/test_{module_name}" - - if os.path.isfile(default_test_file): - return default_test_file - - # Processing -> processor - if "processing" in default_test_file: - test_file = default_test_file.replace("processing", "processor") - if os.path.isfile(test_file): - return test_file - - -# This list contains the list of test files we expect never to be launched from a change in a module/util. Those are -# launched separately. 
-EXPECTED_TEST_FILES_NEVER_TOUCHED = [ - "tests/generation/test_framework_agnostic.py", # Mixins inherited by actual test classes - "tests/mixed_int8/test_mixed_int8.py", # Mixed-int8 bitsandbytes test - "tests/pipelines/test_pipelines_common.py", # Actually checked by the pipeline based file - "tests/sagemaker/test_single_node_gpu.py", # SageMaker test - "tests/sagemaker/test_multi_node_model_parallel.py", # SageMaker test - "tests/sagemaker/test_multi_node_data_parallel.py", # SageMaker test - "tests/test_pipeline_mixin.py", # Contains no test of its own (only the common tester class) - "tests/utils/test_doc_samples.py", # Doc tests -] +def create_module_to_test_map(reverse_map=None, filter_models=False): + """ + Extract the tests from the reverse_dependency_map and potentially filters the model tests. + """ + if reverse_map is None: + reverse_map = create_reverse_dependency_map() + test_map = {module: [f for f in deps if f.startswith("tests")] for module, deps in reverse_map.items()} + if not filter_models: + return test_map -def _print_list(l): - return "\n".join([f"- {f}" for f in l]) + num_model_tests = len(list(PATH_TO_TESTS.glob("models/*"))) + + def has_many_models(tests): + model_tests = {Path(t).parts[2] for t in tests if t.startswith("tests/models/")} + return len(model_tests) > num_model_tests // 2 + + def filter_tests(tests): + return [t for t in tests if not t.startswith("tests/models/") or Path(t).parts[2] in IMPORTANT_MODELS] + + return {module: (filter_tests(tests) if has_many_models(tests) else tests) for module, tests in test_map.items()} -def sanity_check(): +def check_imports_all_exist(): """ - Checks that all test files can be touched by a modification in at least one module/utils. This test ensures that - newly-added test files are properly mapped to some module or utils, so they can be run by the CI. + Isn't used per se by the test fetcher but might be used later as a quality check. Putting this here for now so the + code is not lost. 
""" - # Grab all module and utils - all_files = [ - str(p.relative_to(PATH_TO_TRANFORMERS)) - for p in (Path(PATH_TO_TRANFORMERS) / "src/transformers").glob("**/*.py") - ] - all_files += [ - str(p.relative_to(PATH_TO_TRANFORMERS)) for p in (Path(PATH_TO_TRANFORMERS) / "utils").glob("**/*.py") - ] + cache = {} + all_modules = list(PATH_TO_TRANFORMERS.glob("**/*.py")) + list(PATH_TO_TESTS.glob("**/*.py")) + all_modules = [str(mod.relative_to(PATH_TO_REPO)) for mod in all_modules] + direct_deps = {m: get_module_dependencies(m, cache=cache) for m in all_modules} - # Compute all the test files we get from those. - test_files_found = [] - for f in all_files: - test_f = module_to_test_file(f) - if test_f is not None: - if isinstance(test_f, str): - test_files_found.append(test_f) - else: - test_files_found.extend(test_f) - - # Some of the test files might actually be subfolders so we grab the tests inside. - test_files = [] - for test_f in test_files_found: - if os.path.isdir(os.path.join(PATH_TO_TRANFORMERS, test_f)): - test_files.extend( - [ - str(p.relative_to(PATH_TO_TRANFORMERS)) - for p in (Path(PATH_TO_TRANFORMERS) / test_f).glob("**/test*.py") - ] - ) + for module, deps in direct_deps.items(): + for dep in deps: + if not (PATH_TO_REPO / dep).is_file(): + print(f"{module} has dependency on {dep} which does not exist.") + + +def _print_list(l): + return "\n".join([f"- {f}" for f in l]) + + +def create_json_map(test_files_to_run, json_output_file): + if json_output_file is None: + return + + test_map = {} + for test_file in test_files_to_run: + # `test_file` is a path to a test folder/file, starting with `tests/`. 
For example, + # - `tests/models/bert/test_modeling_bert.py` or `tests/models/bert` + # - `tests/trainer/test_trainer.py` or `tests/trainer` + # - `tests/test_modeling_common.py` + names = test_file.split(os.path.sep) + if names[1] == "models": + # take the part like `models/bert` for modeling tests + key = os.path.sep.join(names[1:3]) + elif len(names) > 2 or not test_file.endswith(".py"): + # test folders under `tests` or python files under them + # take the part like tokenization, `pipeline`, etc. for other test categories + key = os.path.sep.join(names[1:2]) else: - test_files.append(test_f) + # common test files directly under `tests/` + key = "common" - # Compare to existing test files - existing_test_files = [ - str(p.relative_to(PATH_TO_TRANFORMERS)) for p in (Path(PATH_TO_TRANFORMERS) / "tests").glob("**/test*.py") - ] - not_touched_test_files = [f for f in existing_test_files if f not in test_files] + if key not in test_map: + test_map[key] = [] + test_map[key].append(test_file) - should_be_tested = set(not_touched_test_files) - set(EXPECTED_TEST_FILES_NEVER_TOUCHED) - if len(should_be_tested) > 0: - raise ValueError( - "The following test files are not currently associated with any module or utils files, which means they " - f"will never get run by the CI:\n{_print_list(should_be_tested)}\n. Make sure the names of these test " - "files match the name of the module or utils they are testing, or adapt the constant " - "`SPECIAL_MODULE_TO_TEST_MAP` in `utils/tests_fetcher.py` to add them. If your test file is triggered " - "separately and is not supposed to be run by the regular CI, add it to the " - "`EXPECTED_TEST_FILES_NEVER_TOUCHED` constant instead." 
- ) + # sort the keys & values + keys = sorted(test_map.keys()) + test_map = {k: " ".join(sorted(test_map[k])) for k in keys} + with open(json_output_file, "w", encoding="UTF-8") as fp: + json.dump(test_map, fp, ensure_ascii=False) -def infer_tests_to_run(output_file, diff_with_last_commit=False, filters=None, json_output_file=None): +def infer_tests_to_run( + output_file, diff_with_last_commit=False, filters=None, filter_models=True, json_output_file=None +): modified_files = get_modified_python_files(diff_with_last_commit=diff_with_last_commit) print(f"\n### MODIFIED FILES ###\n{_print_list(modified_files)}") # Create the map that will give us all impacted modules. - impacted_modules_map = create_reverse_dependency_map() + reverse_map = create_reverse_dependency_map() impacted_files = modified_files.copy() for f in modified_files: - if f in impacted_modules_map: - impacted_files.extend(impacted_modules_map[f]) + if f in reverse_map: + impacted_files.extend(reverse_map[f]) # Remove duplicates impacted_files = sorted(set(impacted_files)) print(f"\n### IMPACTED FILES ###\n{_print_list(impacted_files)}") # Grab the corresponding test files: - if "setup.py" in impacted_files: + if "setup.py" in modified_files: test_files_to_run = ["tests"] repo_utils_launch = True else: - # Grab the corresponding test files: - test_files_to_run = [] - for f in impacted_files: - # Modified test files are always added - if f.startswith("tests/"): - test_files_to_run.append(f) - # Example files are tested separately - elif f.startswith("examples/pytorch"): - test_files_to_run.append("examples/pytorch/test_pytorch_examples.py") - test_files_to_run.append("examples/pytorch/test_accelerate_examples.py") - elif f.startswith("examples/tensorflow"): - test_files_to_run.append("examples/tensorflow/test_tensorflow_examples.py") - elif f.startswith("examples/flax"): - test_files_to_run.append("examples/flax/test_flax_examples.py") - else: - new_tests = module_to_test_file(f) - if new_tests is not 
None: - if isinstance(new_tests, str): - test_files_to_run.append(new_tests) - else: - test_files_to_run.extend(new_tests) - - # Remove duplicates + # All modified tests need to be run. + test_files_to_run = [ + f for f in modified_files if f.startswith("tests") and f.split(os.path.sep)[-1].startswith("test") + ] + # Then we grab the corresponding test files. + test_map = create_module_to_test_map(reverse_map=reverse_map, filter_models=filter_models) + for f in modified_files: + if f in test_map: + test_files_to_run.extend(test_map[f]) test_files_to_run = sorted(set(test_files_to_run)) + # Remove SageMaker tests + test_files_to_run = [f for f in test_files_to_run if not f.split(os.path.sep)[1] == "sagemaker"] # Make sure we did not end up with a test file that was removed - test_files_to_run = [f for f in test_files_to_run if os.path.isfile(f) or os.path.isdir(f)] + test_files_to_run = [f for f in test_files_to_run if (PATH_TO_REPO / f).exists()] if filters is not None: filtered_files = [] - for filter in filters: - filtered_files.extend([f for f in test_files_to_run if f.startswith(filter)]) + for _filter in filters: + filtered_files.extend([f for f in test_files_to_run if f.startswith(_filter)]) test_files_to_run = filtered_files - repo_utils_launch = any(f.split(os.path.sep)[1] == "repo_utils" for f in test_files_to_run) + + repo_utils_launch = any(f.split(os.path.sep)[1] == "repo_utils" for f in modified_files) if repo_utils_launch: repo_util_file = Path(output_file).parent / "test_repo_utils.txt" @@ -610,34 +584,7 @@ def infer_tests_to_run(output_file, diff_with_last_commit=False, filters=None, j if "tests" in test_files_to_run: test_files_to_run = get_all_tests() - if json_output_file is not None: - test_map = {} - for test_file in test_files_to_run: - # `test_file` is a path to a test folder/file, starting with `tests/`. 
For example, - # - `tests/models/bert/test_modeling_bert.py` or `tests/models/bert` - # - `tests/trainer/test_trainer.py` or `tests/trainer` - # - `tests/test_modeling_common.py` - names = test_file.split(os.path.sep) - if names[1] == "models": - # take the part like `models/bert` for modeling tests - key = "/".join(names[1:3]) - elif len(names) > 2 or not test_file.endswith(".py"): - # test folders under `tests` or python files under them - # take the part like tokenization, `pipeline`, etc. for other test categories - key = "/".join(names[1:2]) - else: - # common test files directly under `tests/` - key = "common" - - if key not in test_map: - test_map[key] = [] - test_map[key].append(test_file) - - # sort the keys & values - keys = sorted(test_map.keys()) - test_map = {k: " ".join(sorted(test_map[k])) for k in keys} - with open(json_output_file, "w", encoding="UTF-8") as fp: - json.dump(test_map, fp, ensure_ascii=False) + create_json_map(test_files_to_run, json_output_file) def filter_tests(output_file, filters): @@ -667,11 +614,29 @@ def filter_tests(output_file, filters): f.write(" ".join(test_files)) +def parse_commit_message(commit_message): + """ + Parses the commit message to detect if a command is there to skip, force all or part of the CI. + + Returns a dictionary of strings to bools with keys skip, test_all_models and test_all. 
+ """ + if commit_message is None: + return {"skip": False, "no_filter": False, "test_all": False} + + command_search = re.search(r"\[([^\]]*)\]", commit_message) + if command_search is not None: + command = command_search.groups()[0] + command = command.lower().replace("-", " ").replace("_", " ") + skip = command in ["ci skip", "skip ci", "circleci skip", "skip circleci"] + no_filter = set(command.split(" ")) == {"no", "filter"} + test_all = set(command.split(" ")) == {"test", "all"} + return {"skip": skip, "no_filter": no_filter, "test_all": test_all} + else: + return {"skip": False, "no_filter": False, "test_all": False} + + if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument( - "--sanity_check", action="store_true", help="Only test that all tests and modules are accounted for." - ) parser.add_argument( "--output_file", type=str, default="test_list.txt", help="Where to store the list of tests to run" ) @@ -704,33 +669,54 @@ def filter_tests(output_file, filters): help="Will only print the tree of modules depending on the file passed.", default=None, ) + parser.add_argument( + "--commit_message", + type=str, + help="The commit message (which could contain a command to force all tests or skip the CI).", + default=None, + ) args = parser.parse_args() if args.print_dependencies_of is not None: print_tree_deps_of(args.print_dependencies_of) - elif args.sanity_check: - sanity_check() elif args.filter_tests: filter_tests(args.output_file, ["pipelines", "repo_utils"]) else: - repo = Repo(PATH_TO_TRANFORMERS) + repo = Repo(PATH_TO_REPO) + commit_message = repo.head.commit.message + commit_flags = parse_commit_message(commit_message) + if commit_flags["skip"]: + print("Force-skipping the CI") + quit() + if commit_flags["no_filter"]: + print("Running all tests fetched without filtering.") + if commit_flags["test_all"]: + print("Force-launching all tests") diff_with_last_commit = args.diff_with_last_commit if not diff_with_last_commit and not 
repo.head.is_detached and repo.head.ref == repo.refs.main: print("main branch detected, fetching tests against last commit.") diff_with_last_commit = True - try: - infer_tests_to_run( - args.output_file, - diff_with_last_commit=diff_with_last_commit, - filters=args.filters, - json_output_file=args.json_output_file, - ) - filter_tests(args.output_file, ["repo_utils"]) - except Exception as e: - print(f"\nError when trying to grab the relevant tests: {e}\n\nRunning all tests.") + if not commit_flags["test_all"]: + try: + infer_tests_to_run( + args.output_file, + diff_with_last_commit=diff_with_last_commit, + filters=args.filters, + json_output_file=args.json_output_file, + filter_models=not commit_flags["no_filter"], + ) + filter_tests(args.output_file, ["repo_utils"]) + except Exception as e: + print(f"\nError when trying to grab the relevant tests: {e}\n\nRunning all tests.") + commit_flags["test_all"] = True + + if commit_flags["test_all"]: with open(args.output_file, "w", encoding="utf-8") as f: if args.filters is None: f.write("./tests/") else: f.write(" ".join(args.filters)) + + test_files_to_run = get_all_tests() + create_json_map(test_files_to_run, args.json_output_file)